From 2eed3b723d93d2886ee62f1da7f33f7354e16720 Mon Sep 17 00:00:00 2001 From: barbaravaldez Date: Mon, 19 Oct 2020 09:32:32 -0700 Subject: [PATCH] Remove CU8 book --- Big-Data-Clusters/CU8/Public/_config.yml | 2 - Big-Data-Clusters/CU8/Public/_data/toc.yml | 371 ------- .../cer001-create-root-ca.ipynb | 723 -------------- .../cer002-download-existing-root-ca.ipynb | 560 ----------- .../cer003-upload-existing-root-ca.ipynb | 583 ----------- ...004-download-upload-existing-root-ca.ipynb | 703 ------------- ...10-install-generated-root-ca-locally.ipynb | 608 ------------ ...create-management-service-proxy-cert.ipynb | 933 ----------------- .../cer021-create-knox-cert.ipynb | 933 ----------------- .../cer022-create-app-proxy-cert.ipynb | 933 ----------------- .../cer023-create-master-cert.ipynb | 934 ------------------ .../cer024-create-controller-cert.ipynb | 921 ----------------- ...upload-management-service-proxy-cert.ipynb | 556 ----------- .../cer026-upload-knox-cert.ipynb | 532 ---------- .../cer027-upload-app-proxy-cert.ipynb | 556 ----------- .../cer028-upload-master-cert.ipynb | 556 ----------- .../cer029-upload-controller-cert.ipynb | 558 ----------- ...30-sign-service-proxy-generated-cert.ipynb | 788 --------------- .../cer031-sign-knox-generated-cert.ipynb | 788 --------------- ...cer032-sign-app-proxy-generated-cert.ipynb | 788 --------------- .../cer033-sign-master-generated-cert.ipynb | 788 --------------- ...er034-sign-controller-generated-cert.ipynb | 865 ---------------- ...35-ca-sign-controller-generated-cert.ipynb | 829 ---------------- .../cer040-install-service-proxy-cert.ipynb | 922 ----------------- .../cer041-install-knox-cert.ipynb | 914 ----------------- .../cer042-install-app-proxy-cert.ipynb | 919 ----------------- .../cer044-install-controller-cert.ipynb | 911 ----------------- .../cer050-wait-cluster-healthly.ipynb | 264 ----- .../cer100-create-root-ca-install-certs.ipynb | 620 ------------ .../cer101-use-root-ca-install-certs.ipynb | 609 ------------ .../Public/content/cert-management/readme.md | 114 --- .../CU8/Public/content/common/readme.md | 25 - .../content/common/sop005-az-login.ipynb | 400 -------- .../content/common/sop006-az-logout.ipynb | 400 -------- .../sop007-get-key-version-information.ipynb | 516 ---------- .../sop011-set-kubernetes-context.ipynb | 503 ---------- ...op013-create-secret-for-azdata-login.ipynb | 631 ------------ ...op014-delete-secret-for-azdata-login.ipynb | 511 ---------- .../content/common/sop028-azdata-login.ipynb | 574 ----------- .../content/common/sop033-azdata-logout.ipynb | 409 -------- .../common/sop034-wait-cluster-healthly.ipynb | 269 ----- .../CU8/Public/content/diagnose/readme.md | 29 - .../diagnose/tsg027-observe-bdc-create.ipynb | 743 -------------- .../tsg029-find-dumps-in-the-cluster.ipynb | 180 ---- ...et-cpu-and-memory-for-all-containers.ipynb | 158 --- ...037-determine-primary-master-replica.ipynb | 546 ---------- .../tsg055-time-curl-to-sparkhead.ipynb | 544 ---------- .../tsg060-get-disk-space-for-all-pvcs.ipynb | 558 ----------- .../diagnose/tsg078-is-cluster-healthy.ipynb | 555 ----------- ...tsg079-generate-controller-core-dump.ipynb | 500 ---------- .../tsg086-run-top-for-all-containers.ipynb | 146 --- .../diagnose/tsg087-use-hadoop-fs.ipynb | 489 --------- .../tsg108-controller-failed-to-upgrade.ipynb | 490 --------- .../CU8/Public/content/install/readme.md | 37 - .../content/install/sop010-upgrade-bdc.ipynb | 491 --------- ...012-brew-install-odbc-for-sql-server.ipynb | 397 -------- 
.../install/sop036-install-kubectl.ipynb | 416 -------- .../install/sop037-uninstall-kubectl.ipynb | 409 -------- .../content/install/sop038-install-az.ipynb | 406 -------- .../content/install/sop039-uninstall-az.ipynb | 397 -------- .../content/install/sop040-upgrade-pip.ipynb | 399 -------- .../install/sop054-install-azdata.ipynb | 430 -------- .../install/sop055-uninstall-azdata.ipynb | 424 -------- .../sop059-install-kubernetes-module.ipynb | 415 -------- .../sop060-uninstall-kubernetes-module.ipynb | 415 -------- .../sop062-install-ipython-sql-module.ipynb | 429 -------- .../sop063-packman-install-azdata.ipynb | 457 --------- .../sop064-packman-uninstall-azdata.ipynb | 431 -------- ...9-install-odbc-driver-for-sql-server.ipynb | 63 -- .../Public/content/log-analyzers/readme.md | 49 - .../tsg030-get-errorlog-from-all-pods.ipynb | 289 ------ ...sg031-get-polybase-logs-for-all-pods.ipynb | 287 ------ .../log-analyzers/tsg034-get-livy-logs.ipynb | 291 ------ .../tsg035-get-sparkhistory-logs.ipynb | 291 ------ .../tsg036-get-controller-logs.ipynb | 327 ------ .../log-analyzers/tsg046-get-knox-logs.ipynb | 328 ------ .../tsg073-get-influxdb-logs.ipynb | 285 ------ .../tsg076-get-elastic-search-logs.ipynb | 288 ------ .../tsg077-get-kibana-logs.ipynb | 285 ------ .../tsg088-get-datanode-logs.ipynb | 290 ------ .../tsg090-get-nodemanager-logs.ipynb | 288 ------ ...tsg092-get-all-supervisord-log-tails.ipynb | 288 ------ .../tsg093-get-all-agent-log-tails.ipynb | 262 ----- .../tsg094-get-grafana-logs.ipynb | 285 ------ .../tsg095-get-namenode-logs.ipynb | 288 ------ .../tsg096-get-zookeeper-logs.ipynb | 288 ------ .../tsg117-get-approxy-nginx-logs.ipynb | 297 ------ ...tsg120-get-all-provisioner-log-tails.ipynb | 249 ----- ...tsg121-get-all-supervisor-mssql-logs.ipynb | 291 ------ .../tsg122-get-hive-metastore-logs.ipynb | 288 ------ .../log-analyzers/tsg123-get-hive-logs.ipynb | 288 ------ .../CU8/Public/content/log-files/readme.md | 19 - .../content/log-files/tsg001-copy-logs.ipynb | 492 --------- .../tsg061-tail-bdc-container-logs.ipynb | 205 ---- ...062-tail-bdc-previous-container-logs.ipynb | 200 ---- ...tsg083-run-kubectl-cluster-info-dump.ipynb | 401 -------- .../tsg084-internal-query-process-error.ipynb | 66 -- .../log-files/tsg091-get-azdata-logs.ipynb | 58 -- .../CU8/Public/content/monitor-bdc/readme.md | 29 - .../tsg003-show-spark-sessions.ipynb | 475 --------- .../monitor-bdc/tsg004-show-app-list.ipynb | 520 ---------- .../tsg012-azdata-bdc-status.ipynb | 396 -------- .../tsg013-azdata-bdc-hdfs-ls.ipynb | 495 ---------- .../tsg014-azdata-bdc-endpoint-list.ipynb | 396 -------- .../tsg017-azdata-bdc-config-show.ipynb | 396 -------- .../tsg033-azdata-bdc-sql-status.ipynb | 396 -------- .../tsg049-azdata-bdc-control-status.ipynb | 396 -------- .../tsg068-azdata-bdc-hdfs-status.ipynb | 396 -------- .../tsg069-azdata-bdc-gateway-status.ipynb | 396 -------- .../tsg070-use-azdata-sql-query.ipynb | 514 ---------- .../CU8/Public/content/monitor-k8s/readme.md | 51 - .../tsg006-view-system-pod-status.ipynb | 415 -------- .../tsg007-view-bdc-pod-status.ipynb | 462 --------- .../tsg008-get-k8s-version-info.ipynb | 401 -------- .../monitor-k8s/tsg009-get-nodes.ipynb | 415 -------- .../tsg010-get-kubernetes-contexts.ipynb | 413 -------- .../tsg015-view-k8s-services-for-bdc.ipynb | 443 --------- ...6-describe-all-pods-in-bdc-namespace.ipynb | 493 --------- .../tsg020-describe-all-nodes.ipynb | 443 --------- .../tsg021-get-k8s-cluster-info.ipynb | 402 -------- 
...g022-get-external-ip-of-kubeadm-host.ipynb | 401 -------- .../tsg023-run-kubectl-get-all.ipynb | 465 --------- ...-get-hosting-node-and-data-log-mount.ipynb | 560 ----------- .../tsg063-get-storage-classes.ipynb | 401 -------- .../tsg064-get-persistent-volume-claims.ipynb | 448 --------- ...tsg065-get-secrets-for-bdc-namespace.ipynb | 448 --------- .../tsg066-get-kubernetes-events.ipynb | 480 --------- .../tsg072-get-persistent-volumes.ipynb | 402 -------- .../tsg081-get-kubernetes-namespaces.ipynb | 421 -------- ...089-describe-non-running-pods-in-bdc.ipynb | 511 ---------- .../monitor-k8s/tsg097-get-statefulsets.ipynb | 446 --------- .../monitor-k8s/tsg098-get-replicasets.ipynb | 446 --------- .../monitor-k8s/tsg099-get-daemonsets.ipynb | 449 --------- .../CU8/Public/content/readme.md | 43 - .../CU8/Public/content/repair/readme.md | 41 - .../repair/sop016-get-sid-for-principal.ipynb | 100 -- .../repair/sop017-add-app-deploy-group.ipynb | 204 ---- .../tsg024-name-node-is-in-safe-mode.ipynb | 616 ------------ ...-restart-nodemanager-in-storage-pool.ipynb | 165 ---- .../tsg038-doc-is-missing-key-error.ipynb | 52 - .../tsg041-increase-fs-aio-max-nr.ipynb | 47 - ...tsg045-max-number-data-disks-allowed.ipynb | 634 ------------ ...7-expected-only-one-object-with-name.ipynb | 59 -- ...-create-stuck-waiting-for-controller.ipynb | 220 ----- ...-timeout-expired-waiting-for-volumes.ipynb | 654 ------------ .../repair/tsg053-save-book-first.ipynb | 40 - ...g057-failed-when-starting-controller.ipynb | 63 -- ...failed-to-complete-kube-config-setup.ipynb | 62 -- ...etworkplugin-cni-failed-to-setup-pod.ipynb | 506 ---------- .../repair/tsg109-upgrade-stalled.ipynb | 242 ----- .../tsg110-azdata-returns-apierror.ipynb | 43 - .../tsg124-no-credentials-were-supplied.ipynb | 40 - .../CU8/Public/content/sample/readme.md | 23 - .../sam001a-load-sample-data-into-bdc.ipynb | 594 ----------- .../sam001b-convert-csv-to-parquet.ipynb | 133 --- .../sam001c-query-hdfs-in-sql-server.ipynb | 709 ------------- .../sam002-query-hdfs-in-sql-server.ipynb | 722 -------------- .../content/sample/sam003-data-pool.ipynb | 286 ------ .../sample/sam008-spark-using-azdata.ipynb | 657 ------------ .../sample/sam009-hdfs-using-azdata.ipynb | 624 ------------ .../sample/sam010-app-using-azdata.ipynb | 677 ------------- .../Public/content/troubleshooters/readme.md | 21 - .../tsg100-troubleshoot-bdc.ipynb | 104 -- .../tsg101-troubleshoot-sql-server.ipynb | 58 -- .../tsg102-troubleshoot-hdfs.ipynb | 63 -- .../tsg103-troubleshoot-spark.ipynb | 66 -- .../tsg104-troubleshoot-control.ipynb | 53 - .../tsg105-troubleshoot-gateway.ipynb | 42 - .../tsg106-troubleshoot-app.ipynb | 46 - 169 files changed, 67166 deletions(-) delete mode 100644 Big-Data-Clusters/CU8/Public/_config.yml delete mode 100644 Big-Data-Clusters/CU8/Public/_data/toc.yml delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb delete mode 100644 
Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer025-upload-management-service-proxy-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer026-upload-knox-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer027-upload-app-proxy-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer028-upload-master-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer029-upload-controller-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer035-ca-sign-controller-generated-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/cert-management/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/readme.md delete mode 100644 
Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb delete mode 100644 
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg084-internal-query-process-error.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/log-files/tsg091-get-azdata-logs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg003-show-spark-sessions.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg004-show-app-list.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg012-azdata-bdc-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg013-azdata-bdc-hdfs-ls.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg014-azdata-bdc-endpoint-list.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg017-azdata-bdc-config-show.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg033-azdata-bdc-sql-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg049-azdata-bdc-control-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg068-azdata-bdc-hdfs-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg069-azdata-bdc-gateway-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg070-use-azdata-sql-query.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg006-view-system-pod-status.ipynb delete mode 100644 
Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg007-view-bdc-pod-status.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg008-get-k8s-version-info.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg009-get-nodes.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg010-get-kubernetes-contexts.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg015-view-k8s-services-for-bdc.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg020-describe-all-nodes.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg021-get-k8s-cluster-info.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg022-get-external-ip-of-kubeadm-host.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg023-run-kubectl-get-all.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg042-get-hosting-node-and-data-log-mount.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg063-get-storage-classes.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg064-get-persistent-volume-claims.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg065-get-secrets-for-bdc-namespace.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg066-get-kubernetes-events.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg072-get-persistent-volumes.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg089-describe-non-running-pods-in-bdc.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg097-get-statefulsets.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg098-get-replicasets.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg099-get-daemonsets.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/sop016-get-sid-for-principal.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/sop017-add-app-deploy-group.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg024-name-node-is-in-safe-mode.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg028-restart-nodemanager-in-storage-pool.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg038-doc-is-missing-key-error.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg041-increase-fs-aio-max-nr.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg045-max-number-data-disks-allowed.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg047-expected-only-one-object-with-name.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg048-create-stuck-waiting-for-controller.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg050-timeout-expired-waiting-for-volumes.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg053-save-book-first.ipynb delete mode 100644 
Big-Data-Clusters/CU8/Public/content/repair/tsg057-failed-when-starting-controller.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg067-failed-to-complete-kube-config-setup.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg075-networkplugin-cni-failed-to-setup-pod.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg109-upgrade-stalled.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg110-azdata-returns-apierror.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/repair/tsg124-no-credentials-were-supplied.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam001a-load-sample-data-into-bdc.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam001b-convert-csv-to-parquet.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam001c-query-hdfs-in-sql-server.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam002-query-hdfs-in-sql-server.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam003-data-pool.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam008-spark-using-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam009-hdfs-using-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/sample/sam010-app-using-azdata.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/readme.md delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg100-troubleshoot-bdc.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg101-troubleshoot-sql-server.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg102-troubleshoot-hdfs.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg103-troubleshoot-spark.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg104-troubleshoot-control.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg105-troubleshoot-gateway.ipynb delete mode 100644 Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg106-troubleshoot-app.ipynb diff --git a/Big-Data-Clusters/CU8/Public/_config.yml b/Big-Data-Clusters/CU8/Public/_config.yml deleted file mode 100644 index 1b0ff565..00000000 --- a/Big-Data-Clusters/CU8/Public/_config.yml +++ /dev/null @@ -1,2 +0,0 @@ -title: Operations and Support - SQL Server 2019 Big Data Clusters -description: A collection of notebooks to help operate and support SQL Server Big Data Clusters. 
\ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/_data/toc.yml b/Big-Data-Clusters/CU8/Public/_data/toc.yml deleted file mode 100644 index 0a7639f4..00000000 --- a/Big-Data-Clusters/CU8/Public/_data/toc.yml +++ /dev/null @@ -1,371 +0,0 @@ -- title: Welcome - url: /readme - not_numbered: true -- title: Search - search: true - -- title: Troubleshooters - url: /troubleshooters/readme - not_numbered: true - expand_sections: true - sections: - - title: TSG100 - The Big Data Cluster troubleshooter - url: troubleshooters/tsg100-troubleshoot-bdc - - title: TSG101 - SQL Server troubleshooter - url: troubleshooters/tsg101-troubleshoot-sql-server - - title: TSG102 - HDFS troubleshooter - url: troubleshooters/tsg102-troubleshoot-hdfs - - title: TSG103 - Spark troubleshooter - url: troubleshooters/tsg103-troubleshoot-spark - - title: TSG104 - Control troubleshooter - url: troubleshooters/tsg104-troubleshoot-control - - title: TSG105 - Gateway troubleshooter - url: troubleshooters/tsg105-troubleshoot-gateway - - title: TSG106 - App troubleshooter - url: troubleshooters/tsg106-troubleshoot-app -- title: Log Analyzers - url: /log-analyzers/readme - not_numbered: true - expand_sections: true - sections: - - title: TSG046 - Knox gateway logs - url: log-analyzers/tsg046-get-knox-logs - - title: TSG036 - Controller logs - url: log-analyzers/tsg036-get-controller-logs - - title: TSG034 - Livy logs - url: log-analyzers/tsg034-get-livy-logs - - title: TSG035 - Spark History logs - url: log-analyzers/tsg035-get-sparkhistory-logs - - title: TSG030 - SQL Server errorlog files - url: log-analyzers/tsg030-get-errorlog-from-all-pods - - title: TSG031 - SQL Server PolyBase logs - url: log-analyzers/tsg031-get-polybase-logs-for-all-pods - - title: TSG095 - Hadoop namenode logs - url: log-analyzers/tsg095-get-namenode-logs - - title: TSG090 - Yarn nodemanager logs - url: log-analyzers/tsg090-get-nodemanager-logs - - title: TSG088 - Hadoop datanode logs - url: log-analyzers/tsg088-get-datanode-logs - - title: TSG096 - Zookeeper logs - url: log-analyzers/tsg096-get-zookeeper-logs - - title: TSG073 - InfluxDB logs - url: log-analyzers/tsg073-get-influxdb-logs - - title: TSG076 - Elastic Search logs - url: log-analyzers/tsg076-get-elastic-search-logs - - title: TSG077 - Kibana logs - url: log-analyzers/tsg077-get-kibana-logs - - title: TSG092 - Supervisord log tail for all containers in BDC - url: log-analyzers/tsg092-get-all-supervisord-log-tails - - title: TSG093 - Agent log tail for all containers in BDC - url: log-analyzers/tsg093-get-all-agent-log-tails - - title: TSG094 - Grafana logs - url: log-analyzers/tsg094-get-grafana-logs - - title: TSG117 - App-Deploy Proxy Nginx Logs - url: log-analyzers/tsg117-get-approxy-nginx-logs - - title: TSG120 - Provisioner log tail for all containers in BDC - url: log-analyzers/tsg120-get-all-provisioner-log-tails - - title: TSG121 - Supervisor mssql-server logs - url: log-analyzers/tsg121-get-all-supervisor-mssql-logs - - title: TSG122 - Hive Metastore logs - url: log-analyzers/tsg122-get-hive-metastore-logs - - title: TSG123 - Hive logs - url: log-analyzers/tsg123-get-hive-logs -- title: Diagnose - url: /diagnose/readme - not_numbered: true - expand_sections: true - sections: - - title: TSG027 - Observe cluster deployment - url: diagnose/tsg027-observe-bdc-create - - title: TSG078 - Is cluster healthy - url: diagnose/tsg078-is-cluster-healthy - - title: TSG029 - Find dumps in the cluster - url: diagnose/tsg029-find-dumps-in-the-cluster - - title: TSG032 - CPU 
and Memory usage for all containers - url: diagnose/tsg032-get-cpu-and-memory-for-all-containers - - title: TSG060 - Persistent Volume disk space for all BDC PVCs - url: diagnose/tsg060-get-disk-space-for-all-pvcs - - title: TSG087 - Use hadoop fs CLI on nmnode pod - url: diagnose/tsg087-use-hadoop-fs - - title: TSG037 - Determine master pool pod hosting primary replica - url: diagnose/tsg037-determine-primary-master-replica - - title: TSG055 - Time Curl to Sparkhead - url: diagnose/tsg055-time-curl-to-sparkhead - - title: TSG079 - Generate `controller` core dump - url: diagnose/tsg079-generate-controller-core-dump - - title: TSG086 - Run `top` in all containers - url: diagnose/tsg086-run-top-for-all-containers - - title: TSG108 - View the controller upgrade config map - url: diagnose/tsg108-controller-failed-to-upgrade -- title: Repair - url: /repair/readme - not_numbered: true - expand_sections: false - sections: - - title: TSG024 - Namenode is in safe mode - url: repair/tsg024-name-node-is-in-safe-mode - - title: TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr) - url: repair/tsg041-increase-fs-aio-max-nr - - title: TSG048 - Deployment stuck at "Waiting for controller pod to be up" - url: repair/tsg048-create-stuck-waiting-for-controller - - title: TSG038 - BDC create failures due to - doc is missing key - url: repair/tsg038-doc-is-missing-key-error - - title: TSG047 - ConfigException - Expected only one object with name - url: repair/tsg047-expected-only-one-object-with-name - - title: TSG050 - Cluster create hangs with "timeout expired waiting for volumes to attach or mount for pod" - url: repair/tsg050-timeout-expired-waiting-for-volumes - - title: TSG057 - Failed when starting controller service. System.TimeoutException - url: repair/tsg057-failed-when-starting-controller - - title: TSG067 - Failed to complete kube config setup - url: repair/tsg067-failed-to-complete-kube-config-setup - - title: TSG075 - FailedCreatePodSandBox due to NetworkPlugin cni failed to set up pod - url: repair/tsg075-networkplugin-cni-failed-to-setup-pod - - title: TSG110 - Azdata returns ApiError - url: repair/tsg110-azdata-returns-apierror - - title: TSG028 - Restart node manager on all storage pool nodes - url: repair/tsg028-restart-nodemanager-in-storage-pool - - title: TSG045 - The maximum number of data disks allowed to be attached to a VM of this size (AKS) - url: repair/tsg045-max-number-data-disks-allowed - - title: TSG109 - Set upgrade timeouts - url: repair/tsg109-upgrade-stalled - - title: TSG053 - ADS Provided Books must be saved before use - url: repair/tsg053-save-book-first - - title: SOP016 - Get SID for Active Directory user/group - url: repair/sop016-get-sid-for-principal - - title: SOP017 - Add app-deploy AD group - url: repair/sop017-add-app-deploy-group - - title: TSG124 - 'No credentials were supplied' error from azdata login - url: repair/tsg124-no-credentials-were-supplied -- title: Monitor - Big Data Cluster - url: /monitor-bdc/readme - not_numbered: true - expand_sections: true - sections: - - title: TSG014 - Show BDC endpoints - url: monitor-bdc/tsg014-azdata-bdc-endpoint-list - - title: TSG012 - Show BDC Status - url: monitor-bdc/tsg012-azdata-bdc-status - - title: TSG069 - Show Big Data Cluster Gateway status - url: monitor-bdc/tsg069-azdata-bdc-gateway-status - - title: TSG049 - Show BDC Controller status - url: monitor-bdc/tsg049-azdata-bdc-control-status - - title: TSG033 - Show BDC SQL status - url: 
monitor-bdc/tsg033-azdata-bdc-sql-status - - title: TSG068 - Show BDC HDFS status - url: monitor-bdc/tsg068-azdata-bdc-hdfs-status - - title: TSG017 - Show BDC Configuration - url: monitor-bdc/tsg017-azdata-bdc-config-show - - title: TSG004 - Show BDC Apps - url: monitor-bdc/tsg004-show-app-list - - title: TSG003 - Show BDC Spark sessions - url: monitor-bdc/tsg003-show-spark-sessions - - title: TSG013 - Show file list in Storage Pool (HDFS) - url: monitor-bdc/tsg013-azdata-bdc-hdfs-ls - - title: TSG070 - Query SQL master pool - url: monitor-bdc/tsg070-use-azdata-sql-query -- title: Monitor - Kubernetes - url: /monitor-k8s/readme - not_numbered: true - expand_sections: false - sections: - - title: TSG021 - Get cluster info (Kubernetes) - url: monitor-k8s/tsg021-get-k8s-cluster-info - - title: TSG008 - Get version information (Kubernetes) - url: monitor-k8s/tsg008-get-k8s-version-info - - title: TSG081 - Get namespaces (Kubernetes) - url: monitor-k8s/tsg081-get-kubernetes-namespaces - - title: TSG009 - Get nodes (Kubernetes) - url: monitor-k8s/tsg009-get-nodes - - title: TSG006 - Get system pod status - url: monitor-k8s/tsg006-view-system-pod-status - - title: TSG007 - Get BDC pod status - url: monitor-k8s/tsg007-view-bdc-pod-status - - title: TSG015 - View BDC services (Kubernetes) - url: monitor-k8s/tsg015-view-k8s-services-for-bdc - - title: TSG097 - Get BDC stateful sets (Kubernetes) - url: monitor-k8s/tsg097-get-statefulsets - - title: TSG098 - Get BDC replicasets (Kubernetes) - url: monitor-k8s/tsg098-get-replicasets - - title: TSG099 - Get BDC daemonsets (Kubernetes) - url: monitor-k8s/tsg099-get-daemonsets - - title: TSG023 - Get all BDC objects (Kubernetes) - url: monitor-k8s/tsg023-run-kubectl-get-all - - title: TSG063 - Get storage classes (Kubernetes) - url: monitor-k8s/tsg063-get-storage-classes - - title: TSG072 - Get Persistent Volumes (Kubernetes) - url: monitor-k8s/tsg072-get-persistent-volumes - - title: TSG064 - Get BDC Persistent Volume Claims - url: monitor-k8s/tsg064-get-persistent-volume-claims - - title: TSG065 - Get BDC secrets (Kubernetes) - url: monitor-k8s/tsg065-get-secrets-for-bdc-namespace - - title: TSG066 - Get BDC events (Kubernetes) - url: monitor-k8s/tsg066-get-kubernetes-events - - title: TSG020 - Describe nodes (Kubernetes) - url: monitor-k8s/tsg020-describe-all-nodes - - title: TSG016 - Describe BDC pods - url: monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace - - title: TSG089 - Describe BDC non-running pods - url: monitor-k8s/tsg089-describe-non-running-pods-in-bdc - - title: TSG010 - Get configuration contexts - url: monitor-k8s/tsg010-get-kubernetes-contexts - - title: TSG022 - Get external IP address for kubeadm host - url: monitor-k8s/tsg022-get-external-ip-of-kubeadm-host - - title: TSG042 - Get `node name` and external mounts for `Data` and `Logs` `PVCs` - url: monitor-k8s/tsg042-get-hosting-node-and-data-log-mount -- title: Logs - url: /log-files/readme - not_numbered: true - expand_sections: false - sections: - - title: TSG001 - Run azdata copy-logs - url: log-files/tsg001-copy-logs - - title: TSG091 - Get the azdata CLI logs - url: log-files/tsg091-get-azdata-logs - - title: TSG083 - Run kubectl cluster-info dump - url: log-files/tsg083-run-kubectl-cluster-info-dump - - title: TSG061 - Get tail of all container logs for pods in BDC namespace - url: log-files/tsg061-tail-bdc-container-logs - - title: TSG062 - Get tail of all previous container logs for pods in BDC namespace - url: log-files/tsg062-tail-bdc-previous-container-logs - - title: 
TSG084 - Internal Query Processor Error - url: log-files/tsg084-internal-query-process-error -- title: Samples - url: /sample/readme - not_numbered: true - expand_sections: false - sections: - - title: SAM001a - Query Storage Pool from SQL Server Master Pool (1 of 3) - Load sample data - url: sample/sam001a-load-sample-data-into-bdc - - title: SAM001b - Query Storage Pool from SQL Server Master Pool (2 of 3) - Convert data to parquet - url: sample/sam001b-convert-csv-to-parquet - - title: SAM001c - Query Storage Pool from SQL Server Master Pool (3 of 3) - Query HDFS from SQL Server - url: sample/sam001c-query-hdfs-in-sql-server - - title: SAM002 - Storage Pool (2 of 2) - Query HDFS - url: sample/sam002-query-hdfs-in-sql-server - - title: SAM003 - Data Pool Example - url: sample/sam003-data-pool - - title: SAM008 - Spark using azdata - url: sample/sam008-spark-using-azdata - - title: SAM009 - HDFS using azdata - url: sample/sam009-hdfs-using-azdata - - title: SAM010 - App using azdata - url: sample/sam010-app-using-azdata -- title: Install - url: /install/readme - not_numbered: true - expand_sections: false - sections: - - title: SOP036 - Install kubectl command line interface - url: install/sop036-install-kubectl - - title: SOP037 - Uninstall kubectl command line interface - url: install/sop037-uninstall-kubectl - - title: SOP059 - Install Kubernetes Python module - url: install/sop059-install-kubernetes-module - - title: SOP060 - Uninstall kubernetes module - url: install/sop060-uninstall-kubernetes-module - - title: SOP062 - Install ipython-sql and pyodbc modules - url: install/sop062-install-ipython-sql-module - - title: SOP063 - Install azdata CLI (using package manager) - url: install/sop063-packman-install-azdata - - title: SOP064 - Uninstall azdata CLI (using package manager) - url: install/sop064-packman-uninstall-azdata - - title: SOP054 - Install azdata CLI (using pip) - url: install/sop054-install-azdata - - title: SOP055 - Uninstall azdata CLI (using pip) - url: install/sop055-uninstall-azdata - - title: SOP038 - Install azure command line interface - url: install/sop038-install-az - - title: SOP039 - Uninstall azure command line interface - url: install/sop039-uninstall-az - - title: SOP040 - Upgrade pip in ADS Python sandbox - url: install/sop040-upgrade-pip - - title: SOP069 - Install ODBC for SQL Server - url: install/sop069-install-odbc-driver-for-sql-server - - title: SOP012 - Install unixodbc for Mac - url: install/sop012-brew-install-odbc-for-sql-server - - title: SOP010 - Upgrade a big data cluster - url: install/sop010-upgrade-bdc -- title: Certificate Management - url: /cert-management/readme - not_numbered: true - expand_sections: false - sections: - - title: CER001 - Generate a Root CA certificate - url: cert-management/cer001-create-root-ca - - title: CER002 - Download existing Root CA certificate - url: cert-management/cer002-download-existing-root-ca - - title: CER003 - Upload existing Root CA certificate - url: cert-management/cer003-upload-existing-root-ca - - title: CER004 - Download and Upload existing Root CA certificate - url: cert-management/cer004-download-upload-existing-root-ca - - title: CER010 - Install generated Root CA locally - url: cert-management/cer010-install-generated-root-ca-locally - - title: CER020 - Create Management Proxy certificate - url: cert-management/cer020-create-management-service-proxy-cert - - title: CER021 - Create Knox certificate - url: cert-management/cer021-create-knox-cert - - title: CER022 - Create App Proxy certificate 
- url: cert-management/cer022-create-app-proxy-cert - - title: CER023 - Create Master certificate - url: cert-management/cer023-create-master-cert - - title: CER024 - Create Controller certificate - url: cert-management/cer024-create-controller-cert - - title: CER025 - Upload existing Management Proxy certificate - url: cert-management/cer025-upload-management-service-proxy-cert - - title: CER026 - Upload existing Gateway certificate - url: cert-management/cer026-upload-knox-cert - - title: CER027 - Upload existing App Service Proxy certificate - url: cert-management/cer027-upload-app-proxy-cert - - title: CER028 - Upload existing Master certificate - url: cert-management/cer028-upload-master-cert - - title: CER029 - Upload existing Controller certificate - url: cert-management/cer029-upload-controller-cert - - title: CER030 - Sign Management Proxy certificate with generated CA - url: cert-management/cer030-sign-service-proxy-generated-cert - - title: CER031 - Sign Knox certificate with generated CA - url: cert-management/cer031-sign-knox-generated-cert - - title: CER032 - Sign App-Proxy certificate with generated CA - url: cert-management/cer032-sign-app-proxy-generated-cert - - title: CER033 - Sign Master certificate with generated CA - url: cert-management/cer033-sign-master-generated-cert - - title: CER034 - Sign Controller certificate with cluster Root CA - url: cert-management/cer034-sign-controller-generated-cert - - title: CER035 - Sign Controller certificate with external Root CA - url: cert-management/cer035-ca-sign-controller-generated-cert - - title: CER040 - Install signed Management Proxy certificate - url: cert-management/cer040-install-service-proxy-cert - - title: CER041 - Install signed Knox certificate - url: cert-management/cer041-install-knox-cert - - title: CER042 - Install signed App-Proxy certificate - url: cert-management/cer042-install-app-proxy-cert - - title: CER044 - Install signed Controller certificate - url: cert-management/cer044-install-controller-cert - - title: CER050 - Wait for BDC to be Healthy - url: cert-management/cer050-wait-cluster-healthly - - title: CER100 - Configure Cluster with Self Signed Certificates - url: cert-management/cer100-create-root-ca-install-certs - - title: CER101 - Configure Cluster with Self Signed Certificates using existing Root CA - url: cert-management/cer101-use-root-ca-install-certs -- title: Common - url: /common/readme - not_numbered: true - expand_sections: false - sections: - - title: SOP005 - az login - url: common/sop005-az-login - - title: SOP006 - az logout - url: common/sop006-az-logout - - title: SOP007 - Version information (azdata, bdc, kubernetes) - url: common/sop007-get-key-version-information - - title: SOP011 - Set kubernetes configuration context - url: common/sop011-set-kubernetes-context - - title: SOP013 - Create secret for azdata login (inside cluster) - url: common/sop013-create-secret-for-azdata-login - - title: SOP014 - Delete secret for azdata login (inside cluster) - url: common/sop014-delete-secret-for-azdata-login - - title: SOP028 - azdata login - url: common/sop028-azdata-login - - title: SOP033 - azdata logout - url: common/sop033-azdata-logout - - title: SOP034 - Wait for BDC to be Healthy - url: common/sop034-wait-cluster-healthly diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb deleted file mode 100644 index 58f2eea7..00000000 --- 
a/Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb +++ /dev/null @@ -1,723 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER001 - Generate a Root CA certificate\n", - "=======================================\n", - "\n", - "If a Certificate Authority certificate for the test environment has\n", - "never been generated, generate one using this notebook.\n", - "\n", - "If a Certificate Authority has been generated in another cluster, and you\n", - "want to reuse the same CA for multiple clusters, then use CER002/CER003 to\n", - "download and upload the already generated Root CA.\n", - "\n", - "- [CER002 - Download existing Root CA\n", - " certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "Consider using one Root CA certificate for all non-production clusters\n", - "in each environment, as this reduces the number of Root CA certificates\n", - "that need to be uploaded to clients connecting to these clusters.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "common_name = \"SQL Server Big Data Clusters Test CA\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "days = \"398\" # Max supported validity period in Safari - https://www.thesslstore.com/blog/ssl-certificate-validity-will-be-limited-to-one-year-by-apples-safari-browser/\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer001-create-root-ca.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Certificate configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify for\n", - "default_crl_days = 30 # How long before next CRL\n", - "default_md = sha256 # Use public key default MD\n", - "preserve = no # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn = no # Don't concat the email in the DN\n", - "copy_extensions = copy # Required to copy SANs from CSR to cert\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/cakey.pem\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - 
"commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\"\"\"\n", - "\n", - "save_file(\"ca.openssl.cnf\", certificate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on controller to hold Test Certificates" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp ca.openssl.cnf {controller}:{test_cert_store_root}/ca.openssl.cnf -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl req -x509 -config {test_cert_store_root}/ca.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/cacert.pem -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER002 - Download existing Root CA\n", - " certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n", - "\n", - "- [CER003 - Upload existing Root CA\n", - " 
certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "- [CER010 - Install generated Root CA\n", - " locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb deleted file mode 100644 index e736f538..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb +++ /dev/null @@ -1,560 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER002 - Download existing Root CA certificate\n", - "==============================================\n", - "\n", - "Use this notebook to download a generated Root CA certificate from a\n", - "cluster that installed one using:\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "And then to upload the generated Root CA to another cluster use:\n", - "\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "If needed, use these notebooks to view and set the Kubernetes\n", - "configuration context appropriately to enable downloading the Root CA\n", - "from a Big Data Cluster in one Kubernetes cluster, and to upload it to a\n", - "Big Data Cluster in another Kubernetes cluster.\n", - "\n", - "- [TSG010 - Get configuration\n", - " contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)\n", - "- [SOP011 - Set kubernetes configuration\n", - " context](../common/sop011-set-kubernetes-context.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_folder_name = \"mssql-cluster-root-ca\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
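[Editorial note: the `run` helper defined in the next cell retries a command whenever its stderr contains a string registered in `retry_hints` for that executable, recursing up to `MAX_RETRIES` times. A minimal self-contained sketch of that retry pattern follows; it is not the full implementation (the real helper streams output line by line via `Popen` and emits HINT links), and the simplified `cmd.split()` stands in for the `shlex.split` call the notebook actually uses.]

```python
# Sketch of the transient-fault retry pattern implemented by the `run` helper below.
import subprocess

MAX_RETRIES = 5
retry_hints = {'kubectl': ['A connection attempt failed']}  # abbreviated example entry

def run_sketch(cmd, retry_count=0):
    exe = cmd.split()[0].lower()  # hint lookup key, e.g. 'kubectl'
    proc = subprocess.run(cmd.split(), capture_output=True, text=True)
    for hint in retry_hints.get(exe, []):
        if hint in proc.stderr and retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {hint})")
            return run_sketch(cmd, retry_count + 1)
    return proc.stdout
```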
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer002-download-existing-root-ca.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "    Azure Data Studio."
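[Editorial note: before relying on the `[0]` jsonpath index in the cell that follows, it can help to see every namespace carrying the `MSSQL_CLUSTER` label. A sketch using the `run` helper defined above (it assumes the common-functions cell has been executed; the `mssql-cluster` value is illustrative only):]

```python
# Sketch: list all namespaces labelled MSSQL_CLUSTER so the [0] index in the
# next cell can be chosen deliberately when several BDCs share a cluster.
names = run('kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={.items[*].metadata.name}',
            return_output=True).split()
print(f"Namespaces labelled MSSQL_CLUSTER: {names}")

# Alternatively, pin the namespace explicitly ('mssql-cluster' is illustrative):
import os
os.environ["AZDATA_NAMESPACE"] = "mssql-cluster"
```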
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary folder to hold Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "import shutil\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_folder_name)\n", - "\n", - "if os.path.isdir(path):\n", - " shutil.rmtree(path)\n", - "\n", - "os.mkdir(path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy Root CA certificate from `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/cakey.pem cakey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "- 
[CER010 - Install generated Root CA\n", - " locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb deleted file mode 100644 index c967cf48..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb +++ /dev/null @@ -1,583 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER003 - Upload existing Root CA certificate\n", - "============================================\n", - "\n", - "Use this notebook to upload a Root CA certificate to a cluster that was\n", - "downloaded to this machine using:\n", - "\n", - "- [CER002 - Download existing Root CA\n", - " certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n", - "\n", - "If needed, use these notebooks to view and set the Kubernetes\n", - "configuration context appropriately to enable downloading the Root CA\n", - "from a Big Data Cluster in one Kubernetes cluster, and to upload it to a\n", - "Big Data Cluster in another Kubernetes cluster.\n", - "\n", - "- [TSG010 - Get configuration\n", - " contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)\n", - "- [SOP011 - Set kubernetes configuration\n", - " context](../common/sop011-set-kubernetes-context.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_folder_name = \"mssql-cluster-root-ca\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer003-upload-existing-root-ca.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set temporary folder to hold Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_folder_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on `controller` to hold Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy Root CA certificate to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp cacert.pem {controller}:{test_cert_store_root}/cacert.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp cakey.pem {controller}:{test_cert_store_root}/cakey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete the temporary folder holding the Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import shutil\n", - "\n", - "shutil.rmtree(path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "- [CER002 - Download existing Root CA\n", - " certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n", - "\n", - "- [CER010 - Install generated Root CA\n", - " 
locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb deleted file mode 100644 index ccdfcb07..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb +++ /dev/null @@ -1,703 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER004 - Download and Upload existing Root CA certificate\n", - "=========================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "k8s_context_source = ''\n", - "k8s_context_destination = ''\n", - "\n", - "local_folder_name = \"mssql-cluster-root-ca\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer004-download-upload-existing-root-ca.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n",
- "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the '\\*' escaping that was applied when the rule expression was stored\n",
- "\n",
- "        if debug_logging:\n",
- "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- "        if re.match(expression, line, re.DOTALL):\n",
- "\n",
- "            if debug_logging:\n",
- "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- "            match_found = True\n",
- "\n",
- "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Set Kubernetes context to source cluster"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run(f'kubectl config use-context {k8s_context_source}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command line\n",
- "interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \\[0\\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\\_NAMESPACE, before starting\n",
- "    Azure Data Studio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
- "\n",
- "if \"AZDATA_NAMESPACE\" in os.environ:\n",
- "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
- "else:\n",
- "    try:\n",
- "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "    except:\n",
- "        from IPython.display import Markdown\n",
- "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary folder to hold Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "import shutil\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_folder_name)\n", - "\n", - "if os.path.isdir(path):\n", - " shutil.rmtree(path)\n", - "\n", - "os.mkdir(path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy Root CA certificate from `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/cakey.pem cakey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set Kubernetes context to destination cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl config use-context {k8s_context_destination}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
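Supplementing the note above with a hedged sketch (not part of the original notebook; the chosen index is purely illustrative): when several clusters carry the `MSSQL_CLUSTER` label, the candidate namespaces can be enumerated first and one pinned explicitly via `AZDATA_NAMESPACE`, rather than silently taking `.items[0]`:

```python
# Sketch only: list every namespace carrying the MSSQL_CLUSTER label on the
# current kubectl context, then pin one explicitly rather than relying on
# the first entry that .items[0] would return.
import os
import subprocess

names = subprocess.check_output(
    ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
     "-o", "jsonpath={.items[*].metadata.name}"],
    text=True).split()

print(f"Big Data Cluster namespaces found: {names}")

if names:
    os.environ["AZDATA_NAMESPACE"] = names[0]  # index 0 is purely illustrative
```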
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on `controller` to hold Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy Root CA certificate to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp cacert.pem {controller}:{test_cert_store_root}/cacert.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp cakey.pem {controller}:{test_cert_store_root}/cakey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete the temporary folder holding the Root CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import shutil\n", - "\n", - "shutil.rmtree(path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " 
certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "- [CER002 - Download existing Root CA\n", - " certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n", - "\n", - "- [CER010 - Install generated Root CA\n", - " locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb deleted file mode 100644 index 47ee73be..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb +++ /dev/null @@ -1,608 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER010 - Install generated Root CA locally\n", - "==========================================\n", - "\n", - "This notebook will copy locally (from a Big Data Cluster) the generated\n", - "Root CA certificate that was installed using either:\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "And then install the Root CA certificate into this machine\u2019s local\n", - "certificate store.\n", - "\n", - "NOTE: A Security Dialog popup will appear, accept this dialog to install\n", - "the certificate into the local certificate store.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer010-install-generated-root-ca-locally.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
- "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
- "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
- "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
- "        raise\n",
- "\n",
- "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create a temporary directory to stage files"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Create a temporary directory to hold configuration files\n",
- "\n",
- "import tempfile\n",
- "\n",
- "temp_dir = tempfile.mkdtemp()\n",
- "\n",
- "print(f\"Temporary directory created: {temp_dir}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get name of the \u2018Running\u2019 `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place the name of the 'Running' controller pod in variable `controller`\n",
- "\n",
- "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
- "\n",
- "print(f\"Controller pod name: {controller}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificates locally"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "cwd = os.getcwd()\n",
- "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
- "\n",
- "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.crt -c controller -n {namespace}')\n",
- "\n",
- "# Verify the cacert.crt file is actually there (there is a bug in earlier versions of kubectl)\n",
- "#\n",
- "file_exists = os.path.isfile('cacert.crt')\n",
- "\n",
- "if not file_exists:\n",
- "    raise SystemExit(\"File `cacert.crt` does not exist (after `kubectl cp`). This can happen when running older versions of `kubectl`, such as the v1.13 release. Run `kubectl version` and upgrade if an older version is installed; `kubectl` v1.18 is known to work.\")\n",
- "\n",
- "os.chdir(cwd)\n",
- "\n",
- "print(f'Certificates copied locally to: {temp_dir}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Install the Root CA\n",
- "\n",
- "Documented here:\n",
- "\n",
- "- https://docs.microsoft.com/en-us/windows-hardware/drivers/install/using-certmgr-to-install-test-certificates-on-a-test-computer\n",
- "\n",
- "TODO: Add Mac (and Linux) support here!"
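Pending that TODO, here is a hedged sketch of what macOS and Linux equivalents of the Windows-only `Import-Certificate` cell that follows might look like. The `security` and `update-ca-certificates` commands are standard OS tooling, but this cell is not part of the original notebook; it assumes the `temp_dir` staging directory and `run` helper defined in earlier cells, and both branches require elevation.

```python
# Sketch only: possible macOS / Linux equivalents of the Windows
# Import-Certificate step below. Assumes temp_dir and run() from earlier
# cells; both commands prompt for sudo.
import os
import platform

cacert = os.path.join(temp_dir, "cacert.crt")

if platform.system() == "Darwin":
    # Trust the Root CA in the macOS System keychain.
    run(f'sudo security add-trusted-cert -d -r trustRoot '
        f'-k /Library/Keychains/System.keychain {cacert}')
elif platform.system() == "Linux":
    # Debian/Ubuntu convention: stage the cert, then rebuild the trust store.
    run(f'sudo cp {cacert} /usr/local/share/ca-certificates/mssql-cluster-root-ca.crt')
    run('sudo update-ca-certificates')
```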
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'powershell -Command \"Import-Certificate -FilePath {os.path.join(temp_dir, \"cacert.crt\")} -CertStoreLocation cert:\\\\CurrentUser\\\\Root\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER020 - Create Management Proxy\n", - " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n", - "\n", - "- [CER021 - Create Knox\n", - " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb deleted file mode 100644 index ab4949f0..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb +++ /dev/null @@ -1,933 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER020 - Create Management Proxy certificate\n", - "============================================\n", - "\n", - "This notebook creates a certificate for the Management Proxy endpoint.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"mgmtproxy\"\n", - "scaledset_name = \"mgmtproxy\"\n", - "container_name = \"service-proxy\"\n", - "prefix_keyfile_name = \"service-proxy\"\n", - "common_name = \"mgmtproxy-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"service.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "\n", - "extendedKeyUsage = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
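For orientation, a short hedged example of how the `run` helper defined in the next cell is typically invoked elsewhere in these notebooks (the commands shown are illustrative, not taken from this notebook):

```python
# Sketch only: typical calls into the run() helper defined below.

# Stream a command's output into the notebook as it executes:
run('kubectl version --client')

# Capture output into a variable instead of streaming it, for use in later cells:
node_names = run('kubectl get nodes -o jsonpath={.items[*].metadata.name}',
                 return_output=True)
print(node_names)
```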
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer020-create-management-service-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n",
- " else:\n",
- " if \"metadata\" in j and \\\n",
- " \"azdata\" in j[\"metadata\"] and \\\n",
- " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
- " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
- "\n",
- " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
- "\n",
- " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n",
- "\n",
- " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
- "\n",
- " return rules\n",
- "\n",
- "def apply_expert_rules(line):\n",
- " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n",
- " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
- "\n",
- " global rules\n",
- "\n",
- " for rule in rules:\n",
- " notebook = rule[1]\n",
- " cell_type = rule[2]\n",
- " output_type = rule[3] # i.e. stream or error\n",
- " output_type_name = rule[4] # i.e. ename or name\n",
- " output_type_value = rule[5] # i.e. SystemExit or stdout\n",
- " details_name = rule[6] # i.e. evalue or text\n",
- " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped the *, putting a \\ in front of it\n",
- "\n",
- " if debug_logging:\n",
- " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- " if re.match(expression, line, re.DOTALL):\n",
- "\n",
- " if debug_logging:\n",
- " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- " match_found = True\n",
- "\n",
- " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command\n",
- "line interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \[0\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\_NAMESPACE, before starting\n",
- " Azure Data Studio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
- "\n",
- "if \"AZDATA_NAMESPACE\" in os.environ:\n",
- " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
- "else:\n",
- " try:\n",
- " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- " except:\n",
- " from IPython.display import Markdown\n",
- " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get endpoint hostname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import urllib\n", - "\n", - "endpoint_name = \"sql-server-master\" if app_name == \"master\" else app_name \n", - "\n", - "endpoint = run(f'azdata bdc endpoint list --endpoint=\"{endpoint_name}\"', return_output=True)\n", - "endpoint = json.loads(endpoint)\n", - "endpoint = endpoint['endpoint']\n", - "\n", - "print(f\"endpoint: {endpoint}\")\n", - "\n", - "hostname = urllib.parse.urlparse(endpoint).hostname\n", - "\n", - "print(f\"hostname: {hostname}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the pod." 
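The namespace and controller-pod lookups above share one kubectl pattern: select by label, then take element `[0]` of the jsonpath result. A minimal self-contained sketch of that pattern, outside the notebook's `run` helper; `get_first_pod_name` and the sample selector are illustrative names, not part of this book:

```python
# Sketch only: mirrors the "first item by label selector" jsonpath lookups in
# the cells above. Assumes kubectl is on PATH and a current context is set.
import shlex
import subprocess

def get_first_pod_name(selector, namespace, field_selector=None):
    # jsonpath .items[0].metadata.name returns just the first matching pod name
    cmd = (f"kubectl get pod --selector={selector} -n {namespace} "
           "-o jsonpath={.items[0].metadata.name}")
    if field_selector:
        cmd += f" --field-selector={field_selector}"
    return subprocess.check_output(shlex.split(cmd), text=True).strip()

# Example (mirrors the 'Running' controller lookup above):
#   controller = get_first_pod_name("app=controller", namespace, "status.phase=Running")
```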
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Get the name of the pod\n",
- "\n",
- "if (app_name == \"mgmtproxy\"):\n",
- " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"gateway\"):\n",
- " pod = run(f'kubectl get pod --selector=app=gateway -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"app-proxy\"):\n",
- " pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"controller\"):\n",
- " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"master\"):\n",
- " # Use explicitly provided pod name for master\n",
- " #\n",
- " pod = master_pod_name\n",
- "else:\n",
- " raise SystemExit(f'Invalid app name: {app_name}')\n",
- "\n",
- "print(f\"Pod name: {pod}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create the DNS alt\_names for data plane in secure clusters\n",
- "\n",
- "Get the cluster configuration from the Big Data Cluster using\n",
- "`azdata bdc config`, pull the Active Directory DNS names out of it,\n",
- "and place them in the certificate configuration file as DNS alt\_names."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "\n",
- "alt_names = \"\"\n",
- "bdc_fqdn = \"\"\n",
- "\n",
- "hdfs_vault_svc = \"hdfsvault-svc\"\n",
- "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
- "bdc_config = json.loads(bdc_config)\n",
- "\n",
- "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template.\n",
- "\n",
- "if app_name == \"gateway\" or app_name == \"master\":\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.svc.cluster.local\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
- " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
- " sub_domain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
- "\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " if app_name == \"gateway\" or app_name == \"master\":\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " if sub_domain_name:\n",
- " bdc_fqdn = f\"{sub_domain_name}.{domain_dns_name}\"\n",
- " else:\n",
- " bdc_fqdn = domain_dns_name\n",
- "\n",
- "if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
- " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
- " for endpoint in app_name_endpoints:\n",
- " if \"dnsName\" in endpoint:\n",
- " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "# Special case for the controller certificate\n",
- "#\n",
- "if app_name == \"controller\":\n",
- " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " # Add hdfsvault-svc host for key management calls.\n",
- " #\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " # Add hdfsvault-svc FQDN for key management calls.\n",
- " #\n",
- " if bdc_fqdn:\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "print(\"DNS alt_names (data plane):\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create the DNS alt\_names for control plane in secure clusters\n",
- "\n",
- "Get the endpoint list from the Big Data Cluster using\n",
- "`azdata bdc endpoint list`, pull the Active Directory DNS names for the\n",
- "control plane external endpoints (Controller and Management Proxy) out\n",
- "of it, and place them in the certificate configuration file as DNS\n",
- "alt\_names."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "from urllib.parse import urlparse\n",
- "\n",
- "if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
- " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
- " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
- "\n",
- " # Parse the DNS host name from:\n",
- " #\n",
- " # \"endpoint\": \"https://monitor.aris.local:30777\"\n",
- " #\n",
- " for endpoint in bdc_endpoint_list:\n",
- " if endpoint[\"name\"] == app_name:\n",
- " url = urlparse(endpoint[\"endpoint\"])\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "print(\"DNS alt_names (control plane):\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create alt\_names\n",
- "\n",
- "If the Kubernetes service is of \u201cNodePort\u201d type, then the IP address\n",
- "needed to validate the cluster certificate could be for any node in the\n",
- "Kubernetes cluster, so here all node IP addresses in the Big Data\n",
- "Cluster are added as alt\_names. Otherwise (if not NodePort, and\n",
- "therefore LoadBalancer), add just the hostname as returned from\n",
- "`azdata bdc endpoint list` above."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "service_type = run(f\"kubectl get svc {common_name}-external -n {namespace} -o jsonpath={{.spec.type}}\", return_output=True)\n",
- "\n",
- "print(f\"Service type for '{common_name}-external' is: '{service_type}'\")\n",
- "print(\"\")\n",
- "\n",
- "if service_type == \"NodePort\":\n",
- " nodes_ip_address = run(\"kubectl \"\"get nodes -o jsonpath={.items[*].status.addresses[0].address}\"\"\", return_output=True)\n",
- " nodes_ip_address = nodes_ip_address.split(' ')\n",
- "\n",
- " counter = 1\n",
- " for ip in nodes_ip_address:\n",
- " alt_names += f\"IP.{counter} = {ip}\\n\"\n",
- " counter = counter + 1\n",
- "else:\n",
- " alt_names += f\"IP.1 = {hostname}\\n\"\n",
- "\n",
- "print(\"All (DNS and IP) alt_names:\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Generate Certificate Configuration file\n",
- "\n",
- "NOTE: There is a special case for the `controller` certificate, which\n",
- "needs to be generated in PKCS\\#1 format."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ req ]\n", - "# Options for the `req` tool (`man req`).\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey{\".pkcs8\" if app_name == \"controller\" else \"\"}.pem\n", - "distinguished_name = req_distinguished_name\n", - "string_mask = utf8only\n", - "\n", - "# SHA-1 is deprecated, so use SHA-2 instead.\n", - "default_md = sha256\n", - "req_extensions = v3_req\n", - "\n", - "[ req_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ v3_req ]\n", - "subjectAltName = @alt_names\n", - "subjectKeyIdentifier = hash\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "{extendedKeyUsage}\n", - "\n", - "[ alt_names ]\n", - "DNS.1 = {common_name}\n", - "DNS.2 = {common_name}.{namespace}.svc.cluster.local # Use the namespace applicable for your cluster\n", - "{alt_names}\n", - "\"\"\"\n", - "\n", - "print(certificate)\n", - "\n", - "save_file(ssl_configuration_file, certificate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate certificate\n", - "\n", - "Use openssl req to generate a certificate in PKCS\\#10 format. 
See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/req.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl req -config {test_cert_store_root}/{app_name}/service.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER021 - Create Knox\n", - " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb deleted file mode 100644 index c16bf5e5..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb +++ /dev/null @@ -1,933 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER021 - Create Knox certificate\n", - "================================\n", - "\n", - "This notebook creates a certificate for the Knox Gateway endpoint.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"gateway\"\n", - "scaledset_name = \"gateway/pods/gateway-0\"\n", - "container_name = \"knox\"\n", - "prefix_keyfile_name = \"knox\"\n", - "common_name = \"gateway-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"service.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "\n", - 
"extendedKeyUsage = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer021-create-knox-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n",
- " else:\n",
- " if \"metadata\" in j and \\\n",
- " \"azdata\" in j[\"metadata\"] and \\\n",
- " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
- " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
- "\n",
- " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
- "\n",
- " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n",
- "\n",
- " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
- "\n",
- " return rules\n",
- "\n",
- "def apply_expert_rules(line):\n",
- " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n",
- " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
- "\n",
- " global rules\n",
- "\n",
- " for rule in rules:\n",
- " notebook = rule[1]\n",
- " cell_type = rule[2]\n",
- " output_type = rule[3] # i.e. stream or error\n",
- " output_type_name = rule[4] # i.e. ename or name\n",
- " output_type_value = rule[5] # i.e. SystemExit or stdout\n",
- " details_name = rule[6] # i.e. evalue or text\n",
- " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped the *, putting a \\ in front of it\n",
- "\n",
- " if debug_logging:\n",
- " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- " if re.match(expression, line, re.DOTALL):\n",
- "\n",
- " if debug_logging:\n",
- " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- " match_found = True\n",
- "\n",
- " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command\n",
- "line interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \[0\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\_NAMESPACE, before starting\n",
- " Azure Data Studio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
- "\n",
- "if \"AZDATA_NAMESPACE\" in os.environ:\n",
- " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
- "else:\n",
- " try:\n",
- " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- " except:\n",
- " from IPython.display import Markdown\n",
- " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get endpoint hostname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import urllib\n", - "\n", - "endpoint_name = \"sql-server-master\" if app_name == \"master\" else app_name \n", - "\n", - "endpoint = run(f'azdata bdc endpoint list --endpoint=\"{endpoint_name}\"', return_output=True)\n", - "endpoint = json.loads(endpoint)\n", - "endpoint = endpoint['endpoint']\n", - "\n", - "print(f\"endpoint: {endpoint}\")\n", - "\n", - "hostname = urllib.parse.urlparse(endpoint).hostname\n", - "\n", - "print(f\"hostname: {hostname}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the pod." 
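The cell below selects a pod by label for the given `app_name`. As a sketch only, the same dispatch can be expressed as a lookup table; `SELECTOR_BY_APP` is a hypothetical name, and the `controller` entry deliberately mirrors the original cell, which reuses the mgmtproxy selector for it:

```python
# Data-driven equivalent of the if/elif chain in the next cell (sketch only).
SELECTOR_BY_APP = {
    "mgmtproxy":  "app=mgmtproxy",
    "gateway":    "app=gateway",
    "app-proxy":  "app=appproxy",
    "controller": "app=mgmtproxy",  # mirrors the original cell's selector
}

app = "gateway"  # example value; the notebook takes this from its parameters cell
print(SELECTOR_BY_APP[app])
```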
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Get the name of the pod\n",
- "\n",
- "if (app_name == \"mgmtproxy\"):\n",
- " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"gateway\"):\n",
- " pod = run(f'kubectl get pod --selector=app=gateway -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"app-proxy\"):\n",
- " pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"controller\"):\n",
- " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- "elif (app_name == \"master\"):\n",
- " # Use explicitly provided pod name for master\n",
- " #\n",
- " pod = master_pod_name\n",
- "else:\n",
- " raise SystemExit(f'Invalid app name: {app_name}')\n",
- "\n",
- "print(f\"Pod name: {pod}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create the DNS alt\_names for data plane in secure clusters\n",
- "\n",
- "Get the cluster configuration from the Big Data Cluster using\n",
- "`azdata bdc config`, pull the Active Directory DNS names out of it,\n",
- "and place them in the certificate configuration file as DNS alt\_names."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "\n",
- "alt_names = \"\"\n",
- "bdc_fqdn = \"\"\n",
- "\n",
- "hdfs_vault_svc = \"hdfsvault-svc\"\n",
- "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
- "bdc_config = json.loads(bdc_config)\n",
- "\n",
- "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template.\n",
- "\n",
- "if app_name == \"gateway\" or app_name == \"master\":\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.svc.cluster.local\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
- " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
- " sub_domain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
- "\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " if app_name == \"gateway\" or app_name == \"master\":\n",
- " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " if sub_domain_name:\n",
- " bdc_fqdn = f\"{sub_domain_name}.{domain_dns_name}\"\n",
- " else:\n",
- " bdc_fqdn = domain_dns_name\n",
- "\n",
- "if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
- " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
- " for endpoint in app_name_endpoints:\n",
- " if \"dnsName\" in endpoint:\n",
- " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "# Special case for the controller certificate\n",
- "#\n",
- "if app_name == \"controller\":\n",
- " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " # Add hdfsvault-svc host for key management calls.\n",
- " #\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- " # Add hdfsvault-svc FQDN for key management calls.\n",
- " #\n",
- " if bdc_fqdn:\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "print(\"DNS alt_names (data plane):\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create the DNS alt\_names for control plane in secure clusters\n",
- "\n",
- "Get the endpoint list from the Big Data Cluster using\n",
- "`azdata bdc endpoint list`, pull the Active Directory DNS names for the\n",
- "control plane external endpoints (Controller and Management Proxy) out\n",
- "of it, and place them in the certificate configuration file as DNS\n",
- "alt\_names."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "from urllib.parse import urlparse\n",
- "\n",
- "if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
- " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
- " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
- "\n",
- " # Parse the DNS host name from:\n",
- " #\n",
- " # \"endpoint\": \"https://monitor.aris.local:30777\"\n",
- " #\n",
- " for endpoint in bdc_endpoint_list:\n",
- " if endpoint[\"name\"] == app_name:\n",
- " url = urlparse(endpoint[\"endpoint\"])\n",
- " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
- " dns_counter = dns_counter + 1\n",
- "\n",
- "print(\"DNS alt_names (control plane):\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create alt\_names\n",
- "\n",
- "If the Kubernetes service is of \u201cNodePort\u201d type, then the IP address\n",
- "needed to validate the cluster certificate could be for any node in the\n",
- "Kubernetes cluster, so here all node IP addresses in the Big Data\n",
- "Cluster are added as alt\_names. Otherwise (if not NodePort, and\n",
- "therefore LoadBalancer), add just the hostname as returned from\n",
- "`azdata bdc endpoint list` above."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "service_type = run(f\"kubectl get svc {common_name}-external -n {namespace} -o jsonpath={{.spec.type}}\", return_output=True)\n",
- "\n",
- "print(f\"Service type for '{common_name}-external' is: '{service_type}'\")\n",
- "print(\"\")\n",
- "\n",
- "if service_type == \"NodePort\":\n",
- " nodes_ip_address = run(\"kubectl \"\"get nodes -o jsonpath={.items[*].status.addresses[0].address}\"\"\", return_output=True)\n",
- " nodes_ip_address = nodes_ip_address.split(' ')\n",
- "\n",
- " counter = 1\n",
- " for ip in nodes_ip_address:\n",
- " alt_names += f\"IP.{counter} = {ip}\\n\"\n",
- " counter = counter + 1\n",
- "else:\n",
- " alt_names += f\"IP.1 = {hostname}\\n\"\n",
- "\n",
- "print(\"All (DNS and IP) alt_names:\")\n",
- "print(alt_names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Generate Certificate Configuration file\n",
- "\n",
- "NOTE: There is a special case for the `controller` certificate, which\n",
- "needs to be generated in PKCS\\#1 format."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ req ]\n", - "# Options for the `req` tool (`man req`).\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey{\".pkcs8\" if app_name == \"controller\" else \"\"}.pem\n", - "distinguished_name = req_distinguished_name\n", - "string_mask = utf8only\n", - "\n", - "# SHA-1 is deprecated, so use SHA-2 instead.\n", - "default_md = sha256\n", - "req_extensions = v3_req\n", - "\n", - "[ req_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ v3_req ]\n", - "subjectAltName = @alt_names\n", - "subjectKeyIdentifier = hash\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "{extendedKeyUsage}\n", - "\n", - "[ alt_names ]\n", - "DNS.1 = {common_name}\n", - "DNS.2 = {common_name}.{namespace}.svc.cluster.local # Use the namespace applicable for your cluster\n", - "{alt_names}\n", - "\"\"\"\n", - "\n", - "print(certificate)\n", - "\n", - "save_file(ssl_configuration_file, certificate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate certificate\n", - "\n", - "Use openssl req to generate a certificate in PKCS\\#10 format. 
See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/req.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl req -config {test_cert_store_root}/{app_name}/service.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n", - "\n", - "- [CER031 - Sign Knox certificate with generated\n", - " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n", - "\n", - "- [CER041 - Install signed Knox\n", - " certificate](../cert-management/cer041-install-knox-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb deleted file mode 100644 index cc3367d3..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb +++ /dev/null @@ -1,933 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER022 - Create App Proxy certificate\n", - "=====================================\n", - "\n", - "This notebook creates a certificate for the App Deploy Proxy endpoint.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"app-proxy\"\n", - "scaledset_name = \"appproxy\"\n", - "container_name = \"app-service-proxy\"\n", - "prefix_keyfile_name = \"service-proxy\"\n", - "common_name = \"appproxy-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"service.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "\n", - 
"extendedKeyUsage = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer022-create-app-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get endpoint hostname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import urllib\n", - "\n", - "endpoint_name = \"sql-server-master\" if app_name == \"master\" else app_name \n", - "\n", - "endpoint = run(f'azdata bdc endpoint list --endpoint=\"{endpoint_name}\"', return_output=True)\n", - "endpoint = json.loads(endpoint)\n", - "endpoint = endpoint['endpoint']\n", - "\n", - "print(f\"endpoint: {endpoint}\")\n", - "\n", - "hostname = urllib.parse.urlparse(endpoint).hostname\n", - "\n", - "print(f\"hostname: {hostname}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the pod." 
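The cell below branches on `app_name` to pick the kubectl label selector (with `master` special-cased to an explicitly provided pod name). As a point of reference only, the same dispatch can be written as a lookup table; this is a sketch, not the notebook's code, and the `app_name` value is hypothetical:

```python
# Sketch only: the label selector the cell below passes to kubectl for each
# app_name. Note the "app-proxy" label has no hyphen, and "controller" maps
# to the mgmtproxy selector, mirroring the if/elif chain below.
selectors = {
    "mgmtproxy": "app=mgmtproxy",
    "gateway": "app=gateway",
    "app-proxy": "app=appproxy",
    "controller": "app=mgmtproxy",
}

app_name = "app-proxy"  # hypothetical value for illustration
print(f"kubectl get pod --selector={selectors[app_name]} "
      "-o jsonpath={.items[0].metadata.name}")
```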
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Get the name of the pod\n", - "\n", - "if (app_name == \"mgmtproxy\"):\n", - " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"gateway\"):\n", - " pod = run(f'kubectl get pod --selector=app=gateway -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"app-proxy\"): \n", - " pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"controller\"):\n", - " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"master\"):\n", - " # Use explicitly provided pod name for master\n", - " # \n", - " pod = master_pod_name\n", - "else:\n", - " raise SystemExit(f'Invalid app name')\n", - "\n", - "print(f\"Pod name: {pod}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the DNS alt\\_names for data plane in secure clusters\n", - "\n", - "Get the cluster configuration from the Big Data Cluster using\n", - "`azdata bdc config`, and pull the Active Directory DNS names out of it,\n", - "and place them into the certificate configuration file as DNS alt\\_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template.\n", - "\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.svc.cluster.local\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " sub_domain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if sub_domain_name:\n", - " bdc_fqdn = f\"{sub_domain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - "if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += 
f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc host for key management calls.\n", - " #\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc FQDN for key management calls.\n", - " #\n", - " if bdc_fqdn:\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "print(\"DNS alt_names (data plane):\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the DNS alt\\_names for control plane in secure clusters\n", - "\n", - "Get the cluster configuration from the Big Data Cluster using\n", - "`azdata bdc endpoint list`, and pull the Active Directory DNS names out\n", - "of it for the control plane expternal endpoints (Controller and\n", - "Management Proxy), and place them into the certificate configuration\n", - "file as DNS alt\\_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " # \"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "print(\"DNS alt_names (control plane):\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create alt\\_names\n", - "\n", - "If the Kuberenetes service is of \u201cNodePort\u201d type, then the IP address\n", - "needed to validate the cluster certificate could be for any node in the\n", - "Kubernetes cluster, so here all node IP addresses in the Big Data\n", - "Cluster are added as alt\\_names. Otherwise (if not NodePort, and\n", - "therefore LoadBalancer), add just the hostname as returned from\n", - "`azdata bdc endpoint list` above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "service_type = run(f\"kubectl get svc {common_name}-external -n {namespace} -o jsonpath={{.spec.type}}\", return_output=True)\n", - "\n", - "print(f\"Service type for '{common_name}-external' is: '{service_type}'\")\n", - "print(\"\")\n", - "\n", - "if service_type == \"NodePort\":\n", - " nodes_ip_address = run(\"kubectl \"\"get nodes -o jsonpath={.items[*].status.addresses[0].address}\"\"\", return_output=True)\n", - " nodes_ip_address = nodes_ip_address.split(' ')\n", - "\n", - " counter = 1\n", - " for ip in nodes_ip_address:\n", - " alt_names += f\"IP.{counter} = {ip}\\n\"\n", - " counter = counter + 1\n", - "else:\n", - " alt_names += f\"IP.1 = {hostname}\\n\"\n", - "\n", - "print(\"All (DNS and IP) alt_names:\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate Certificate Configuration file\n", - "\n", - "NOTE: There is a special case for the `controller` certificate, that\n", - "needs to be generated in PKCS\\#1 format." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ req ]\n", - "# Options for the `req` tool (`man req`).\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey{\".pkcs8\" if app_name == \"controller\" else \"\"}.pem\n", - "distinguished_name = req_distinguished_name\n", - "string_mask = utf8only\n", - "\n", - "# SHA-1 is deprecated, so use SHA-2 instead.\n", - "default_md = sha256\n", - "req_extensions = v3_req\n", - "\n", - "[ req_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ v3_req ]\n", - "subjectAltName = @alt_names\n", - "subjectKeyIdentifier = hash\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "{extendedKeyUsage}\n", - "\n", - "[ alt_names ]\n", - "DNS.1 = {common_name}\n", - "DNS.2 = {common_name}.{namespace}.svc.cluster.local # Use the namespace applicable for your cluster\n", - "{alt_names}\n", - "\"\"\"\n", - "\n", - "print(certificate)\n", - "\n", - "save_file(ssl_configuration_file, certificate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate certificate\n", - "\n", - "Use openssl req to generate a certificate in PKCS\\#10 format. 
See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/req.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl req -config {test_cert_store_root}/{app_name}/service.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER032 - Sign App-Proxy certificate with generated\n", - " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER042 - Install signed App-Proxy\n", - " certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-cert.ipynb deleted file mode 100644 index f541ce96..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-cert.ipynb +++ /dev/null @@ -1,934 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER023 - Create Master certificate\n", - "==================================\n", - "\n", - "This notebook creates a certificate for the Master endpoint.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"master\"\n", - "scaledset_name = \"master\"\n", - "container_name = \"mssql-server\"\n", - "prefix_keyfile_name = \"sql\"\n", - "common_name = \"master-svc\"\n", - "master_pod_name = \"master-0\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"service.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "test_cert_store_root = 
\"/var/opt/secrets/test-certificates\"\n", - "\n", - "extendedKeyUsage = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer023-create-master-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get endpoint hostname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import urllib\n", - "\n", - "endpoint_name = \"sql-server-master\" if app_name == \"master\" else app_name \n", - "\n", - "endpoint = run(f'azdata bdc endpoint list --endpoint=\"{endpoint_name}\"', return_output=True)\n", - "endpoint = json.loads(endpoint)\n", - "endpoint = endpoint['endpoint']\n", - "\n", - "print(f\"endpoint: {endpoint}\")\n", - "\n", - "hostname = urllib.parse.urlparse(endpoint).hostname\n", - "\n", - "print(f\"hostname: {hostname}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the pod." 
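The previous cell resolves the 'Running' controller pod with the notebook's `run` helper. For reference, a standalone `subprocess` equivalent of that lookup (the namespace value here is hypothetical; the notebook derives the real one earlier):

```python
import shlex
import subprocess

namespace = "mssql-cluster"  # hypothetical; derived earlier in the notebook

# Same kubectl query as the previous cell: first pod labelled app=controller
# that is in the 'Running' phase.
cmd = (f"kubectl get pod --selector=app=controller -n {namespace} "
       "-o jsonpath={.items[0].metadata.name} "
       "--field-selector=status.phase=Running")

controller = subprocess.run(shlex.split(cmd), capture_output=True,
                            text=True, check=True).stdout.strip()
print(controller)
```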
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Get the name of the pod\n", - "\n", - "if (app_name == \"mgmtproxy\"):\n", - " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"gateway\"):\n", - " pod = run(f'kubectl get pod --selector=app=gateway -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"app-proxy\"): \n", - " pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"controller\"):\n", - " pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "elif (app_name == \"master\"):\n", - " # Use explicitly provided pod name for master\n", - " # \n", - " pod = master_pod_name\n", - "else:\n", - " raise SystemExit(f'Invalid app name')\n", - "\n", - "print(f\"Pod name: {pod}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the DNS alt\\_names for data plane in secure clusters\n", - "\n", - "Get the cluster configuration from the Big Data Cluster using\n", - "`azdata bdc config`, and pull the Active Directory DNS names out of it,\n", - "and place them into the certificate configuration file as DNS alt\\_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template.\n", - "\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.svc.cluster.local\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " sub_domain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if sub_domain_name:\n", - " bdc_fqdn = f\"{sub_domain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - "if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += 
f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc host for key management calls.\n", - " #\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc FQDN for key management calls.\n", - " #\n", - " if bdc_fqdn:\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "print(\"DNS alt_names (data plane):\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the DNS alt\\_names for control plane in secure clusters\n", - "\n", - "Get the cluster configuration from the Big Data Cluster using\n", - "`azdata bdc endpoint list`, and pull the Active Directory DNS names out\n", - "of it for the control plane expternal endpoints (Controller and\n", - "Management Proxy), and place them into the certificate configuration\n", - "file as DNS alt\\_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " # \"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "print(\"DNS alt_names (control plane):\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create alt\\_names\n", - "\n", - "If the Kuberenetes service is of \u201cNodePort\u201d type, then the IP address\n", - "needed to validate the cluster certificate could be for any node in the\n", - "Kubernetes cluster, so here all node IP addresses in the Big Data\n", - "Cluster are added as alt\\_names. Otherwise (if not NodePort, and\n", - "therefore LoadBalancer), add just the hostname as returned from\n", - "`azdata bdc endpoint list` above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "service_type = run(f\"kubectl get svc {common_name}-external -n {namespace} -o jsonpath={{.spec.type}}\", return_output=True)\n", - "\n", - "print(f\"Service type for '{common_name}-external' is: '{service_type}'\")\n", - "print(\"\")\n", - "\n", - "if service_type == \"NodePort\":\n", - " nodes_ip_address = run(\"kubectl \"\"get nodes -o jsonpath={.items[*].status.addresses[0].address}\"\"\", return_output=True)\n", - " nodes_ip_address = nodes_ip_address.split(' ')\n", - "\n", - " counter = 1\n", - " for ip in nodes_ip_address:\n", - " alt_names += f\"IP.{counter} = {ip}\\n\"\n", - " counter = counter + 1\n", - "else:\n", - " alt_names += f\"IP.1 = {hostname}\\n\"\n", - "\n", - "print(\"All (DNS and IP) alt_names:\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate Certificate Configuration file\n", - "\n", - "NOTE: There is a special case for the `controller` certificate, that\n", - "needs to be generated in PKCS\\#1 format." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ req ]\n", - "# Options for the `req` tool (`man req`).\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey{\".pkcs8\" if app_name == \"controller\" else \"\"}.pem\n", - "distinguished_name = req_distinguished_name\n", - "string_mask = utf8only\n", - "\n", - "# SHA-1 is deprecated, so use SHA-2 instead.\n", - "default_md = sha256\n", - "req_extensions = v3_req\n", - "\n", - "[ req_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ v3_req ]\n", - "subjectAltName = @alt_names\n", - "subjectKeyIdentifier = hash\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "{extendedKeyUsage}\n", - "\n", - "[ alt_names ]\n", - "DNS.1 = {common_name}\n", - "DNS.2 = {common_name}.{namespace}.svc.cluster.local # Use the namespace applicable for your cluster\n", - "{alt_names}\n", - "\"\"\"\n", - "\n", - "print(certificate)\n", - "\n", - "save_file(ssl_configuration_file, certificate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate certificate\n", - "\n", - "Use openssl req to generate a certificate in PKCS\\#10 format. 
See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/req.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl req -config {test_cert_store_root}/{app_name}/service.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER024 - Create Controller\n", - " certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "- [CER033 - Sign Master certificate with generated\n", - " CA](../cert-management/cer033-sign-master-generated-cert.ipynb)\n", - "\n", - "- [CER043 - Install signed Master\n", - " certificate](../cert-management/cer043-install-master-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb deleted file mode 100644 index 4a3bd062..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb +++ /dev/null @@ -1,921 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER024 - Create Controller certificate\n", - "======================================\n", - "\n", - "This notebook creates a certificate for the Controller endpoint. It\n", - "creates a controller-privatekey.pem as the private key and\n", - "controller-signingrequest.csr as the signing request.\n", - "\n", - "The private key is a secret. 
The signing request (CSR) will be used by\n", - "the CA to generate a signed certificate for the service.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"controller\"\n", - "scaledset_name = \"control\"\n", - "container_name = \"controller\"\n", - "prefix_keyfile_name = \"controller\"\n", - "common_name = \"controller-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"service.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "\n", - "extendedKeyUsage = \"extendedKeyUsage = critical, clientAuth, serverAuth\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer024-create-controller-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n",
-    "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
-    "\n",
-    "        if debug_logging:\n",
-    "            print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n",
-    "\n",
-    "        if re.match(expression, line, re.DOTALL):\n",
-    "\n",
-    "            if debug_logging:\n",
-    "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
-    "\n",
-    "            match_found = True\n",
-    "\n",
-    "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "print('Common functions defined successfully.')\n",
-    "\n",
-    "# Hints for binary (transient fault) retry, (known) error and install guide\n",
-    "#\n",
-    "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n",
-    "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n",
-    "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get the Kubernetes namespace for the big data cluster\n",
-    "\n",
-    "Get the namespace of the Big Data Cluster using the kubectl command line\n",
-    "interface.\n",
-    "\n",
-    "**NOTE:**\n",
-    "\n",
-    "If there is more than one Big Data Cluster in the target Kubernetes\n",
-    "cluster, then either:\n",
-    "\n",
-    "- set \\[0\\] to the correct value for the big data cluster.\n",
-    "- set the environment variable AZDATA\\_NAMESPACE, before starting\n",
-    "    Azure Data Studio."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": [
-     "hide_input"
-    ]
-   },
-   "outputs": [],
-   "source": [
-    "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
-    "\n",
-    "if \"AZDATA_NAMESPACE\" in os.environ:\n",
-    "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
-    "else:\n",
-    "    try:\n",
-    "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
-    "    except:\n",
-    "        from IPython.display import Markdown\n",
-    "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get endpoint hostname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import urllib\n", - "\n", - "endpoint_name = \"sql-server-master\" if app_name == \"master\" else app_name \n", - "\n", - "endpoint = run(f'azdata bdc endpoint list --endpoint=\"{endpoint_name}\"', return_output=True)\n", - "endpoint = json.loads(endpoint)\n", - "endpoint = endpoint['endpoint']\n", - "\n", - "print(f\"endpoint: {endpoint}\")\n", - "\n", - "hostname = urllib.parse.urlparse(endpoint).hostname\n", - "\n", - "print(f\"hostname: {hostname}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the DNS alt\\_names for data plane in secure clusters\n", - "\n", - "Get the cluster configuration from the Big Data Cluster using\n", - "`azdata bdc config`, and pull the Active Directory DNS names out of it,\n", - "and place them into the certificate 
configuration file as DNS alt\\_names"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "\n",
-    "alt_names = \"\"\n",
-    "bdc_fqdn = \"\"\n",
-    "\n",
-    "hdfs_vault_svc = \"hdfsvault-svc\"\n",
-    "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
-    "bdc_config = json.loads(bdc_config)\n",
-    "\n",
-    "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template.\n",
-    "\n",
-    "if app_name == \"gateway\" or app_name == \"master\":\n",
-    "    alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
-    "    dns_counter = dns_counter + 1\n",
-    "    alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.svc.cluster.local\\n'\n",
-    "    dns_counter = dns_counter + 1\n",
-    "\n",
-    "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
-    "    domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
-    "    sub_domain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
-    "\n",
-    "    alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n",
-    "    dns_counter = dns_counter + 1\n",
-    "\n",
-    "    if app_name == \"gateway\" or app_name == \"master\":\n",
-    "        alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n",
-    "        dns_counter = dns_counter + 1\n",
-    "\n",
-    "    if sub_domain_name:\n",
-    "        bdc_fqdn = f\"{sub_domain_name}.{domain_dns_name}\"\n",
-    "    else:\n",
-    "        bdc_fqdn = domain_dns_name\n",
-    "\n",
-    "if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
-    "    app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
-    "    for endpoint in app_name_endpoints:\n",
-    "        if \"dnsName\" in endpoint:\n",
-    "            alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
-    "            dns_counter = dns_counter + 1\n",
-    "\n",
-    "# Special case for the controller certificate\n",
-    "#\n",
-    "if app_name == \"controller\":\n",
-    "    alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
-    "    dns_counter = dns_counter + 1\n",
-    "\n",
-    "    # Add hdfsvault-svc host for key management calls.\n",
-    "    #\n",
-    "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
-    "    dns_counter = dns_counter + 1\n",
-    "\n",
-    "    # Add hdfsvault-svc FQDN for key management calls.\n",
-    "    #\n",
-    "    if bdc_fqdn:\n",
-    "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
-    "        dns_counter = dns_counter + 1\n",
-    "\n",
-    "print(\"DNS alt_names (data plane):\")\n",
-    "print(alt_names)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Create the DNS alt\\_names for control plane in secure clusters\n",
-    "\n",
-    "Get the cluster configuration from the Big Data Cluster using\n",
-    "`azdata bdc endpoint list`, and pull the Active Directory DNS names out\n",
-    "of it for the control plane external endpoints (Controller and\n",
-    "Management Proxy), and place them into the certificate configuration\n",
-    "file as DNS alt\\_names"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "from urllib.parse import urlparse\n",
-    "\n",
-    "if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
-    "    bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
-    "    bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
-    "\n",
-    "    # Parse the DNS host name from:\n",
-    "    #\n",
-    "    # 
\"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "print(\"DNS alt_names (control plane):\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create alt\\_names\n", - "\n", - "If the Kuberenetes service is of \u201cNodePort\u201d type, then the IP address\n", - "needed to validate the cluster certificate could be for any node in the\n", - "Kubernetes cluster, so here all node IP addresses in the Big Data\n", - "Cluster are added as alt\\_names. Otherwise (if not NodePort, and\n", - "therefore LoadBalancer), add just the hostname as returned from\n", - "`azdata bdc endpoint list` above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "service_type = run(f\"kubectl get svc {common_name}-external -n {namespace} -o jsonpath={{.spec.type}}\", return_output=True)\n", - "\n", - "print(f\"Service type for '{common_name}-external' is: '{service_type}'\")\n", - "print(\"\")\n", - "\n", - "if service_type == \"NodePort\":\n", - " nodes_ip_address = run(\"kubectl \"\"get nodes -o jsonpath={.items[*].status.addresses[0].address}\"\"\", return_output=True)\n", - " nodes_ip_address = nodes_ip_address.split(' ')\n", - "\n", - " counter = 1\n", - " for ip in nodes_ip_address:\n", - " alt_names += f\"IP.{counter} = {ip}\\n\"\n", - " counter = counter + 1\n", - "else:\n", - " alt_names += f\"IP.1 = {hostname}\\n\"\n", - "\n", - "print(\"All (DNS and IP) alt_names:\")\n", - "print(alt_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate Certificate Configuration file\n", - "\n", - "NOTE: There is a special case for the `controller` certificate, that\n", - "needs to be generated in PKCS\\#1 format." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ req ]\n", - "# Options for the `req` tool (`man req`).\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey{\".pkcs8\" if app_name == \"controller\" else \"\"}.pem\n", - "distinguished_name = req_distinguished_name\n", - "string_mask = utf8only\n", - "\n", - "# SHA-1 is deprecated, so use SHA-2 instead.\n", - "default_md = sha256\n", - "req_extensions = v3_req\n", - "\n", - "[ req_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. 
server FQDN or YOUR name)\n",
-    "commonName_default = {common_name}\n",
-    "\n",
-    "emailAddress = Email Address\n",
-    "emailAddress_default = {email_address}\n",
-    "\n",
-    "[ v3_req ]\n",
-    "subjectAltName = @alt_names\n",
-    "subjectKeyIdentifier = hash\n",
-    "basicConstraints = CA:FALSE\n",
-    "keyUsage = digitalSignature, keyEncipherment\n",
-    "{extendedKeyUsage}\n",
-    "\n",
-    "[ alt_names ]\n",
-    "DNS.1 = {common_name}\n",
-    "DNS.2 = {common_name}.{namespace}.svc.cluster.local # Use the namespace applicable for your cluster\n",
-    "{alt_names}\n",
-    "\"\"\"\n",
-    "\n",
-    "print(certificate)\n",
-    "\n",
-    "save_file(ssl_configuration_file, certificate)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Copy certificate configuration to `controller` `pod`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "cwd = os.getcwd()\n",
-    "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
-    "\n",
-    "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
-    "\n",
-    "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
-    "\n",
-    "os.chdir(cwd)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Generate certificate\n",
-    "\n",
-    "Use openssl req to generate a certificate signing request in PKCS\\#10 format. See:\n",
-    "\n",
-    "- https://www.openssl.org/docs/man1.0.2/man1/req.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cmd = f\"openssl req -config {test_cert_store_root}/{app_name}/service.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n",
-    "\n",
-    "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Convert the private key to PKCS\\#1 format\n",
-    "\n",
-    "The private key for controller needs to be converted to PKCS\\#1 format."
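-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For reference only (not part of the original notebook), the same PKCS\\#8 to\n",
-    "PKCS\\#1 conversion that the next cell performs with `openssl rsa` inside the\n",
-    "controller pod could be sketched locally in Python, assuming the\n",
-    "`cryptography` package is installed and the key has been copied to\n",
-    "hypothetical local files:\n",
-    "\n",
-    "```python\n",
-    "# Hedged sketch: convert an unencrypted PKCS#8 PEM key to PKCS#1 PEM.\n",
-    "from cryptography.hazmat.primitives import serialization\n",
-    "\n",
-    "with open(\"controller-privatekey.pkcs8.pem\", \"rb\") as f:  # hypothetical path\n",
-    "    key = serialization.load_pem_private_key(f.read(), password=None)\n",
-    "\n",
-    "pem_pkcs1 = key.private_bytes(\n",
-    "    encoding=serialization.Encoding.PEM,\n",
-    "    format=serialization.PrivateFormat.TraditionalOpenSSL,  # PKCS#1 for RSA\n",
-    "    encryption_algorithm=serialization.NoEncryption(),\n",
-    ")\n",
-    "\n",
-    "with open(\"controller-privatekey.pem\", \"wb\") as f:  # hypothetical path\n",
-    "    f.write(pem_pkcs1)\n",
-    "```"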
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f'openssl rsa -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pkcs8.pem -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem'\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER034 - Sign Controller certificate with cluster Root\n", - " CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)\n", - "\n", - "- [CER044 - Install signed Controller\n", - " certificate](../cert-management/cer044-install-controller-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer025-upload-management-service-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer025-upload-management-service-proxy-cert.ipynb deleted file mode 100644 index c2f85c92..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer025-upload-management-service-proxy-cert.ipynb +++ /dev/null @@ -1,556 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER025 - Upload existing Management Proxy certificate\n", - "=====================================================\n", - "\n", - "Use this notebook to upload an externally generated Management Proxy\n", - "certificate to a cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_certificate_dir = \"mssql-cluster-certificates\"\n", - "certificate_file_name = \"service-proxy-certificate.pem\"\n", - "private_key_file_name = \"service-proxy-privatekey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "app_name = \"mgmtproxy\"\n", - "prefix_keyfile_name = \"service-proxy\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
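-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "An illustrative usage note (not part of the original notebook): once the\n",
-    "`run` helper in the next cell is defined, later cells call it either to\n",
-    "stream a command's output into the notebook, or to capture it, for example:\n",
-    "\n",
-    "```python\n",
-    "# Hedged examples of calling the `run` helper defined below\n",
-    "run('kubectl version --client')  # stream output into the notebook\n",
-    "ns = run('kubectl get namespace', return_output=True)  # capture output\n",
-    "print(ns.splitlines()[0])\n",
-    "```"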
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer025-upload-management-service-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any asterisk the rule escaped with a backslash\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - "  Azure Data Studio."
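For clarity, here is a minimal, self-contained sketch (not part of the original notebook) of how an `expanded_rules` entry drives the `apply_expert_rules` function defined above: the follow-on notebook lives at index 1 of each rule and the regular expression at index 7. The priority value, notebook name, and pattern below are hypothetical examples, and `print` stands in for the Markdown HINT rendering.

```python
import re

# Hypothetical rule entry, laid out positionally as apply_expert_rules expects:
# [priority, notebook, cell_type, output_type, output_type_name,
#  output_type_value, details_name, expression]
rules = [
    [0, "tsg010-get-kubernetes-contexts.ipynb", "code", "error",
     "ename", "SystemExit", "evalue", ".*no such host.*"],
]

def apply_expert_rules_sketch(line):
    """Print a HINT for every rule whose regex matches the given stderr line."""
    for rule in rules:
        notebook = rule[1]
        expression = rule[7].replace("\\*", "*")  # un-escape any escaped asterisk
        if re.match(expression, line, re.DOTALL):
            print(f"HINT: Use {notebook} to resolve this issue.")

apply_expert_rules_sketch("ERROR: dial tcp: lookup cluster: no such host")
```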
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on `controller` to hold the certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}/\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificates to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n", - "os.chdir(path)\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {certificate_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -c controller -n 
{namespace}')\n", - "run(f'kubectl cp {private_key_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER026 - Upload existing Gateway\n", - " certificate](../cert-management/cer026-upload-knox-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer026-upload-knox-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer026-upload-knox-cert.ipynb deleted file mode 100644 index cdd90d64..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer026-upload-knox-cert.ipynb +++ /dev/null @@ -1,532 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER026 - Upload existing Gateway certificate\n", - "============================================\n", - "\n", - "Use this notebook to upload an externally generated Gateway certificate\n", - "to a cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_certificate_dir = \"mssql-cluster-certificates\"\n", - "certificate_file_name = \"knox-certificate.pem\"\n", - "private_key_file_name = \"knox-privatekey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "app_name = \"gateway\"\n", - "prefix_keyfile_name = \"knox\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer026-upload-knox-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any asterisk the rule escaped with a backslash\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - "  Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on `controller` to hold the certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}/\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificates to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n", - "os.chdir(path)\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {certificate_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {private_key_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER027 - Upload existing App Service Proxy\n", - " certificate](../cert-management/cer027-upload-app-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer027-upload-app-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer027-upload-app-proxy-cert.ipynb deleted file mode 100644 index feb006e5..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer027-upload-app-proxy-cert.ipynb +++ /dev/null @@ -1,556 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER027 - 
Upload existing App Service Proxy certificate\n", - "======================================================\n", - "\n", - "Use this notebook to upload an externally generated App Service\n", - "certificate to a cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_certificate_dir = \"mssql-cluster-certificates\"\n", - "certificate_file_name = \"service-proxy-certificate.pem\"\n", - "private_key_file_name = \"service-proxy-privatekey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "app_name = \"app-proxy\"\n", - "prefix_keyfile_name = \"service-proxy\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer027-upload-app-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6] # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any asterisk the rule escaped with a backslash\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - "  Azure Data Studio."
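As a side note, the transient-fault handling that the `run` helper above implements reduces to a small pattern: if a stderr line contains a message listed in `retry_hints` for that executable, re-invoke the command recursively until `MAX_RETRIES` is reached. Below is a standalone sketch of just that loop, not the notebook's code: `execute` is a hypothetical stub standing in for the `Popen` plumbing, and the hint string is illustrative only.

```python
# Minimal sketch of run()'s retry-on-transient-error pattern, assuming a
# stubbed execute() in place of Popen; the hint text is illustrative only.
MAX_RETRIES = 5

retry_hints = {"kubectl": ["A connection attempt failed"]}

def execute(cmd):
    """Stub: pretend to run cmd and return its stderr lines."""
    return ["Unable to connect to the server: A connection attempt failed"]

def run_sketch(cmd, retry_count=0):
    exe = cmd.split()[0].lower()
    for line in execute(cmd):
        for hint in retry_hints.get(exe, []):
            if hint in line and retry_count < MAX_RETRIES:
                print(f"RETRY: {retry_count} (due to: {hint})")
                return run_sketch(cmd, retry_count + 1)
    return "done"

run_sketch("kubectl get pods")  # prints RETRY: 0 .. RETRY: 4, then completes
```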
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create folder on `controller` to hold the certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}/\" ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificates to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n", - "os.chdir(path)\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {certificate_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -c controller -n 
{namespace}')\n", - "run(f'kubectl cp {private_key_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER028 - Upload existing Master\n", - " certificate](../cert-management/cer028-upload-master-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer028-upload-master-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer028-upload-master-cert.ipynb deleted file mode 100644 index 150a6695..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer028-upload-master-cert.ipynb +++ /dev/null @@ -1,556 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER028 - Upload existing Master certificate\n", - "===========================================\n", - "\n", - "Use this notebook to upload an externally generated Master certificate\n", - "to a cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "local_certificate_dir = \"mssql-cluster-certificates\"\n", - "certificate_file_name = \"sql-certificate.pem\"\n", - "private_key_file_name = \"sql-privatekey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n", - "app_name = \"master\"\n", - "prefix_keyfile_name = \"sql\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer028-upload-master-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
- " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
- " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
- " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
- " raise\n",
- "\n",
- "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get name of the \u2018Running\u2019 `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place the name of the 'Running' controller pod in variable `controller`\n",
- "\n",
- "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
- "\n",
- "print(f\"Controller pod name: {controller}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create folder on `controller` to hold the certificate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}/\" ')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get name of the \u2018Running\u2019 `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place the name of the 'Running' controller pod in variable `controller`\n",
- "\n",
- "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
- "\n",
- "print(f\"Controller pod name: {controller}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificates to `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import tempfile\n",
- "\n",
- "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n",
- "os.chdir(path)\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
- "\n",
- "run(f'kubectl cp {certificate_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
- "run(f'kubectl cp {private_key_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print('Notebook execution complete.')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Related\n",
- "-------\n",
- "\n",
- "- [CER029 - Upload existing Controller\n",
- " certificate](../cert-management/cer029-upload-controller-cert.ipynb)"
- ]
- }
- ],
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "azdata": {
- "side_effects": true
- }
- }
-}
\ No newline at end of file
diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer029-upload-controller-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer029-upload-controller-cert.ipynb
deleted file mode 100644
index 190e7e60..00000000
--- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer029-upload-controller-cert.ipynb
+++ /dev/null
@@ -1,558 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CER029 - Upload existing Controller certificate\n",
- "===============================================\n",
- "\n",
- "Use this notebook to upload an externally generated Controller\n",
- "certificate to a cluster.\n",
- "\n",
- "Steps\n",
- "-----\n",
- "\n",
- "### Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "parameters"
- ]
- },
- "outputs": [],
- "source": [
- "local_certificate_dir = \"mssql-cluster-certificates\"\n",
- "pfx_file_name = \"controller-certificate.p12\"\n",
- "certificate_file_name = \"controller-certificate.pem\"\n",
- "private_key_file_name = \"controller-privatekey.pem\"\n",
- "\n",
- "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n",
- "app_name = \"controller\"\n",
- "prefix_keyfile_name = \"controller\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Common functions\n",
- "\n",
- "Define helper functions used in this notebook."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
- "import sys\n",
- "import os\n",
- "import re\n",
- "import json\n",
- "import platform\n",
- "import shlex\n",
- "import shutil\n",
- "import datetime\n",
- "\n",
- "from subprocess import Popen, PIPE\n",
- "from IPython.display import Markdown\n",
- "\n",
- "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
- "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
- "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
- "\n",
- "first_run = True\n",
- "rules = None\n",
- "debug_logging = False\n",
- "\n",
- "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
- " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
- "\n",
- " NOTES:\n",
- "\n",
- " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
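[Editor's note] The transient-fault retry behaviour this copy of `run` implements further down reduces to a few lines. This is a hedged sketch of the decision only, not the notebook's exact code; `should_retry` is a name invented here:

```python
MAX_RETRIES = 5  # the same cap `run` uses

def should_retry(exe_name, stderr_line, retry_hints):
    # A stderr line containing any known-transient hint for this binary
    # triggers a recursive re-run of the command, up to MAX_RETRIES.
    return any(stderr_line.find(hint) != -1 for hint in retry_hints.get(exe_name, []))

hints = {"kubectl": ["connection attempt failed"]}
print(should_retry("kubectl", "Unable to connect: A connection attempt failed.", hints))  # True
```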
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer029-upload-controller-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
- " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
- " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
- " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
- " raise\n",
- "\n",
- "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get name of the \u2018Running\u2019 `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place the name of the 'Running' controller pod in variable `controller`\n",
- "\n",
- "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
- "\n",
- "print(f\"Controller pod name: {controller}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create folder on `controller` to hold the certificate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}/\" ')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get name of the \u2018Running\u2019 `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place the name of the 'Running' controller pod in variable `controller`\n",
- "\n",
- "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
- "\n",
- "print(f\"Controller pod name: {controller}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificates to `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import tempfile\n",
- "\n",
- "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n",
- "os.chdir(path)\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
- "\n",
- "run(f'kubectl cp {pfx_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 -c controller -n {namespace}')\n",
- "run(f'kubectl cp {certificate_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
- "run(f'kubectl cp {private_key_file_name} {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print('Notebook execution complete.')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Related\n",
- "-------\n",
- "\n",
- "- [Backup and\n",
- " Restore notebooks](../backup-restore/readme.ipynb)"
- ]
- }
- ],
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "azdata": {
- "side_effects": true
- }
- }
-}
\ No newline at end of file
diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb
deleted file mode 100644
index 841ddf52..00000000
--- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb
+++ /dev/null
@@ -1,788 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CER030 - Sign Management Proxy certificate with generated CA\n",
- "============================================================\n",
- "\n",
- "This notebook signs the certificate created using:\n",
- "\n",
- "- [CER020 - Create Management Proxy\n",
- " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n",
- "\n",
- "with the generated Root CA Certificate, created using either:\n",
- "\n",
- "- [CER001 - Generate a Root CA\n",
- " certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
- "- [CER003 - Upload existing Root CA\n",
- " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
- "\n",
- "Steps\n",
- "-----\n",
- "\n",
- "### Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "parameters"
- ]
- },
- "outputs": [],
- "source": [
- "import getpass\n",
- "\n",
- "app_name = \"mgmtproxy\"\n",
- "scaledset_name = \"mgmtproxy\"\n",
- "container_name = \"service-proxy\"\n",
- "prefix_keyfile_name = \"service-proxy\"\n",
- "common_name = \"mgmtproxy-svc\"\n",
- "\n",
- "country_name = \"US\"\n",
- "state_or_province_name = \"Illinois\"\n",
- "locality_name = \"Chicago\"\n",
- "organization_name = \"Contoso\"\n",
- "organizational_unit_name = \"Finance\"\n",
- "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
- "\n",
- "ssl_configuration_file = \"ca.openssl.cnf\"\n",
- "\n",
- "days = \"398\" # the number of days to certify the certificate for\n",
- "\n",
- "certificate_filename = \"cacert.pem\"\n",
- "private_key_filename = \"cakey.pem\"\n",
- "\n",
- "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Common functions\n",
- "\n",
- "Define helper functions used in this notebook."
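[Editor's note] For orientation, the `run` helper defined in the next cell is typically invoked in one of these shapes. This is a hedged sketch that assumes `run` is already defined; `mssql-cluster` is an assumed example namespace, while `return_output` and `base64_decode` are real parameters of `run`:

```python
# Stream a command's output live (raises SystemExit on a non-zero exit code).
run('kubectl get pods -n mssql-cluster')  # 'mssql-cluster' is an assumed namespace

# Capture output into a variable instead of streaming it.
pod_names = run('kubectl get pods -n mssql-cluster -o name', return_output=True)

# Capture and base64-decode output, e.g. the contents of a Kubernetes secret:
# secret = run('kubectl get secret mysecret -o jsonpath={.data.key}',
#              return_output=True, base64_decode=True)
```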
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer030-sign-service-proxy-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
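[Editor's note] As a concrete reading of the NOTE above, the namespace can be pinned before the lookup cell runs, which skips the label query entirely. A minimal sketch, where `mssql-cluster` is an assumed namespace name:

```python
import os

# Pin the BDC namespace explicitly so the MSSQL_CLUSTER label query is skipped.
os.environ["AZDATA_NAMESPACE"] = "mssql-cluster"  # assumed example value
```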
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify 
for\n",
- "default_crl_days = 30 # How long before next CRL\n",
- "default_md = sha256 # Use public key default MD\n",
- "preserve = no # Keep passed DN ordering\n",
- "\n",
- "x509_extensions = ca_extensions # The extensions to add to the cert\n",
- "\n",
- "email_in_dn = no # Don't concat the email in the DN\n",
- "copy_extensions = copy # Required to copy SANs from CSR to cert\n",
- "\n",
- "base_dir = {test_cert_store_root}\n",
- "certificate = $base_dir/{certificate_filename} # The CA certificate\n",
- "private_key = $base_dir/{private_key_filename} # The CA private key\n",
- "new_certs_dir = $base_dir # Location for new certs after signing\n",
- "database = $base_dir/index.txt # Database index file\n",
- "serial = $base_dir/serial.txt # The current serial number\n",
- "\n",
- "unique_subject = no # Set to 'no' to allow creation of\n",
- " # several certificates with same subject.\n",
- "\n",
- "[ req ]\n",
- "default_bits = 2048\n",
- "default_keyfile = {test_cert_store_root}/{private_key_filename}\n",
- "distinguished_name = ca_distinguished_name\n",
- "x509_extensions = ca_extensions\n",
- "string_mask = utf8only\n",
- "\n",
- "[ ca_distinguished_name ]\n",
- "countryName = Country Name (2 letter code)\n",
- "countryName_default = {country_name}\n",
- "\n",
- "stateOrProvinceName = State or Province Name (full name)\n",
- "stateOrProvinceName_default = {state_or_province_name}\n",
- "\n",
- "localityName = Locality Name (eg, city)\n",
- "localityName_default = {locality_name}\n",
- "\n",
- "organizationName = Organization Name (eg, company)\n",
- "organizationName_default = {organization_name}\n",
- "\n",
- "organizationalUnitName = Organizational Unit (eg, division)\n",
- "organizationalUnitName_default = {organizational_unit_name}\n",
- "\n",
- "commonName = Common Name (e.g. 
server FQDN or YOUR name)\n",
- "commonName_default = {common_name}\n",
- "\n",
- "emailAddress = Email Address\n",
- "emailAddress_default = {email_address}\n",
- "\n",
- "[ ca_extensions ]\n",
- "subjectKeyIdentifier = hash\n",
- "authorityKeyIdentifier = keyid:always, issuer\n",
- "basicConstraints = critical, CA:true\n",
- "keyUsage = keyCertSign, cRLSign\n",
- "\n",
- "[ signing_policy ]\n",
- "countryName = optional\n",
- "stateOrProvinceName = optional\n",
- "localityName = optional\n",
- "organizationName = optional\n",
- "organizationalUnitName = optional\n",
- "commonName = supplied\n",
- "emailAddress = optional\n",
- "\n",
- "[ signing_req ]\n",
- "subjectKeyIdentifier = hash\n",
- "authorityKeyIdentifier = keyid,issuer\n",
- "basicConstraints = CA:FALSE\n",
- "keyUsage = digitalSignature, keyEncipherment\n",
- "\"\"\"\n",
- "\n",
- "save_file(ssl_configuration_file, certificate)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificate configuration to `controller` `pod`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "cwd = os.getcwd()\n",
- "os.chdir(temp_dir) # Use chdir to work around a kubectl bug on Windows, which incorrectly processes 'c:\\' on the kubectl cp cmd line \n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
- "\n",
- "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
- "\n",
- "os.chdir(cwd)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Set next serial number"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
- "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
- "\n",
- "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
- "\n",
- "# The serial number is hex\n",
- "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
- "\n",
- "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Sign the certificate signing request\n",
- "\n",
- "Use openssl ca to sign the pending certificate signing request with the staged CA certificate and private key. 
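A hedged aside, not part of the original notebook: once the signing step below completes, the result can be sanity-checked with `openssl verify`, which confirms the new certificate chains back to the generated Root CA. The sketch reuses the notebook's `run` helper and the path variables defined in the Parameters cell; the file locations are assumptions carried over from the staging steps above.

```python
# Sketch only: verify the freshly signed certificate against the generated Root CA.
# Assumes the CA certificate and the signed certificate are still at the staged paths.
cmd = f"openssl verify -CAfile {test_cert_store_root}/{certificate_filename} {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem"

run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c "{cmd}"')
```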
See:\n",
- "\n",
- "- https://www.openssl.org/docs/man1.0.2/man1/ca.html"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Display certificate\n",
- "\n",
- "Use openssl x509 to display the certificate, so it can be visually\n",
- "verified to be correct."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Clean up temporary directory for staging configuration files"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Delete the temporary directory used to hold configuration files\n",
- "\n",
- "import shutil\n",
- "\n",
- "shutil.rmtree(temp_dir)\n",
- "\n",
- "print(f'Temporary directory deleted: {temp_dir}')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print('Notebook execution complete.')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Related\n",
- "-------\n",
- "\n",
- "- [CER031 - Sign Knox certificate with generated\n",
- " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n",
- "\n",
- "- [CER020 - Create Management Proxy\n",
- " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n",
- "\n",
- "- [CER040 - Install signed Management Proxy\n",
- " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)"
- ]
- }
- ],
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "azdata": {
- "side_effects": true
- }
- }
-}
\ No newline at end of file
diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb
deleted file mode 100644
index 094b22bf..00000000
--- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb
+++ /dev/null
@@ -1,788 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CER031 - Sign Knox certificate with generated CA\n",
- "================================================\n",
- "\n",
- "This notebook signs the certificate created using:\n",
- "\n",
- "- [CER021 - Create Knox\n",
- " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n",
- "\n",
- "with the generated Root CA Certificate, created using either:\n",
- "\n",
- "- [CER001 - Generate a Root CA\n",
- " certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
- "- [CER003 - Upload existing Root CA\n",
- " 
certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"gateway\"\n", - "scaledset_name = \"gateway/pods/gateway-0\"\n", - "container_name = \"knox\"\n", - "prefix_keyfile_name = \"knox\"\n", - "common_name = \"gateway-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"ca.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "certificate_filename = \"cacert.pem\"\n", - "private_key_filename = \"cakey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
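A hedged standalone sketch, not part of the original notebook: the PATH scan this comment describes can be reproduced by hand to see which `curl.exe` candidates exist in search order; the notebook picks the first hit whose path does not contain System32.

```python
import os
import platform

# List every executable curl.exe on PATH, in search order.
if platform.system() == "Windows":
    for d in os.getenv("PATH", "").split(os.path.pathsep):
        candidate = os.path.join(d, "curl.exe")
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            print(candidate)
```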
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer031-sign-knox-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n",
- " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
- "\n",
- " if debug_logging:\n",
- " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- " if re.match(expression, line, re.DOTALL):\n",
- "\n",
- " if debug_logging:\n",
- " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- " match_found = True\n",
- "\n",
- " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command line\n",
- "interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \[0\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\_NAMESPACE, before starting\n",
- " Azure Data Studio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
- "\n",
- "if \"AZDATA_NAMESPACE\" in os.environ:\n",
- " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
- "else:\n",
- " try:\n",
- " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- " except:\n",
- " from IPython.display import Markdown\n",
- " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify for\n", - "default_crl_days = 30 # How long before next CRL\n", - "default_md = sha256 # Use public key default MD\n", - "preserve = no # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn = no # Don't concat the email in the DN\n", - "copy_extensions = copy # Required to copy SANs from CSR to cert\n", - "\n", - "base_dir = {test_cert_store_root}\n", - "certificate = $base_dir/{certificate_filename} # The CA certifcate\n", - "private_key = $base_dir/{private_key_filename} # The CA private key\n", - "new_certs_dir = $base_dir # Location for new 
certs after signing\n", - "database = $base_dir/index.txt # Database index file\n", - "serial = $base_dir/serial.txt # The current serial number\n", - "\n", - "unique_subject = no # Set to 'no' to allow creation of\n", - " # several certificates with same subject.\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{private_key_filename}\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\n", - "[ signing_policy ]\n", - "countryName = optional\n", - "stateOrProvinceName = optional\n", - "localityName = optional\n", - "organizationName = optional\n", - "organizationalUnitName = optional\n", - "commonName = supplied\n", - "emailAddress = optional\n", - "\n", - "[ signing_req ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid,issuer\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "\"\"\"\n", - "\n", - "save_file(ssl_configuration_file, certificate)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set next serial number" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n", - "\n", - "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat 
{test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
- "\n",
- "# The serial number is hex\n",
- "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
- "\n",
- "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Sign the certificate signing request\n",
- "\n",
- "Use openssl ca to sign the pending certificate signing request with the staged CA certificate and private key. See:\n",
- "\n",
- "- https://www.openssl.org/docs/man1.0.2/man1/ca.html"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Display certificate\n",
- "\n",
- "Use openssl x509 to display the certificate, so it can be visually\n",
- "verified to be correct."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
- "\n",
- "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Clean up temporary directory for staging configuration files"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Delete the temporary directory used to hold configuration files\n",
- "\n",
- "import shutil\n",
- "\n",
- "shutil.rmtree(temp_dir)\n",
- "\n",
- "print(f'Temporary directory deleted: {temp_dir}')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print('Notebook execution complete.')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Related\n",
- "-------\n",
- "\n",
- "- [CER032 - Sign App-Proxy certificate with generated\n",
- " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n",
- "\n",
- "- [CER021 - Create Knox\n",
- " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n",
- "\n",
- "- [CER041 - Install signed Knox\n",
- " certificate](../cert-management/cer041-install-knox-cert.ipynb)"
- ]
- }
- ],
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "azdata": {
- "side_effects": true
- }
- }
-}
\ No newline at end of file
diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb
deleted file mode 100644
index cf87f0e0..00000000
--- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb
+++ /dev/null
@@ -1,788 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CER032 - Sign App-Proxy certificate with generated CA\n",
- "=====================================================\n",
- "\n",
- "This notebook signs the certificate created using:\n",
- "\n",
- "- [CER022 - Create App Proxy\n",
- " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n",
- "\n",
- "with the generated Root CA Certificate, created using either:\n",
- "\n",
- "- [CER001 - Generate a Root CA\n",
- " certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
- "- [CER003 - Upload existing Root CA\n",
- " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
- "\n",
- "Steps\n",
- "-----\n",
- "\n",
- "### Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "parameters"
- ]
- },
- "outputs": [],
- "source": [
- "import getpass\n",
- "\n",
- "app_name = \"app-proxy\"\n",
- "scaledset_name = \"appproxy\"\n",
- "container_name = \"app-service-proxy\"\n",
- "prefix_keyfile_name = \"service-proxy\"\n",
- "common_name = \"appproxy-svc\"\n",
- "\n",
- "country_name = \"US\"\n",
- "state_or_province_name = \"Illinois\"\n",
- "locality_name = \"Chicago\"\n",
- "organization_name = \"Contoso\"\n",
- "organizational_unit_name = \"Finance\"\n",
- "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
- "\n",
- "ssl_configuration_file = \"ca.openssl.cnf\"\n",
- "\n",
- "days = \"398\" # the number of days to certify the certificate for\n",
- "\n",
- "certificate_filename = \"cacert.pem\"\n",
- "private_key_filename = \"cakey.pem\"\n",
- "\n",
- "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Common functions\n",
- "\n",
- "Define helper functions used in this notebook."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Define \`run\` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
- "import sys\n",
- "import os\n",
- "import re\n",
- "import json\n",
- "import platform\n",
- "import shlex\n",
- "import shutil\n",
- "import datetime\n",
- "\n",
- "from subprocess import Popen, PIPE\n",
- "from IPython.display import Markdown\n",
- "\n",
- "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
- "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
- "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
- "\n",
- "first_run = True\n",
- "rules = None\n",
- "debug_logging = False\n",
- "\n",
- "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
- " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
- "\n",
- " NOTES:\n",
- "\n",
- " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
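A hedged aside, not part of the original notebook: the quoting NOTES in the docstring above come down to how `shlex.split` tokenizes the command string before it reaches `Popen`; a minimal illustration follows.

```python
import shlex

# shlex.split applies shell-style tokenization, so the jsonpath argument
# survives as a single element of the argv list handed to Popen.
cmd = "kubectl get nodes -o jsonpath={.items[0].metadata.name}"
print(shlex.split(cmd))
# ['kubectl', 'get', 'nodes', '-o', 'jsonpath={.items[0].metadata.name}']
```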
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer032-sign-app-proxy-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n",
- " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
- "\n",
- " if debug_logging:\n",
- " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- " if re.match(expression, line, re.DOTALL):\n",
- "\n",
- " if debug_logging:\n",
- " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- " match_found = True\n",
- "\n",
- " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command line\n",
- "interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \[0\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\_NAMESPACE, before starting\n",
- " Azure Data Studio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "hide_input"
- ]
- },
- "outputs": [],
- "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
- "\n",
- "if \"AZDATA_NAMESPACE\" in os.environ:\n",
- " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
- "else:\n",
- " try:\n",
- " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
- " except:\n",
- " from IPython.display import Markdown\n",
- " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify for\n", - "default_crl_days = 30 # How long before next CRL\n", - "default_md = sha256 # Use public key default MD\n", - "preserve = no # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn = no # Don't concat the email in the DN\n", - "copy_extensions = copy # Required to copy SANs from CSR to cert\n", - "\n", - "base_dir = {test_cert_store_root}\n", - "certificate = $base_dir/{certificate_filename} # The CA certifcate\n", - "private_key = $base_dir/{private_key_filename} # The CA private key\n", - "new_certs_dir = $base_dir # Location for new 
certs after signing\n", - "database = $base_dir/index.txt # Database index file\n", - "serial = $base_dir/serial.txt # The current serial number\n", - "\n", - "unique_subject = no # Set to 'no' to allow creation of\n", - " # several certificates with same subject.\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{private_key_filename}\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\n", - "[ signing_policy ]\n", - "countryName = optional\n", - "stateOrProvinceName = optional\n", - "localityName = optional\n", - "organizationName = optional\n", - "organizationalUnitName = optional\n", - "commonName = supplied\n", - "emailAddress = optional\n", - "\n", - "[ signing_req ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid,issuer\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "\"\"\"\n", - "\n", - "save_file(ssl_configuration_file, certificate)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set next serial number" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n", - "\n", - "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat 
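The copy step above chdirs into the staging directory so kubectl cp sees a bare file name rather than a Windows `c:\` path. A reusable sketch of that pattern follows; the in-pod target path is a hypothetical example, and `run`, `temp_dir`, `controller`, and `namespace` are the notebook's own variables.

```python
import os
from contextlib import contextmanager

@contextmanager
def pushd(path):
    """Temporarily change the working directory, restoring it on exit."""
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)

# kubectl cp can misparse 'c:\...' as pod:path syntax on Windows, so copy
# by bare filename from inside the staging directory instead.
with pushd(temp_dir):
    run(f'kubectl cp ca.openssl.cnf {controller}:/tmp/ca.openssl.cnf -c controller -n {namespace}')
```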
{test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n", - "\n", - "# The serial number is hex\n", - "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n", - "\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create private key and certificate signing request\n", - "\n", - "Use openssl ca to create a private key and signing request. See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/ca.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display certificate\n", - "\n", - "Use openssl x509 to display the certificate, so it can be visually\n", - "verified to be correct." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n", - "\n", - "- [CER042 - Install signed App-Proxy\n", - " certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-cert.ipynb deleted file mode 100644 index a2e121c8..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-cert.ipynb +++ /dev/null @@ -1,788 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER033 - Sign Master certificate with generated CA\n", 
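The serial-number step in the signing notebooks above treats serial.txt as a bare hex value; the round-trip it performs is plain Python, shown here with a sample value.

```python
current_serial_number = "0A"  # as read from serial.txt: hex digits, no '0x' prefix

# int(..., 0) infers the base from the '0x' prefix prepended here
new_serial_number = int(f"0x{current_serial_number}", 0) + 1

# Write back zero-padded uppercase hex, matching the file's format
print(f"{new_serial_number:02X}")  # -> '0B'
```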
- "==================================================\n", - "\n", - "This notebook signs the certificate created using:\n", - "\n", - "- [CER023 - Create Master\n", - " certificate](../cert-management/cer023-create-master-cert.ipynb)\n", - "\n", - "with the generate Root CA Certificate, created using either:\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"master\"\n", - "scaledset_name = \"master\"\n", - "container_name = \"mssql-server\"\n", - "prefix_keyfile_name = \"sql\"\n", - "common_name = \"master-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"ca.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "certificate_filename = \"cacert.pem\"\n", - "private_key_filename = \"cakey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
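The cell above carries the `parameters` tag, the convention papermill uses for injectable defaults. Assuming a papermill-compatible runner (how `azdata notebook run` injects values is not shown in this notebook), overrides could be supplied as below; the output path is a hypothetical example.

```python
import papermill as pm  # third-party dependency, an assumption here

# Values passed in 'parameters' shadow the defaults defined in the
# 'parameters'-tagged cell of the target notebook.
pm.execute_notebook(
    "cer033-sign-master-generated-cert.ipynb",
    "cer033-output.ipynb",
    parameters={"common_name": "master-svc", "days": "398"},
)
```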
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
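The quoting rule described in the docstring above can be verified directly: under POSIX rules, shlex strips a bare `'` pair, while the `'"'` pattern leaves a literal double quote in the token kubectl receives.

```python
import shlex

# The '"' pattern from the docstring keeps the inner quote intact:
print(shlex.split("jsonpath={.x=='\"'data-pool'\"'}"))
# -> ['jsonpath={.x=="data-pool"}']

# A plain ' pair is consumed by the shell-style lexer, losing the quoting:
print(shlex.split("jsonpath={.x=='data-pool'}"))
# -> ['jsonpath={.x==data-pool}']
```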
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
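The retry path above re-invokes `run` recursively when stderr contains a known transient message; stripped to its essentials it looks like the sketch below, where the command and the transient marker are hypothetical.

```python
import subprocess

MAX_RETRIES = 5
transient_hints = ["connection timed out"]  # hypothetical transient marker

def run_with_retry(cmd, retry_count=0):
    """Re-run cmd when stderr matches a known-transient message."""
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0 and retry_count < MAX_RETRIES:
        if any(hint in result.stderr for hint in transient_hints):
            print(f"RETRY: {retry_count + 1}")
            return run_with_retry(cmd, retry_count + 1)
    return result

result = run_with_retry(["kubectl", "get", "pods"])
```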
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer033-sign-master-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "  Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
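The `load_rules` helper defined above re-opens the notebook's own .ipynb as JSON and walks metadata -> azdata -> expert -> expanded_rules; the same walk with defensive lookups is sketched below (the filename is a hypothetical example).

```python
import json

with open("cer033-sign-master-generated-cert.ipynb", encoding="utf8") as f:
    nb = json.load(f)

# Tolerate any missing level instead of raising KeyError
rules = (nb.get("metadata", {})
           .get("azdata", {})
           .get("expert", {})
           .get("expanded_rules", []))

rules.sort()  # priority order: element [0] of each rule, lowest value first
print(f"{len(rules)} expert rule(s) loaded.")
```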
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify for\n", - "default_crl_days = 30 # How long before next CRL\n", - "default_md = sha256 # Use public key default MD\n", - "preserve = no # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn = no # Don't concat the email in the DN\n", - "copy_extensions = copy # Required to copy SANs from CSR to cert\n", - "\n", - "base_dir = {test_cert_store_root}\n", - "certificate = $base_dir/{certificate_filename} # The CA certifcate\n", - "private_key = $base_dir/{private_key_filename} # The CA private key\n", - "new_certs_dir = $base_dir # Location for new 
certs after signing\n", - "database = $base_dir/index.txt # Database index file\n", - "serial = $base_dir/serial.txt # The current serial number\n", - "\n", - "unique_subject = no # Set to 'no' to allow creation of\n", - " # several certificates with same subject.\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{private_key_filename}\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\n", - "[ signing_policy ]\n", - "countryName = optional\n", - "stateOrProvinceName = optional\n", - "localityName = optional\n", - "organizationName = optional\n", - "organizationalUnitName = optional\n", - "commonName = supplied\n", - "emailAddress = optional\n", - "\n", - "[ signing_req ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid,issuer\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "\"\"\"\n", - "\n", - "save_file(ssl_configuration_file, certificate)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set next serial number" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n", - "\n", - "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat 
{test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n", - "\n", - "# The serial number is hex\n", - "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n", - "\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create private key and certificate signing request\n", - "\n", - "Use openssl ca to create a private key and signing request. See:\n", - "\n", - "- https://www.openssl.org/docs/man1.0.2/man1/ca.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display certificate\n", - "\n", - "Use openssl x509 to display the certificate, so it can be visually\n", - "verified to be correct." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER034 - Sign Controller certificate with cluster Root\n", - " CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)\n", - "\n", - "- [CER023 - Create Master\n", - " certificate](../cert-management/cer023-create-master-cert.ipynb)\n", - "\n", - "- [CER043 - Install signed Master\n", - " certificate](../cert-management/cer043-install-master-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb deleted file mode 100644 index bb08fe96..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb +++ /dev/null @@ -1,865 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER034 - Sign Controller certificate with 
cluster Root CA\n", - "=========================================================\n", - "\n", - "This notebook signs the certificate created using:\n", - "\n", - "- [CER024 - Create Controller\n", - " certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "with the generate Root CA Certificate, created using either:\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"controller\"\n", - "scaledset_name = \"control\"\n", - "container_name = \"controller\"\n", - "prefix_keyfile_name = \"controller\"\n", - "common_name = \"controller-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"ca.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "certificate_filename = \"cluster-ca-certificate.crt\"\n", - "private_key_filename = \"cluster-ca-privatekey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer034-sign-controller-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "  Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days = 1000 # How long to certify for\n", - "default_crl_days = 30 # How long before next CRL\n", - "default_md = sha256 # Use public key default MD\n", - "preserve = no # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn = no # Don't concat the email in the DN\n", - "copy_extensions = copy # Required to copy SANs from CSR to cert\n", - "\n", - "base_dir = {test_cert_store_root}\n", - "certificate = $base_dir/{certificate_filename} # The CA certifcate\n", - "private_key = $base_dir/{private_key_filename} # The CA private key\n", - "new_certs_dir = $base_dir # Location for new 
certs after signing\n", - "database = $base_dir/index.txt # Database index file\n", - "serial = $base_dir/serial.txt # The current serial number\n", - "\n", - "unique_subject = no # Set to 'no' to allow creation of\n", - " # several certificates with same subject.\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{private_key_filename}\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\n", - "[ signing_policy ]\n", - "countryName = optional\n", - "stateOrProvinceName = optional\n", - "localityName = optional\n", - "organizationName = optional\n", - "organizationalUnitName = optional\n", - "commonName = supplied\n", - "emailAddress = optional\n", - "\n", - "[ signing_req ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid,issuer\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "\"\"\"\n", - "\n", - "save_file(ssl_configuration_file, certificate)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Obtain CA certificate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/cluster-ca-certificate.crt\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Extract the private key from pfx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"NO_PASSWORD= openssl pkcs12 
-in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set next serial number" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n", - "\n", - "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n", - "\n", - "# The serial number is hex\n", - "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n", - "\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create private key and certificate signing request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl ca -batch -notext -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display certificate\n", - "\n", - "Use openssl x509 to display the certificate, so it can be visually\n", - "verified to be correct." 
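As an aside on the "Set next serial number" step above: `serial.txt` holds the next certificate serial as zero-padded upper-case hex, seeded with `00` and incremented before each signing. A minimal local sketch of the same arithmetic, assuming a local `serial.txt` as a stand-in for the copy kept inside the controller pod (the notebook performs this via `kubectl exec`; `int(x, 16)` is equivalent to the cell's `int(f"0x{x}", 0)`):

```python
# Minimal sketch of the hex serial bookkeeping shown above.
# Assumption: a local serial.txt stands in for the file in the controller pod.
from pathlib import Path

serial_file = Path("serial.txt")
if not serial_file.exists():
    serial_file.write_text("00")           # seed, as the cell above does

current = serial_file.read_text().strip()  # e.g. "00"
new = int(current, 16) + 1                 # the serial number is hexadecimal
serial_file.write_text(f"{new:02X}")       # write back zero-padded upper-case hex
print(f"serial: {current} -> {new:02X}")
```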
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate the `controller` certificate in PKCS12 format\n", - "\n", - "For the controller certificate, generate the certificate in PKCS12\n", - "format.\n", - "\n", - "In order to obtain the certificate in PKCS12 format from the generated\n", - "private key and certificate file above, run the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f'NO_PASSWORD= openssl pkcs12 -export -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 -inkey {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -password env:NO_PASSWORD'\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c {app_name} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up the extracted controller private key\n", - "\n", - "Remove the private key that was extracted from the\n", - "cluster-ca-certificate.p12 above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm {test_cert_store_root}/{private_key_filename}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - "  certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n", - "\n", - "- [CER024 - Create Controller\n", - "  certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "- [CER044 - Install signed Controller\n", - "  certificate](../cert-management/cer044-install-controller-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer035-ca-sign-controller-generated-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer035-ca-sign-controller-generated-cert.ipynb deleted file mode 100644 index a3366470..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer035-ca-sign-controller-generated-cert.ipynb +++ /dev/null @@ -1,829 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "CER035 - Sign Controller certificate with external Root CA\n", - "==========================================================\n", - "\n", - "This notebook signs the certificate created using:\n", - "\n", - "- [CER024 - Create Controller\n", - " certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "with the generated Root CA Certificate, created using either:\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "- [CER003 - Upload existing Root CA\n", - " certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "app_name = \"controller\"\n", - "scaledset_name = \"control\"\n", - "container_name = \"controller\"\n", - "prefix_keyfile_name = \"controller\"\n", - "common_name = \"controller-svc\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "ssl_configuration_file = \"ca.openssl.cnf\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificate for\n", - "\n", - "certificate_filename = \"cacert.pem\"\n", - "private_key_filename = \"cakey.pem\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer035-ca-sign-controller-generated-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text  \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \\[0\\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - "        text_file.write(contents)\n", - "\n", - "    print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Signing Request configuration file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "certificate = f\"\"\"\n", - "[ ca ]\n", - "default_ca = CA_default      # The default ca section\n", - "\n", - "[ CA_default ]\n", - "default_days     = 1000         # How long to certify for\n", - "default_crl_days = 30           # How long before next CRL\n", - "default_md       = sha256       # Use public key default MD\n", - "preserve         = no           # Keep passed DN ordering\n", - "\n", - "x509_extensions = ca_extensions # The extensions to add to the cert\n", - "\n", - "email_in_dn     = no            # Don't concat the email in the DN\n", - "copy_extensions = copy          # Required to copy SANs from CSR to cert\n", - "\n", - "base_dir      = {test_cert_store_root}\n", - "certificate   = $base_dir/{certificate_filename}   # The CA certificate\n", - "private_key   = $base_dir/{private_key_filename}    # The CA private key\n", - "new_certs_dir = $base_dir              # Location for new 
certs after signing\n", - "database = $base_dir/index.txt # Database index file\n", - "serial = $base_dir/serial.txt # The current serial number\n", - "\n", - "unique_subject = no # Set to 'no' to allow creation of\n", - " # several certificates with same subject.\n", - "\n", - "[ req ]\n", - "default_bits = 2048\n", - "default_keyfile = {test_cert_store_root}/{private_key_filename}\n", - "distinguished_name = ca_distinguished_name\n", - "x509_extensions = ca_extensions\n", - "string_mask = utf8only\n", - "\n", - "[ ca_distinguished_name ]\n", - "countryName = Country Name (2 letter code)\n", - "countryName_default = {country_name}\n", - "\n", - "stateOrProvinceName = State or Province Name (full name)\n", - "stateOrProvinceName_default = {state_or_province_name}\n", - "\n", - "localityName = Locality Name (eg, city)\n", - "localityName_default = {locality_name}\n", - "\n", - "organizationName = Organization Name (eg, company)\n", - "organizationName_default = {organization_name}\n", - "\n", - "organizationalUnitName = Organizational Unit (eg, division)\n", - "organizationalUnitName_default = {organizational_unit_name}\n", - "\n", - "commonName = Common Name (e.g. server FQDN or YOUR name)\n", - "commonName_default = {common_name}\n", - "\n", - "emailAddress = Email Address\n", - "emailAddress_default = {email_address}\n", - "\n", - "[ ca_extensions ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid:always, issuer\n", - "basicConstraints = critical, CA:true\n", - "keyUsage = keyCertSign, cRLSign\n", - "\n", - "[ signing_policy ]\n", - "countryName = optional\n", - "stateOrProvinceName = optional\n", - "localityName = optional\n", - "organizationName = optional\n", - "organizationalUnitName = optional\n", - "commonName = supplied\n", - "emailAddress = optional\n", - "\n", - "[ signing_req ]\n", - "subjectKeyIdentifier = hash\n", - "authorityKeyIdentifier = keyid,issuer\n", - "basicConstraints = CA:FALSE\n", - "keyUsage = digitalSignature, keyEncipherment\n", - "\"\"\"\n", - "\n", - "save_file(ssl_configuration_file, certificate)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certificate configuration to `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n", - "\n", - "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set next serial number" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n", - "\n", - "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat 
{test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n", - "\n", - "# The serial number is hex\n", - "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n", - "\n", - "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create private key and certificate signing request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl ca -batch -notext -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display certificate\n", - "\n", - "Use openssl x509 to display the certificate, so it can be visually\n", - "verified to be correct." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate the `controller` certificate in PKCS12 format\n", - "\n", - "For the controller certificate, generate the certificate in PKCS12\n", - "format.\n", - "\n", - "In order to obtain the certificate in PKCS12 format from the generated\n", - "private key and certificate file above, run the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f'NO_PASSWORD= openssl pkcs12 -export -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 -inkey {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -password env:NO_PASSWORD'\n", - "\n", - "run(f'kubectl exec {controller} -n {namespace} -c {app_name} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up the extracted controller private key\n", - "\n", - "Remove the private key that was extracted from the\n", - "cluster-ca-certificate.p12 above." 
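On the `NO_PASSWORD= openssl pkcs12 ... -password env:NO_PASSWORD` idiom used above: the empty variable is set only for the duration of the command, so openssl reads an empty export password from the environment instead of prompting. A hedged sketch of the same pattern driven from Python, with placeholder file names rather than the notebook's exact pod paths:

```python
# Sketch: export a certificate/key pair to PKCS#12 with an empty password,
# mirroring the env:NO_PASSWORD idiom above. Paths are placeholders.
import os
import subprocess

env = dict(os.environ, NO_PASSWORD="")  # empty export password, passed via env
subprocess.run(
    [
        "openssl", "pkcs12", "-export",
        "-out", "controller-certificate.p12",
        "-inkey", "controller-privatekey.pem",
        "-in", "controller-certificate.pem",
        "-password", "env:NO_PASSWORD",
    ],
    check=True,
    env=env,
)
```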
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm {test_cert_store_root}/{private_key_filename}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n", - "\n", - "- [CER024 - Create Controller\n", - " certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "- [CER044 - Install signed Controller\n", - " certificate](../cert-management/cer044-install-controller-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb deleted file mode 100644 index 3c0a0f8e..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb +++ /dev/null @@ -1,922 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER040 - Install signed Management Proxy certificate\n", - "====================================================\n", - "\n", - "This notebook installs into the Big Data Cluster the certificate signed\n", - "using:\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "app_name = \"mgmtproxy\"\n", - "scaledset_name = \"mgmtproxy\"\n", - "container_name = \"service-proxy\"\n", - "prefix_keyfile_name = \"service-proxy\"\n", - "common_name = \"mgmtproxy-svc\"\n", - "user = \"nginx\"\n", - "group = \"nginx\"\n", - "mode = \"550\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
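The `run` helper defined in the next cell streams stdout, surfaces stderr with HINT links, and re-invokes commands whose stderr matches a known transient message. A simplified sketch of that retry idea; the names and the transient marker here are illustrative assumptions, not the helper's exact implementation:

```python
# Simplified sketch of the retry-on-transient-stderr idea used by `run` below.
# Assumption: TRANSIENT_MARKERS holds example substrings, not the real list.
import subprocess

TRANSIENT_MARKERS = ["connection attempt failed"]
MAX_RETRIES = 5

def run_with_retry(cmd, retry_count=0):
    p = subprocess.run(cmd, capture_output=True, text=True)
    if p.returncode != 0 and retry_count < MAX_RETRIES:
        if any(marker in p.stderr for marker in TRANSIENT_MARKERS):
            print(f"RETRY: {retry_count + 1} (transient error detected)")
            return run_with_retry(cmd, retry_count + 1)
    p.check_returncode()  # raise if the command still failed
    return p.stdout
```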
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer040-install-service-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \\[0\\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio." 
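The resolution order just described, sketched as a standalone helper: an explicit AZDATA_NAMESPACE takes precedence, otherwise the first namespace carrying the MSSQL_CLUSTER label is used (the kubectl invocation matches the cell that follows; the function name is illustrative):

```python
# Sketch of the namespace-resolution order described above.
import os
import subprocess

def resolve_bdc_namespace():
    # An explicit AZDATA_NAMESPACE wins over label-based discovery.
    if "AZDATA_NAMESPACE" in os.environ:
        return os.environ["AZDATA_NAMESPACE"]
    # Otherwise take the first namespace labelled MSSQL_CLUSTER.
    return subprocess.check_output(
        ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
         "-o", "jsonpath={.items[0].metadata.name}"],
        text=True,
    ).strip()
```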
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the `management proxy` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the mgmtproxy pod in variable `pod`\n", - "\n", - "pod = run(f'kubectl get pod --selector=app=mgmtproxy -n 
{namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "\n", - "print(f\"Management proxy pod name: {pod}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate certificate common name and alt names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "kubernetes_default_record_name = 'kubernetes.default'\n", - "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n", - "default_dns_suffix = 'svc.cluster.local'\n", - "dns_suffix = ''\n", - "\n", - "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\" ', return_output=True)\n", - "\n", - "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n", - "\n", - "if not name or kubernetes_default_svc_prefix not in name[0]:\n", - " dns_suffix = default_dns_suffix\n", - "else:\n", - " dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "alt_names += f\"DNS.1 = {common_name}\\n\"\n", - "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n", - "\n", - "# Stateful set related DNS names\n", - "#\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# AD related DNS names\n", - "#\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if subdomain_name:\n", - " bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - " # Endpoint DNS names for bdc certificates\n", - " #\n", - " if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - " \n", - " # Endpoint DNS names for control plane certificates\n", - " #\n", - " if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " 
# \"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc host for key management calls.\n", - " #\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc FQDN for key management calls.\n", - " #\n", - " if bdc_fqdn:\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n", - "\n", - "# Get certificate common name and DNS names\n", - "# \n", - "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n", - "subject = re.findall('Subject:(.+)', cert)[0]\n", - "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n", - "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n", - "\n", - "# Validate the common name\n", - "#\n", - "if (common_name != certficate_common_name):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n", - "\n", - "# Validate the DNS names\n", - "#\n", - "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from `controller` to local machine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from local machine to `controldb`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n 
{namespace}')\n", - "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller-db-rw-secret` secret\n", - "\n", - "Get the controller SQL symmetric key password for decryption." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "\n", - "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n", - "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n", - "\n", - "print(\"controller_db_rw_secret retrieved\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update the files table with the certificates through opened SQL connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "sql = f\"\"\"\n", - "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n", - "\n", - "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n", - "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n", - " \n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\"\"\"\n", - "\n", - "save_file(\"insert_certificates.sql\", sql)\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n", - "\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "\n", - "# Clean up\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 
Clear out the controller\\_db\\_rw\\_secret variable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controller_db_rw_secret= \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up certificate staging area\n", - "\n", - "Remove the certificate files generated on disk (they have now been\n", - "placed in the controller database)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Restart Pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod {pod} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER041 - Install signed Knox\n", - " certificate](../cert-management/cer041-install-knox-cert.ipynb)\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER020 - Create Management Proxy\n", - " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb deleted file mode 100644 index 35992682..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb +++ /dev/null @@ -1,914 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER041 - Install signed Knox certificate\n", - "========================================\n", - "\n", - "This notebook installs into the Big Data Cluster the certificate signed\n", - "using:\n", - "\n", - "- [CER031 - Sign Knox certificate with generated\n", - " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "app_name = \"gateway\"\n", - "scaledset_name = \"gateway/pods/gateway-0\"\n", - "container_name = \"knox\"\n", - "prefix_keyfile_name = \"knox\"\n", - "common_name = \"gateway-svc\"\n", - "user = \"knox\"\n", - "group = \"knox\"\n", - "mode 
= \"550\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer041-install-knox-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
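[Editor's note: where more than one namespace carries the `MSSQL_CLUSTER` label, it helps to see every candidate before picking the jsonpath index the NOTE above refers to. A minimal sketch, not part of the deleted notebook, assuming `kubectl` is on PATH and the current context points at the right cluster:]

```python
# Sketch: enumerate namespaces labelled MSSQL_CLUSTER so the correct
# jsonpath index ([0], [1], ...) can be chosen by hand.
import subprocess

names = subprocess.check_output(
    ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
     "-o", "jsonpath={.items[*].metadata.name}"],
    text=True,
).split()

for index, name in enumerate(names):
    print(f"[{index}] {name}")
```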
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pod name for gateway" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod = 'gateway-0'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate certificate common name and alt names" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "kubernetes_default_record_name = 'kubernetes.default'\n", - "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n", - "default_dns_suffix = 'svc.cluster.local'\n", - "dns_suffix = ''\n", - "\n", - "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\" ', return_output=True)\n", - "\n", - "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n", - "\n", - "if not name or kubernetes_default_svc_prefix not in name[0]:\n", - " dns_suffix = default_dns_suffix\n", - "else:\n", - " dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "alt_names += f\"DNS.1 = {common_name}\\n\"\n", - "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n", - "\n", - "# Stateful set related DNS names\n", - "#\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# AD related DNS names\n", - "#\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if subdomain_name:\n", - " bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - " # Endpoint DNS names for bdc certificates\n", - " #\n", - " if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - " \n", - " # Endpoint DNS names for control plane certificates\n", - " #\n", - " if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " # \"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter 
+ 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc host for key management calls.\n", - " #\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc FQDN for key management calls.\n", - " #\n", - " if bdc_fqdn:\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n", - "\n", - "# Get certificate common name and DNS names\n", - "# \n", - "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n", - "subject = re.findall('Subject:(.+)', cert)[0]\n", - "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n", - "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n", - "\n", - "# Validate the common name\n", - "#\n", - "if (common_name != certficate_common_name):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n", - "\n", - "# Validate the DNS names\n", - "#\n", - "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from `controller` to local machine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from local machine to `controldb`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n", - "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the 
`controller-db-rw-secret` secret\n", - "\n", - "Get the controller SQL symmetric key password for decryption." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "\n", - "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n", - "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n", - "\n", - "print(\"controller_db_rw_secret retrieved\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update the files table with the certificates through opened SQL connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "sql = f\"\"\"\n", - "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n", - "\n", - "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n", - "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n", - " \n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\"\"\"\n", - "\n", - "save_file(\"insert_certificates.sql\", sql)\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n", - "\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "\n", - "# Clean up\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clear out the controller\\_db\\_rw\\_secret variable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controller_db_rw_secret= \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up 
certificate staging area\n", - "\n", - "Remove the certificate files generated on disk (they have now been\n", - "placed in the controller database)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Restart knox gateway service" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod {pod} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER042 - Install signed App-Proxy\n", - " certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)\n", - "\n", - "- [CER031 - Sign Knox certificate with generated\n", - " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n", - "\n", - "- [CER021 - Create Knox\n", - " certificate](../cert-management/cer021-create-knox-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb deleted file mode 100644 index 3fc6c188..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb +++ /dev/null @@ -1,919 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER042 - Install signed App-Proxy certificate\n", - "=============================================\n", - "\n", - "This notebook installs into the Big Data Cluster the certificate signed\n", - "using:\n", - "\n", - "- [CER032 - Sign App-Proxy certificate with generated\n", - " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "app_name = \"app-proxy\"\n", - "scaledset_name = \"appproxy\"\n", - "container_name = \"app-service-proxy\"\n", - "prefix_keyfile_name = \"service-proxy\"\n", - "common_name = \"appproxy-svc\"\n", - "user = \"nginx\"\n", - "group = \"nginx\"\n", - "mode = \"550\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
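[Editor's note: the `run` helper defined in the next cell streams output, surfaces HINT links, and retries commands whose stderr matches a known transient error. The retry idea in isolation looks roughly like the sketch below; names such as `run_with_retry` and `TRANSIENT_MARKERS` are illustrative, not the notebook's actual implementation.]

```python
# Sketch of the retry-on-transient-error pattern used by the notebook's `run`.
import subprocess

TRANSIENT_MARKERS = ("connection attempt failed",)  # substrings treated as retryable
MAX_RETRIES = 5

def run_with_retry(cmd: str, retry_count: int = 0) -> str:
    # Run the command, capture stderr, and recurse (as the notebook's `run`
    # does) when a message known to be transient is seen.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        if retry_count < MAX_RETRIES and any(m in result.stderr for m in TRANSIENT_MARKERS):
            return run_with_retry(cmd, retry_count + 1)
        raise SystemExit(f"'{cmd}' returned non-zero exit code: {result.returncode}")
    return result.stdout
```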
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer042-install-app-proxy-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
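[Editor's note: the lookup can also be pinned ahead of time via the environment variable route mentioned in the NOTE above, since the cell below checks `os.environ` first. A small sketch; "mssql-cluster" is a hypothetical placeholder, not a name from this book:]

```python
# Sketch: pre-set AZDATA_NAMESPACE so the namespace-discovery cell below
# skips the MSSQL_CLUSTER label lookup. Substitute the real BDC namespace.
import os

os.environ["AZDATA_NAMESPACE"] = "mssql-cluster"
print(f"Namespace pinned to: {os.environ['AZDATA_NAMESPACE']}")
```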
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the `app proxy` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the appproxy pod in variable `pod`\n", - "\n", - "pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o 
jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "\n", - "print(f\"App proxy pod name: {pod}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate certificate common name and alt names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "kubernetes_default_record_name = 'kubernetes.default'\n", - "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n", - "default_dns_suffix = 'svc.cluster.local'\n", - "dns_suffix = ''\n", - "\n", - "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\" ', return_output=True)\n", - "\n", - "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n", - "\n", - "if not name or kubernetes_default_svc_prefix not in name[0]:\n", - " dns_suffix = default_dns_suffix\n", - "else:\n", - " dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "alt_names += f\"DNS.1 = {common_name}\\n\"\n", - "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n", - "\n", - "# Stateful set related DNS names\n", - "#\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# AD related DNS names\n", - "#\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if subdomain_name:\n", - " bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - " # Endpoint DNS names for bdc certificates\n", - " #\n", - " if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - " \n", - " # Endpoint DNS names for control plane certificates\n", - " #\n", - " if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " # \"endpoint\": 
\"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc host for key management calls.\n", - " #\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add hdfsvault-svc FQDN for key management calls.\n", - " #\n", - " if bdc_fqdn:\n", - " alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n", - "\n", - "# Get certificate common name and DNS names\n", - "# \n", - "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n", - "subject = re.findall('Subject:(.+)', cert)[0]\n", - "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n", - "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n", - "\n", - "# Validate the common name\n", - "#\n", - "if (common_name != certficate_common_name):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n", - "\n", - "# Validate the DNS names\n", - "#\n", - "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n", - " run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n", - " raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from `controller` to local machine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n", - "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy certifcate files from local machine to `controldb`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n", 
- "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller-db-rw-secret` secret\n", - "\n", - "Get the controller SQL symmetric key password for decryption." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "\n", - "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n", - "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n", - "\n", - "print(\"controller_db_rw_secret retrieved\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update the files table with the certificates through opened SQL connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "sql = f\"\"\"\n", - "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n", - "\n", - "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n", - "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n", - " \n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '{user}',\n", - " @Group = '{group}',\n", - " @Mode = '{mode}';\n", - "\"\"\"\n", - "\n", - "save_file(\"insert_certificates.sql\", sql)\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n", - "\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "\n", - "# Clean up\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clear out the 
controller\\_db\\_rw\\_secret variable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controller_db_rw_secret= \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up certificate staging area\n", - "\n", - "Remove the certificate files generated on disk (they have now been\n", - "placed in the controller database)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Restart Pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod {pod} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n", - "\n", - "- [CER032 - Sign App-Proxy certificate with generated\n", - " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb deleted file mode 100644 index f1afbd7f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb +++ /dev/null @@ -1,911 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER044 - Install signed Controller certificate\n", - "==============================================\n", - "\n", - "This notebook installs into the Big Data Cluster the certificate signed\n", - "using:\n", - "\n", - "- [CER034 - Sign Controller certificate with cluster Root\n", - " CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)\n", - "\n", - "NOTE: At the end of this notebook the Controller pod and all pods that\n", - "use PolyBase (Master Pool and Compute Pool pods) will be restarted to\n", - "load the new certificates.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "app_name = \"controller\"\n", - "scaledset_name = \"control\"\n", - "container_name = \"controller\"\n", - "prefix_keyfile_name = \"controller\"\n", - "common_name = 
\"controller-svc\"\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer044-install-controller-cert.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n",
- "    else:\n",
- "        if \"metadata\" in j and \\\n",
- "            \"azdata\" in j[\"metadata\"] and \\\n",
- "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
- "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
- "\n",
- "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
- "\n",
- "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n",
- "\n",
- "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
- "\n",
- "            return rules\n",
- "\n",
- "def apply_expert_rules(line):\n",
- "    \"\"\"Determine if the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n",
- "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
- "\n",
- "    global rules\n",
- "\n",
- "    for rule in rules:\n",
- "        notebook = rule[1]\n",
- "        cell_type = rule[2]\n",
- "        output_type = rule[3] # i.e. stream or error\n",
- "        output_type_name = rule[4] # i.e. ename or name \n",
- "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
- "        details_name = rule[6]  # i.e. evalue or text \n",
- "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
- "\n",
- "        if debug_logging:\n",
- "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
- "\n",
- "        if re.match(expression, line, re.DOTALL):\n",
- "\n",
- "            if debug_logging:\n",
- "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
- "\n",
- "            match_found = True\n",
- "\n",
- "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
- "\n",
- "\n",
- "\n",
- "print('Common functions defined successfully.')\n",
- "\n",
- "# Hints for binary (transient fault) retry, (known) error and install guide\n",
- "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Get the Kubernetes namespace for the big data cluster\n",
- "\n",
- "Get the namespace of the Big Data Cluster using the kubectl command line\n",
- "interface.\n",
- "\n",
- "**NOTE:**\n",
- "\n",
- "If there is more than one Big Data Cluster in the target Kubernetes\n",
- "cluster, then either:\n",
- "\n",
- "- set \\[0\\] to the correct value for the big data cluster.\n",
- "- set the environment variable AZDATA\\_NAMESPACE, before starting\n",
- "    Azure Data Studio."
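For reference, `apply_expert_rules()` above indexes each rule positionally. A hedged sketch of the list shape it expects, with every field value invented for illustration (real rules come from the notebook's `azdata`/`expert`/`expanded_rules` metadata):

```python
import re

# Illustrative rule only; the values below are hypothetical examples of
# the positional fields apply_expert_rules() reads.
rule = [
    0,                                                       # sort key: lower runs first
    "../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb",   # rule[1]: notebook to HINT
    "code",                                                  # rule[2]: cell_type
    "stream",                                                # rule[3]: output_type (stream or error)
    "name",                                                  # rule[4]: output_type_name (ename or name)
    "stderr",                                                # rule[5]: output_type_value (SystemExit or stdout)
    "text",                                                  # rule[6]: details_name (evalue or text)
    ".*no such host.*",                                      # rule[7]: regex matched against each stderr line
]

if re.match(rule[7], "ERR: lookup mycluster: no such host", re.DOTALL):
    print(f"HINT: Use [{rule[1]}]({rule[1]}) to resolve this issue.")
```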
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate certificate common name and alt names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "kubernetes_default_record_name = 'kubernetes.default'\n", - "kubernetes_default_svc_prefix = 
'kubernetes.default.svc'\n", - "default_dns_suffix = 'svc.cluster.local'\n", - "dns_suffix = ''\n", - "\n", - "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\" ', return_output=True)\n", - "\n", - "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n", - "\n", - "if not name or kubernetes_default_svc_prefix not in name[0]:\n", - " dns_suffix = default_dns_suffix\n", - "else:\n", - " dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n", - "\n", - "alt_names = \"\"\n", - "bdc_fqdn = \"\"\n", - "\n", - "alt_names += f\"DNS.1 = {common_name}\\n\"\n", - "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n", - "\n", - "hdfs_vault_svc = \"hdfsvault-svc\"\n", - "bdc_config = run(\"azdata bdc config show\", return_output=True)\n", - "bdc_config = json.loads(bdc_config)\n", - "\n", - "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n", - "\n", - "# Stateful set related DNS names\n", - "#\n", - "if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# AD related DNS names\n", - "#\n", - "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n", - " domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n", - " subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n", - "\n", - " alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{domain_dns_name}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if app_name == \"gateway\" or app_name == \"master\":\n", - " alt_names += f'DNS.{str(dns_counter)} = {pod}.{domain_dns_name}\\n'\n", - " dns_counter = dns_counter + 1\n", - "\n", - " if subdomain_name:\n", - " bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n", - " else:\n", - " bdc_fqdn = domain_dns_name\n", - "\n", - " # Endpoint DNS names for bdc certificates\n", - " #\n", - " if app_name in bdc_config[\"spec\"][\"resources\"]:\n", - " app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n", - " for endpoint in app_name_endpoints:\n", - " if \"dnsName\" in endpoint:\n", - " alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n", - " dns_counter = dns_counter + 1\n", - " \n", - " # Endpoint DNS names for control plane certificates\n", - " #\n", - " if app_name == \"controller\" or app_name == \"mgmtproxy\":\n", - " bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n", - " bdc_endpoint_list = json.loads(bdc_endpoint_list)\n", - "\n", - " # Parse the DNS host name from:\n", - " #\n", - " # \"endpoint\": \"https://monitor.aris.local:30777\"\n", - " # \n", - " for endpoint in bdc_endpoint_list:\n", - " if endpoint[\"name\"] == app_name:\n", - " url = urlparse(endpoint[\"endpoint\"])\n", - " alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - "# Special case for the controller certificate\n", - "#\n", - "if app_name == \"controller\":\n", - " alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n", - " dns_counter = dns_counter + 1\n", - "\n", - " # Add 
hdfsvault-svc host for key management calls.\n",
- "    #\n",
- "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
- "    dns_counter = dns_counter + 1\n",
- "\n",
- "    # Add hdfsvault-svc FQDN for key management calls.\n",
- "    #\n",
- "    if bdc_fqdn:\n",
- "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
- "        dns_counter = dns_counter + 1\n",
- "\n",
- "required_dns_names = re.findall('DNS\\.[0-9]+ = ([^,|^\\s|^\\n]+)', alt_names)\n",
- "\n",
- "# Get certificate common name and DNS names\n",
- "# \n",
- "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n",
- "subject = re.findall('Subject:(.+)', cert)[0]\n",
- "certificate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
- "certificate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
- "\n",
- "# Validate the common name\n",
- "#\n",
- "if (common_name != certificate_common_name):\n",
- "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
- "    raise SystemExit(f'Certificate common name does not match the expected one: {common_name}')\n",
- "\n",
- "# Validate the DNS names\n",
- "#\n",
- "if not all(dns_name in certificate_dns_names for dns_name in required_dns_names):\n",
- "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
- "    raise SystemExit(f'Certificate does not have all required DNS names: {required_dns_names}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificate files from `controller` to local machine"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "cwd = os.getcwd()\n",
- "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
- "\n",
- "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 {prefix_keyfile_name}-certificate.p12 -c controller -n {namespace}')\n",
- "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
- "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
- "\n",
- "os.chdir(cwd)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Copy certificate files from local machine to `controldb`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "cwd = os.getcwd()\n",
- "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
- "\n",
- "run(f'kubectl cp {prefix_keyfile_name}-certificate.p12 controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.p12 -c mssql-server -n {namespace}')\n",
- "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n",
- "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n",
- "\n",
- "os.chdir(cwd)"
- ]
- },
- {
- "cell_type":
"markdown", - "metadata": {}, - "source": [ - "### Get the `controller-db-rw-secret` secret\n", - "\n", - "Get the controller SQL symmetric key password for decryption." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "\n", - "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n", - "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n", - "\n", - "print(\"controller_db_rw_secret retrieved\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update the files table with the certificates through opened SQL connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "sql = f\"\"\"\n", - "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n", - "\n", - "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n", - "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.p12', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-certificate.p12',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '',\n", - " @Group = '',\n", - " @Mode = '';\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '',\n", - " @Group = '',\n", - " @Mode = '';\n", - "\n", - "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n", - "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n", - " @Data = @FileData,\n", - " @KeyGuid = @Key,\n", - " @Version = '0',\n", - " @User = '',\n", - " @Group = '',\n", - " @Mode = '';\n", - "\"\"\"\n", - "\n", - "save_file(\"insert_certificates.sql\", sql)\n", - "\n", - "cwd = os.getcwd()\n", - "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n", - "\n", - "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n", - "\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "\n", - "# Cleanup\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.p12\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm 
/var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n", - "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n", - "\n", - "os.chdir(cwd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up certificate staging area\n", - "\n", - "Remove the certificate files generated on disk (they have now been\n", - "placed in the controller database)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n", - "\n", - "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clear out the controller\\_db\\_rw\\_secret variable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controller_db_rw_secret= \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Restart `controller` to pick up new certificates.\n", - "\n", - "Delete the controller pod so that it can restart the controller and pick\n", - "up new certificates." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod {controller} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER024 - Create Controller\n", - " certificate](../cert-management/cer024-create-controller-cert.ipynb)\n", - "\n", - "- [CER034 - Sign Controller certificate with cluster Root\n", - " CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb deleted file mode 100644 index 44983b9d..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb +++ /dev/null @@ -1,264 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER050 - Wait for BDC to be Healthy\n", - "===================================\n", - "\n", - "This notebook will wait until the Big Data Cluster has returned to a\n", - "healthy state, after the `Controller` pod and pods that use `PolyBase`\n", - "have been restarted to load the new certificates.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "timeout = 600 # amount of time to wait before cluster is healthy: default to 10 minutes\n", - "check_interval = 30 # amount of time between health checks - default 30 seconds\n", - "min_pod_count = 10 # minimum number of healthy pods required to assert health" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper functions for waiting for the cluster to become healthy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "import threading\n", - "import time\n", - "import sys\n", - "import os\n", - "from IPython.display import Markdown\n", - "\n", - "isRunning = True\n", - "\n", - "def all_containers_ready(pod):\n", - " \"\"\"helper method returns true if all the containers within the given pod are ready\n", - "\n", - " Arguments:\n", - " pod {v1Pod} -- Metadata retrieved from the api call to.\n", - " \"\"\"\n", - " \n", - " return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n", - "\n", - "\n", - "def pod_is_ready(pod):\n", - " \"\"\"tests that the pod, and all containers are ready\n", - "\n", - " Arguments:\n", - " pod {v1Pod} -- Metadata retrieved from api call.\n", - " \"\"\"\n", - "\n", - " return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n", - "\n", - "\n", - "def waitReady():\n", - " \"\"\"Waits for all pods, and containers to become ready.\n", - " \"\"\"\n", - " while isRunning:\n", - " try:\n", - " time.sleep(check_interval)\n", - " pods = get_pods()\n", - " allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n", - "\n", - " if allReady:\n", - " return True\n", - " else:\n", - " display(Markdown(get_pod_failures(pods)))\n", - " display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n", - " except Exception as ex:\n", - " last_error_message = str(ex)\n", - " display(Markdown(last_error_message))\n", - " time.sleep(check_interval)\n", - "\n", - "def get_pod_failures(pods=None):\n", - " \"\"\"Returns a status message for any pods that are not ready.\n", - " \"\"\"\n", - " results = \"\"\n", - " if not pods:\n", - " pods = get_pods()\n", - "\n", - " for pod in pods.items:\n", - " if \"job-name\" not in pod.metadata.labels:\n", - " if pod.status and pod.status.container_statuses:\n", - " for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n", - " results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2}
\".format(container.name, pod.metadata.name, container.state) \n", - " else:\n", - " results = results + \"Pod {0} is not ready.
\".format(pod.metadata.name)\n", - " return results\n", - "\n", - "\n", - "def get_pods():\n", - " \"\"\"Returns a list of pods by namespace, or all namespaces if no namespace is specified\n", - " \"\"\"\n", - " pods = None\n", - " if namespace is not None:\n", - " display(Markdown(f'Checking namespace {namespace}'))\n", - " pods = api.list_namespaced_pod(namespace, _request_timeout=30) \n", - " else:\n", - " display(Markdown('Checking all namespaces'))\n", - " pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n", - " return pods\n", - "\n", - "def wait_for_cluster_healthy():\n", - " isRunning = True\n", - " mt = threading.Thread(target=waitReady)\n", - " mt.start()\n", - " mt.join(timeout=timeout)\n", - "\n", - " if mt.isAlive():\n", - " raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n", - " \n", - " isRunning = False" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait for cluster to to get healthy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "wait_for_cluster_healthy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb deleted file mode 100644 index 46d32a95..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb +++ /dev/null @@ -1,620 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER100 - Configure Cluster with Self Signed Certificates\n", - "========================================================\n", - "\n", - "This notebook will:\n", - "\n", - "1. Generate a new Root CA in the Big Data Cluster\n", - "2. Create new certificates for each endpoint (Management, Gateway,\n", - " App-Proxy and Controller)\n", - "3. Sign each new certificate with the new generated Root CA, except the\n", - " Controller cert (which is signed with the existing cluster Root CA)\n", - "4. Install each certificate into the Big Data Cluster\n", - "5. 
Download the newly generated Root CA into this machine\u2019s Trusted Root\n", - "   Certification Authorities certificate store.\n", - "\n", - "All generated self-signed certificates will be stored in the controller\n", - "pod (at the `test_cert_store_root` location).\n", - "\n", - "**NOTE: When CER010 runs (the 3rd notebook), look for the \u2018Security\n", - "Warning\u2019 dialog to pop up, and press \u2018Yes\u2019 to accept the installation of\n", - "the new Root CA into this machine\u2019s certificate store.**\n", - "\n", - "Upon completion of this notebook, all https:// access to the Big Data\n", - "Cluster from this machine (and any machine that installs the new Root\n", - "CA) will show as being secure.\n", - "\n", - "The Notebook Runner chapter ensures that the CronJobs created (OPR003) to\n", - "run App-Deploy install the cluster Root CA, to allow securely\n", - "getting JWT tokens and the swagger.json.\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters\n", - "\n", - "The parameters set here will override the default parameters set in each\n", - "individual notebook (`azdata notebook run` injects a `Parameters` cell\n", - "at runtime with the values passed in from the `-a` argument)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "common_name = \"SQL Server Big Data Clusters Test CA\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificates for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define notebooks and their arguments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import copy\n", - "\n", - "cer00_args = { \"country_name\": country_name, \"state_or_province_name\": state_or_province_name, \"locality_name\": locality_name, \"organization_name\": organization_name, \"organizational_unit_name\": organizational_unit_name, \"common_name\": common_name, \"email_address\": email_address, \"days\": days, \"test_cert_store_root\": test_cert_store_root }\n", - "\n", - "cer02_args = copy.deepcopy(cer00_args)\n", - "cer02_args.pop(\"common_name\") # no common_name (as this is the service name set per endpoint)\n", - "\n", - "cer04_args = { \"test_cert_store_root\": test_cert_store_root }\n", - "\n", - "notebooks = [\n", - "    [ os.path.join(\"..\", \"common\", \"sop028-azdata-login.ipynb\"), {} ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer001-create-root-ca.ipynb\"), cer00_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer010-install-generated-root-ca-locally.ipynb\"), cer04_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer020-create-management-service-proxy-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer021-create-knox-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer022-create-app-proxy-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer023-create-master-cert.ipynb\"), cer02_args ],\n", - "    [ 
os.path.join(\"..\", \"cert-management\", \"cer024-create-controller-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer030-sign-service-proxy-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer031-sign-knox-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer032-sign-app-proxy-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer033-sign-master-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer034-sign-controller-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer040-install-service-proxy-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer041-install-knox-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer042-install-app-proxy-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer043-install-master-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer044-install-controller-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer050-wait-cluster-healthly.ipynb\"), {} ]\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
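"                                # after a transient-fault retry, hand back the recursive call's output, base64-decoded when the caller requested it\n",
- 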
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer100-create-root-ca-install-certs.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function for running notebooks with `azdata notebook run`\n", - "\n", - "To pass \u2018list\u2019 types to `azdata notebook run --arguments`, flatten to\n", - "string" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'run_notebook'\n", - "\n", - "def run_notebook(name, arguments):\n", - " for key, value in arguments.items():\n", - " if isinstance(value, list):\n", - " arguments[key] = str(value).replace(\"'\", \"\") # Remove the quotes, to enable passing to azdata notebook run --arguments\n", - " elif isinstance(value, bool):\n", - " arguments[key] = '\"' + str(value) + '\"' # Add quotes, to enable passing to azdata notebook run --arguments, use bool(arg) to parse in target notebooks\n", - "\n", - " # --arguments have to be passed as \\\" \\\" quoted strings on Windows cmd line\n", - " #\n", - " arguments = str(arguments).replace(\"'\", '\\\\\"') \n", - "\n", - " # `app create` and `app run` can take a long time, so pass in a 30 minute cell timeout\n", - " #\n", - " # The cwd for the azdata process about to be launched becomes the --output-path (or the autogenerated one\n", - " # if it isn't specified), but these canary notebooks go onto run the notebooks in the notebook-o16n\n", - " # directory, using a relative link, so here we set the --output-path to the cwd. 
This isn't great because\n", - " # then the output-* notebooks also go into this directory (which is the location of the book)\n", - " #\n", - " run(f'azdata notebook run -p \"{os.path.join(\"..\", \"notebook-o16n\", name)}\" --arguments \"{arguments}\" --output-path \"{os.getcwd()}\" --output-html --timeout 1800')\n", - "\n", - "print(\"Function 'run_notebook' defined\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the notebooks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for notebook in notebooks:\n", - " run_notebook(notebook[0], notebook[1])\n", - "\n", - "print(\"Notebooks ran successfully.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "- [CER020 - Create Management Proxy\n", - " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n", - "\n", - "- [CER021 - Create Knox\n", - " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER031 - Sign Knox certificate with generated\n", - " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n", - "\n", - "- [CER032 - Sign App-Proxy certificate with generated\n", - " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n", - "\n", - "- [CER041 - Install signed Knox\n", - " certificate](../cert-management/cer041-install-knox-cert.ipynb)\n", - "\n", - "- [CER042 - Install signed App-Proxy\n", - " certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)\n", - "\n", - "- [CER010 - Install generated Root CA\n", - " locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb b/Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb deleted file mode 100644 index 91301bac..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb +++ /dev/null @@ -1,609 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CER101 - Configure Cluster with Self Signed Certificates using existing Root CA\n", - "===============================================================================\n", - "\n", - "This notebook will:\n", - "\n", - "1. Use an existing generated Root CA in the Big Data Cluster (uploaded\n", - " with CER003)\n", - "2. Create new certificates for each endpoint (Management, Gateway,\n", - " App-Proxy and Controller)\n", - "3. 
Sign each new certificate with the uploaded Root CA, except the\n", - "   Controller cert (which is signed with the existing cluster Root CA)\n", - "4. Install each certificate into the Big Data Cluster\n", - "\n", - "All generated self-signed certificates will be stored in the controller\n", - "pod (at the `test_cert_store_root` location).\n", - "\n", - "Upon completion of this notebook, all https:// access to the Big Data\n", - "Cluster from this machine (and any machine that installs the new Root\n", - "CA) will show as being secure.\n", - "\n", - "The Notebook Runner chapter ensures that the CronJobs created (OPR003) to\n", - "run App-Deploy install the cluster Root CA, to allow securely\n", - "getting JWT tokens and the swagger.json.\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters\n", - "\n", - "The parameters set here will override the default parameters set in each\n", - "individual notebook (`azdata notebook run` injects a `Parameters` cell\n", - "at runtime with the values passed in from the `-a` argument)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "common_name = \"SQL Server Big Data Clusters Test CA\"\n", - "\n", - "country_name = \"US\"\n", - "state_or_province_name = \"Illinois\"\n", - "locality_name = \"Chicago\"\n", - "organization_name = \"Contoso\"\n", - "organizational_unit_name = \"Finance\"\n", - "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n", - "\n", - "days = \"398\" # the number of days to certify the certificates for\n", - "\n", - "test_cert_store_root = \"/var/opt/secrets/test-certificates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define notebooks and their arguments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "cer02_args = { \"country_name\": country_name, \"state_or_province_name\": state_or_province_name, \"locality_name\": locality_name, \"organization_name\": organization_name, \"organizational_unit_name\": organizational_unit_name, \"email_address\": email_address, \"days\": days, \"test_cert_store_root\": test_cert_store_root }\n", - "\n", - "cer04_args = { \"test_cert_store_root\": test_cert_store_root }\n", - "\n", - "notebooks = [\n", - "    [ os.path.join(\"..\", \"common\", \"sop028-azdata-login.ipynb\"), {} ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer020-create-management-service-proxy-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer021-create-knox-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer022-create-app-proxy-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer023-create-master-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer024-create-controller-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer030-sign-service-proxy-generated-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer031-sign-knox-generated-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer032-sign-app-proxy-generated-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", \"cert-management\", \"cer033-sign-master-generated-cert.ipynb\"), cer02_args ],\n", - "    [ os.path.join(\"..\", 
\"cert-management\", \"cer034-sign-controller-generated-cert.ipynb\"), cer02_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer040-install-service-proxy-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer041-install-knox-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer042-install-app-proxy-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer043-install-master-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer044-install-controller-cert.ipynb\"), cer04_args ],\n", - " [ os.path.join(\"..\", \"cert-management\", \"cer050-wait-cluster-healthly.ipynb\"), {} ]\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"cer101-use-root-ca-install-certs.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function for running notebooks with `azdata notebook run`\n", - "\n", - "To pass \u2018list\u2019 types to `azdata notebook run --arguments`, flatten to\n", - "string" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'run_notebook'\n", - "\n", - "def run_notebook(name, arguments):\n", - " for key, value in arguments.items():\n", - " if isinstance(value, list):\n", - " arguments[key] = str(value).replace(\"'\", \"\") # Remove the quotes, to enable passing to azdata notebook run --arguments\n", - " elif isinstance(value, bool):\n", - " arguments[key] = '\"' + str(value) + '\"' # Add quotes, to enable passing to azdata notebook run --arguments, use bool(arg) to parse in target notebooks\n", - "\n", - " # --arguments have to be passed as \\\" \\\" quoted strings on Windows cmd line\n", - " #\n", - " arguments = str(arguments).replace(\"'\", '\\\\\"') \n", - "\n", - " # `app create` and `app run` can take a long time, so pass in a 30 minute cell timeout\n", - " #\n", - " # The cwd for the azdata process about to be launched becomes the --output-path (or the autogenerated one\n", - " # if it isn't specified), but these canary notebooks go onto run the notebooks in the notebook-o16n\n", - " # directory, using a relative link, so here we set the --output-path to the cwd. 
This isn't great because\n", - " # then the output-* notebooks also go into this directory (which is the location of the book)\n", - " #\n", - " run(f'azdata notebook run -p \"{os.path.join(\"..\", \"notebook-o16n\", name)}\" --arguments \"{arguments}\" --output-path \"{os.getcwd()}\" --output-html --timeout 1800')\n", - "\n", - "print(\"Function 'run_notebook' defined\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the notebooks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for notebook in notebooks:\n", - " run_notebook(notebook[0], notebook[1])\n", - "\n", - "print(\"Notebooks ran successfully.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [CER001 - Generate a Root CA\n", - " certificate](../cert-management/cer001-create-root-ca.ipynb)\n", - "\n", - "- [CER020 - Create Management Proxy\n", - " certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n", - "\n", - "- [CER021 - Create Knox\n", - " certificate](../cert-management/cer021-create-knox-cert.ipynb)\n", - "\n", - "- [CER022 - Create App Proxy\n", - " certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n", - "\n", - "- [CER030 - Sign Management Proxy certificate with generated\n", - " CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER031 - Sign Knox certificate with generated\n", - " CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n", - "\n", - "- [CER032 - Sign App-Proxy certificate with generated\n", - " CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n", - "\n", - "- [CER040 - Install signed Management Proxy\n", - " certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n", - "\n", - "- [CER041 - Install signed Knox\n", - " certificate](../cert-management/cer041-install-knox-cert.ipynb)\n", - "\n", - "- [CER042 - Install signed App-Proxy\n", - " certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)\n", - "\n", - "- [CER010 - Install generated Root CA\n", - " locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/cert-management/readme.md b/Big-Data-Clusters/CU8/Public/content/cert-management/readme.md deleted file mode 100644 index 8992db28..00000000 --- a/Big-Data-Clusters/CU8/Public/content/cert-management/readme.md +++ /dev/null @@ -1,114 +0,0 @@ -# A set of notebooks used for Certificate Management - -The notebooks in this chapter can be used to create a self-signed root certificate authority (or allow for one to be uploaded), and then use that root CA to create and sign certificates for each external endpoint in a Big Data Cluster. - -After running the notebook in this chapter, and installing the Root CA certificate locally, all connections to the Big Data Cluster can be made securely (i.e. the internet browser will indicate "This Connection is Secure". The following notebook can be used to install the Root CA certificate locally on this machine. 
- -- CER010 - Install generated Root CA locally - -## Run the notebooks in sequence - -These two notebooks run the required notebooks in this chapter in sequence, in a single 'run all cells' button press. - -- CER100 - Configure Cluster with Self Signed Certificates -- CER101 - Configure Cluster with Self Signed Certificates using existing Root CA - -The first notebook (CER100) generates a new Root CA certificate. The second notebook (CER101) uses an already existing Root CA, downloaded and uploaded using: - -- CER002 - Download existing Root CA certificate -- CER003 - Upload existing Root CA certificate - -## Details - -- By default, the Big Data Cluster generates its own Root CA certificate and all the certificates used inside the cluster are signed with this Root CA certificate. External clients connecting to cluster endpoints will not have this internal Root CA installed, and this leads to certificate verification warnings on clients (internet browsers etc.) and the need to use the --insecure option with tools like CURL. - -- It is better if the certificates for the external endpoints in the Big Data Cluster can be provided and installed in the containers hosting the endpoint services, preferably using your own trusted CA to sign these certificates and then installing the CA chain inside the cluster. The notebooks in this chapter aid in this process by creating a self-signed Root CA certificate and then creating certificates for each external endpoint signed by the self-signed Root CA certificate. - -- The openssl certificate tracking database is created in the `controller` pod, in the `/var/opt/secrets/test-certificates` folder. A record of each certificate that has been issued is maintained there for tracking purposes. - -[Home](../readme.md) - -## Notebooks in this Chapter -- [CER001 - Generate a Root CA certificate -](cer001-create-root-ca.ipynb) - -- [CER002 - Download existing Root CA certificate -](cer002-download-existing-root-ca.ipynb) - -- [CER003 - Upload existing Root CA certificate -](cer003-upload-existing-root-ca.ipynb) - -- [CER004 - Download and Upload existing Root CA certificate -](cer004-download-upload-existing-root-ca.ipynb) - -- [CER010 - Install generated Root CA locally -](cer010-install-generated-root-ca-locally.ipynb) - -- [CER020 - Create Management Proxy certificate -](cer020-create-management-service-proxy-cert.ipynb) - -- [CER021 - Create Knox certificate -](cer021-create-knox-cert.ipynb) - -- [CER022 - Create App Proxy certificate -](cer022-create-app-proxy-cert.ipynb) - -- [CER023 - Create Master certificate -](cer023-create-master-cert.ipynb) - -- [CER024 - Create Controller certificate -](cer024-create-controller-cert.ipynb) - -- [CER025 - Upload existing Management Proxy certificate -](cer025-upload-management-service-proxy-cert.ipynb) - -- [CER026 - Upload existing Gateway certificate -](cer026-upload-knox-cert.ipynb) - -- [CER027 - Upload existing App Service Proxy certificate -](cer027-upload-app-proxy-cert.ipynb) - -- [CER028 - Upload existing Master certificate -](cer028-upload-master-cert.ipynb) - -- [CER029 - Upload existing Controller certificate -](cer029-upload-controller-cert.ipynb) - -- [CER030 - Sign Management Proxy certificate with generated CA -](cer030-sign-service-proxy-generated-cert.ipynb) - -- [CER031 - Sign Knox certificate with generated CA -](cer031-sign-knox-generated-cert.ipynb) - -- [CER032 - Sign App-Proxy certificate with generated CA -](cer032-sign-app-proxy-generated-cert.ipynb) - -- [CER033 - Sign 
Master certificate with generated CA -](cer033-sign-master-generated-cert.ipynb) - -- [CER034 - Sign Controller certificate with cluster Root CA -](cer034-sign-controller-generated-cert.ipynb) - -- [CER035 - Sign Controller certificate with external Root CA -](cer035-ca-sign-controller-generated-cert.ipynb) - -- [CER040 - Install signed Management Proxy certificate -](cer040-install-service-proxy-cert.ipynb) - -- [CER041 - Install signed Knox certificate -](cer041-install-knox-cert.ipynb) - -- [CER042 - Install signed App-Proxy certificate -](cer042-install-app-proxy-cert.ipynb) - -- [CER044 - Install signed Controller certificate -](cer044-install-controller-cert.ipynb) - -- [CER050 - Wait for BDC to be Healthy -](cer050-wait-cluster-healthly.ipynb) - -- [CER100 - Configure Cluster with Self Signed Certificates -](cer100-create-root-ca-install-certs.ipynb) - -- [CER101 - Configure Cluster with Self Signed Certificates using existing Root CA -](cer101-use-root-ca-install-certs.ipynb) diff --git a/Big-Data-Clusters/CU8/Public/content/common/readme.md b/Big-Data-Clusters/CU8/Public/content/common/readme.md deleted file mode 100644 index e8335c0e..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/readme.md +++ /dev/null @@ -1,25 +0,0 @@ -# A set of notebooks used for common scenarios - -- The notebooks in this chapter are used as prerequisites for other notebooks, such as logging in and out of a cluster. - -[Home](../readme.md) - -## Notebooks in this Chapter -- [SOP005 - az login ](sop005-az-login.ipynb) - -- [SOP006 - az logout ](sop006-az-logout.ipynb) - -- [SOP007 - Version information (azdata, bdc, kubernetes) ](sop007-get-key-version-information.ipynb) - -- [SOP011 - Set kubernetes configuration context ](sop011-set-kubernetes-context.ipynb) - -- [SOP013 - Create secret for azdata login (inside cluster)](sop013-create-secret-for-azdata-login.ipynb) - -- [SOP014 - Delete secret for azdata login (inside cluster)](sop014-delete-secret-for-azdata-login.ipynb) - -- [SOP028 - azdata login ](sop028-azdata-login.ipynb) - -- [SOP033 - azdata logout ](sop033-azdata-logout.ipynb) - -- [SOP034 - Wait for BDC to be Healthy ](sop034-wait-cluster-healthly.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb deleted file mode 100644 index 562a0db1..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb +++ /dev/null @@ -1,400 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP005 - az login\n", - "=================\n", - "\n", - "Use the az command line interface to log in to Azure.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
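Referring back to the cert-management readme above, here is a rough, hypothetical sketch of the kind of openssl steps the CER notebooks automate (the file names, subjects, and validity periods are invented for illustration and are not taken from the notebooks):

```python
import subprocess

def sh(cmd):
    """Tiny helper for this sketch only; the notebooks use their own `run` helper."""
    subprocess.run(cmd, shell=True, check=True)

# 1. Create a self-signed Root CA (the kind of step CER001 automates)
sh('openssl req -x509 -newkey rsa:2048 -days 730 -nodes '
   '-keyout cluster-ca.key -out cluster-ca.pem -subj "/CN=Cluster Root CA"')

# 2. Create an endpoint key and certificate signing request (a CER02x-style step)
sh('openssl req -newkey rsa:2048 -nodes -keyout knox.key '
   '-out knox.csr -subj "/CN=knox-endpoint"')

# 3. Sign the endpoint CSR with the Root CA (a CER03x-style step)
sh('openssl x509 -req -in knox.csr -CA cluster-ca.pem -CAkey cluster-ca.key '
   '-CAcreateserial -days 365 -out knox.pem')
```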
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop005-az-login.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any \* back to * (the rule was stored with the * escaped)\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Log in to Azure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"az login\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [SOP006 - az logout](../common/sop006-az-logout.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb deleted file mode 100644 index f649f571..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb +++ /dev/null @@ -1,400 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP006 - az logout\n", - "==================\n", - "\n", - "Use the az command line interface to log out of Azure.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
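The `run` helper above retries a command when its stderr matches one of the known transient-fault strings in `retry_hints`, up to `MAX_RETRIES` attempts. A pared-down sketch of that pattern (hypothetical and simplified; the hint string is invented for illustration):

```python
import subprocess

MAX_RETRIES = 5
retry_hints = {"kubectl": ["connection attempt failed"]}  # illustrative value only

def run_with_retry(cmd_parts, retry_count=0):
    """Re-run a command when its stderr matches a known transient fault."""
    p = subprocess.run(cmd_parts, capture_output=True, text=True)
    for hint in retry_hints.get(cmd_parts[0], []):
        if hint in p.stderr and retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {hint})")
            return run_with_retry(cmd_parts, retry_count + 1)
    return p

# Example (requires kubectl on the path): run_with_retry(["kubectl", "get", "nodes"])
```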
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop006-az-logout.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any \* back to * (the rule was stored with the * escaped)\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Log out of Azure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"az logout\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [SOP005 - az login](../common/sop005-az-login.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb deleted file mode 100644 index cab0d137..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb +++ /dev/null @@ -1,516 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP007 - Version information (azdata, bdc, kubernetes)\n", - "======================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
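The `apply_expert_rules` function above matches each stderr line against regular expressions stored in the notebook's own metadata (`expanded_rules`), and emits a HINT linking to a follow-on TSG/SOP when one matches. A minimal standalone sketch of that matching step, with an invented rule tuple of the same shape:

```python
import re

# Rule shape used above: (priority, notebook, cell_type, output_type,
#                         name, value, details, regex) -- values invented here.
rules = [
    (0, "tsg036-get-controller-logs.ipynb", "code", "error",
     "ename", "SystemExit", "evalue", r".*ControllerError.*"),
]

def apply_expert_rules(line):
    for rule in sorted(rules):  # lowest priority value runs first
        notebook, expression = rule[1], rule[7]
        if re.match(expression, line, re.DOTALL):
            print(f"HINT: Use {notebook} to resolve this issue.")

apply_expert_rules('Error processing command: "ControllerError ..."')
```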
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop007-get-key-version-information.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any \* back to * (the rule was stored with the * escaped)\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get Kubernetes version information\n", - "\n", - "Get just the client version information first, in case the server is not\n", - "responding.\n", - "\n", - "Information about the minimum supported Kubernetes version can be found here:\n", - "\n", - "- https://docs.microsoft.com/en-us/sql/big-data-cluster/deploy-big-data-tools" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl version --client -o yaml')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then get the full version information, including the server\n", - "version."
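If the version numbers are needed programmatically rather than as raw YAML, one way to extract them is sketched below (an assumption on my part: it requires `kubectl` on the path and PyYAML installed, which these notebooks do not otherwise require):

```python
import subprocess
import yaml  # assumption: PyYAML is available

out = subprocess.run(["kubectl", "version", "--client", "-o", "yaml"],
                     capture_output=True, text=True, check=True).stdout
print(yaml.safe_load(out)["clientVersion"]["gitVersion"])
```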
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl version -o yaml')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get `azdata` version information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata --version')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the image name for the `Big Data Cluster` config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pod controldb-0 -n {namespace} -o jsonpath={{.spec.containers[0].image}}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `Big Data Cluster` config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc config show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb deleted file mode 100644 index 34974146..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb +++ /dev/null
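For reference, the same label-based namespace lookup can also be done with the `kubernetes` Python module (see SOP059); this is a sketch of an alternative under that assumption, not what the notebook itself runs:

```python
from kubernetes import client, config  # assumption: kubernetes module installed

config.load_kube_config()  # honors the current kubectl context
items = client.CoreV1Api().list_namespace(label_selector="MSSQL_CLUSTER").items
if items:
    print(items[0].metadata.name)  # the BDC namespace
```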
@@ -1,503 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP011 - Set kubernetes configuration context\n", - "=============================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Set the kubernetes configuration to use.\n", - "\n", - "NOTE: To view available contexts use the following TSG:\n", - "\n", - "- [TSG010 - Get configuration\n", - " contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "context_name = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop011-set-kubernetes-context.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List available contexts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if context_name is None:\n", - " contexts = run('kubectl config get-contexts --output name', return_output=True)\n", - " contexts =contexts.split(\"\\n\")[:-1]\n", - "\n", - " counter = 0\n", - " for context in contexts:\n", - " print(f'{counter}. {context}')\n", - " counter += 1\n", - "else:\n", - " print(f'context_name: {context_name}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Select a context (if not set as a parameter)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if context_name is None:\n", - " context_name = contexts[5] # <-- select context here (set ordinal)\n", - "\n", - "print(f'context_name: {context_name}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Log out using azdata\n", - "\n", - "To avoid a situation where the `Kubernetes` context is for a cluster\n", - "which is not hosting the Big Data Cluster `azdata` currently logged\n", - "into." 
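The "Select a context" cell above hardcodes ordinal 5 into `contexts[5]`, which raises an opaque `IndexError` when fewer contexts exist. A slightly defensive variant of that cell (a sketch only; `contexts` and `context_name` come from the preceding cells, and the ordinal remains something the operator must set):

```python
ordinal = 5  # <-- set to the number printed by the "List available contexts" cell

if context_name is None:
    if ordinal >= len(contexts):
        # Fail with a readable message instead of a bare IndexError.
        raise SystemExit(f"Ordinal {ordinal} is out of range: only "
                         f"{len(contexts)} context(s) are available.")
    context_name = contexts[ordinal]

print(f'context_name: {context_name}')
```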
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata logout')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set the kubernetes configuration to use" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl config use-context {context_name}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG010 - Get configuration\n", - " contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb deleted file mode 100644 index 8302212a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb +++ /dev/null @@ -1,631 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP013 - Create secret for azdata login (inside cluster)\n", - "========================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Create a secret in the Kubernetes Secret Store, to:\n", - "\n", - "- Run app-deploys (i.e.\u00a0`azdata app run`)\n", - "- Save results in HDFS at /app-deploy\n", - "- Enable SOP028 to perform `azdata login` when run from inside the Big\n", - " Data Cluster. This is needed for example, when running notebooks in\n", - " an app-deploy (which is inside a Big Data Cluster)\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "azdata_login_username = os.environ[\"AZDATA_USERNAME\"] if \"AZDATA_USERNAME\" in os.environ else \"\" \n", - "azdata_login_password = os.environ[\"AZDATA_PASSWORD\"] if \"AZDATA_PASSWORD\" in os.environ else \"\"\n", - "\n", - "# If an Active Directory (secure) cluster, provide a domain account domain name, i.e. username@domain_name\n", - "#\n", - "azdata_login_domain_name = os.environ[\"DOMAIN_SERVICE_ACCOUNT_DOMAIN_NAME\"] if \"DOMAIN_SERVICE_ACCOUNT_DOMAIN_NAME\" in os.environ else \"\" # This should be UPPER CASE\n", - "\n", - "azdata_login_secret_name = \"azdata-login-notebook-run-secret\"\n", - "\n", - "print(\"Parameters set for user name: \"+ azdata_login_username)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
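The cell that follows defines the shared `run()` helper in full. Stripped of its platform workarounds and HINT machinery, the transient-fault handling at its core reduces to "retry when stderr matches a known-transient message"; a minimal sketch of just that pattern (the function name and hint strings here are illustrative):

```python
import subprocess

MAX_RETRIES = 5
# Substrings of stderr known (or assumed here) to indicate a transient fault.
RETRY_HINTS = ["connection attempt failed", "Failed to get state for cluster"]

def run_with_retry(cmd: list, retry_count: int = 0) -> str:
    """Run cmd, retrying up to MAX_RETRIES times when stderr looks transient."""
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        if retry_count < MAX_RETRIES and any(h in proc.stderr for h in RETRY_HINTS):
            print(f"RETRY: {retry_count + 1} (of {MAX_RETRIES})")
            return run_with_retry(cmd, retry_count + 1)
        raise SystemExit(f"{' '.join(cmd)} failed:\n{proc.stderr}")
    return proc.stdout
```

The real helper layers on Windows quoting rules, binary resolution via `shutil.which`, HINT injection from the notebook metadata, and the `azdata notebook run` hang workaround, but the retry skeleton is the same.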
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop013-create-secret-for-azdata-login.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Establish if cluster is Active Directory enabled\n", - "\n", - "An Active Directory enabled cluster will have a `dns` pod. Non-Active\n", - "Directory enabled clusters do not have a `dns` pod." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dns_pod = run(f'kubectl get pods -n {namespace} -o name -l app=dns', return_output=True)\n", - "\n", - "if len(dns_pod) > 0:\n", - " is_ad_enabled_cluster = True\n", - " print(f\"Cluster {namespace} is an Active Directory enabled cluster\")\n", - "else:\n", - " is_ad_enabled_cluster = False\n", - " print(f\"Cluster {namespace} is not an Active Directory enabled cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Is notebook being run inside a Kubernetes cluster\n", - "\n", - "When this notebook is running inside a Kubernetes cluster, such as\n", - "when running inside an App-Deploy pod, there is no KUBECONFIG present,\n", - "therefore azdata login needs to use the -e (endpoint) approach to log in."
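The check in the next cell works because the kubelet injects `KUBERNETES_SERVICE_HOST` and `KUBERNETES_SERVICE_PORT` into every container it starts, so their absence is a reliable signal that the code is running outside a pod. As a reusable predicate (a sketch; the token-file test is an alternative signal, not something the notebook uses):

```python
import os

def in_kubernetes_pod() -> bool:
    """True inside a pod: the kubelet injects these service env vars."""
    if "KUBERNETES_SERVICE_HOST" in os.environ and "KUBERNETES_SERVICE_PORT" in os.environ:
        return True
    # Alternative signal: every pod mounts a service-account token by default.
    return os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount/token")
```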
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " inside_kubernetes_cluster = True\n", - " print(\"This notebook is running inside a Kubernetes cluster\")\n", - "else:\n", - " inside_kubernetes_cluster = False\n", - " print(\"This notebook is not running inside a Kubernetes cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Verify `azdata login` does work with these credentials" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if not inside_kubernetes_cluster and not is_ad_enabled_cluster:\n", - " os.environ[\"AZDATA_USERNAME\"] = azdata_login_username\n", - " os.environ[\"AZDATA_PASSWORD\"] = azdata_login_password\n", - "\n", - " print(f'Verifying login for user: {azdata_login_username}')\n", - " try:\n", - " run(f\"azdata login --namespace {namespace} --auth basic\")\n", - " finally:\n", - " del os.environ[\"AZDATA_USERNAME\"]\n", - " del os.environ[\"AZDATA_PASSWORD\"] \n", - "else:\n", - " print(\"SKIPPED: Can't test the credentials if running inside a Kubernetes cluster, because SOP028 will try to find the secret that hasn't been created yet, or if an AD (secure) cluster, because the client will use the current credentials, not the credentials provided above in the Parameters.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Secret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Delete K8s secret if previously created\n", - "# \n", - "secret = run(f\"kubectl get secrets --field-selector metadata.name={azdata_login_secret_name} -n {namespace} --no-headers -o jsonpath={{.items}}\", return_output=True)\n", - "\n", - "if secret != \"[]\":\n", - " run(f\"kubectl delete secret {azdata_login_secret_name} -n {namespace}\")\n", - "\n", - "if is_ad_enabled_cluster:\n", - " print(f\"Cluster {namespace} is an Active Directory enabled cluster, create username@domain credential\")\n", - "\n", - " if len(azdata_login_username) == 0 or \\\n", - " len(azdata_login_domain_name) == 0 or \\\n", - " len(azdata_login_password) == 0 or \\\n", - " azdata_login_username == \"\" or \\\n", - " azdata_login_domain_name == \"\" or \\\n", - " azdata_login_password == \"\":\n", - " raise SystemExit(\"This is an Active Directory (secure) cluster, please provide a domain account that has required permissions to run app-deploys and place the executed notebook files in HDFS (variables: azdata_login_username/azdata_login_domain_name/azdata_login_password)\")\n", - "\n", - " run(f\"\"\"kubectl create secret generic {azdata_login_secret_name} -n {namespace} --from-literal=azdata_login_username={azdata_login_username} --from-literal=azdata_login_domain_name={azdata_login_domain_name} --from-literal=azdata_login_password={azdata_login_password}\"\"\")\n", - "\n", - "else:\n", - " print(f\"Cluster {namespace} is not an Active Directory enabled cluster, create a username/password credential\")\n", - "\n", - " if len(azdata_login_username) == 0 or \\\n", - " len(azdata_login_password) == 0 or \\\n", - " azdata_login_username == \"\" or \\\n", - " azdata_login_password == \"\":\n", - " raise SystemExit(\"Please provide a username/password account that has required permissions to run app-deploys and place the executed notebook
files in HDFS (variables: azdata_login_username/azdata_login_password)\")\n", - "\n", - " run(f\"\"\"kubectl create secret generic {azdata_login_secret_name} -n {namespace} --from-literal=azdata_login_username={azdata_login_username} --from-literal=azdata_login_password={azdata_login_password}\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create role" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "role = run(f\"kubectl get role --field-selector metadata.name={azdata_login_secret_name}-reader --no-headers -o jsonpath={{.items}} --namespace {namespace}\", return_output=True)\n", - "\n", - "if role == \"[]\": # does not exist\n", - " run(f\"kubectl create role {azdata_login_secret_name}-reader --verb=get --resource=secrets --resource-name={azdata_login_secret_name} --namespace {namespace}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create role-binding" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "role_binding = run(f\"kubectl get rolebindings --field-selector metadata.name={azdata_login_secret_name}-reader-binding --no-headers -o jsonpath={{.items}} --namespace={namespace}\", return_output=True)\n", - "\n", - "if role_binding == \"[]\": # does not exist\n", - " run(f\"kubectl create rolebinding {azdata_login_secret_name}-reader-binding --role={azdata_login_secret_name}-reader --user=system:serviceaccount:test:default --namespace={namespace}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb deleted file mode 100644 index d7676a1a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb +++ /dev/null @@ -1,511 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP014 - Delete secret for azdata login (inside cluster)\n", - "========================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Delete a secret in the Kubernetes Secret Store, used to enable SOP028\n", - "to perform `azdata login` when run from inside the Big Data Cluster.\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "azdata_login_secret_name = \"azdata-login-notebook-run-secret\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
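For orientation before SOP014's helper functions: the secret, role, and role-binding that SOP013 creates above form a least-privilege RBAC unit, where the role grants `get` on exactly one named secret and the binding attaches that role to a service account. The same trio as a self-contained script (a sketch: the namespace and literal credentials are placeholders, and binding to `system:serviceaccount:{namespace}:default` is an assumption made here; the notebook above binds to `system:serviceaccount:test:default`):

```python
import subprocess

def sh(cmd: str) -> None:
    """Tiny stand-in for the notebooks' run() helper."""
    subprocess.run(cmd.split(), check=True)

namespace = "mssql-cluster"                  # placeholder BDC namespace
secret = "azdata-login-notebook-run-secret"

# 1. The credential itself, stored as a Kubernetes secret.
sh(f"kubectl create secret generic {secret} -n {namespace} "
   f"--from-literal=azdata_login_username=admin "   # placeholder values
   f"--from-literal=azdata_login_password=change-me")

# 2. A role that can read only this one named secret (least privilege).
sh(f"kubectl create role {secret}-reader --verb=get --resource=secrets "
   f"--resource-name={secret} --namespace {namespace}")

# 3. Bind the role to the service account the app-deploy pods run under.
sh(f"kubectl create rolebinding {secret}-reader-binding --role={secret}-reader "
   f"--user=system:serviceaccount:{namespace}:default --namespace={namespace}")
```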
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop014-delete-secret-for-azdata-login.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "    return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6] # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Unescape asterisks that were escaped when the rule was expanded\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete role-binding" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Delete role-binding\n", - "#\n", - "role_binding = run(f\"kubectl get rolebindings --field-selector metadata.name={azdata_login_secret_name}-reader-binding --no-headers -o jsonpath={{.items}} --namespace={namespace}\", return_output=True)\n", - "\n", - "if role_binding != \"[]\": # does exist\n", - " run(f\"kubectl delete rolebinding {azdata_login_secret_name}-reader-binding --namespace={namespace}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete role" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "role = run(f\"kubectl get role --field-selector metadata.name={azdata_login_secret_name}-reader --no-headers -o jsonpath={{.items}} --namespace {namespace}\", return_output=True)\n", - "\n", - "if role != \"[]\": # does exist\n", - " run(f\"kubectl delete role {azdata_login_secret_name}-reader --namespace {namespace}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete secret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "secret = run(f\"kubectl get secrets --field-selector metadata.name={azdata_login_secret_name} -n {namespace} --no-headers -o jsonpath={{.items}}\", return_output=True)\n", - "\n", - "if secret != \"[]\":\n", - " run(f\"kubectl delete secret {azdata_login_secret_name} -n {namespace}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb deleted file mode 100644 index bf699663..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb +++ /dev/null @@ -1,574 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP028 - azdata login\n", - "=====================\n", - "\n", - "Use the azdata command line interface to login to a Big Data Cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "azdata_login_secret_name = \"azdata-login-notebook-run-secret\" # Used to login when running inside the 
BDC (i.e. app-deploy containers)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop028-azdata-login.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
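#        (Hedged aside, a minimal sketch and not part of the original notebook: the base64_decode=True
#         return path above is equivalent to the following, which is why the kubectl secret lookups
#         later in this notebook pass base64_decode=True to recover plain-text secret values:
#
#            import base64
#            decoded = base64.b64decode('c2E=').decode('utf-8')  # 'c2E=' is a hypothetical payload; decodes to 'sa'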
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Is the notebook being run inside a Kubernetes cluster\n", - "\n", - "When this notebook is running inside a Kubernetes cluster, such as\n", - "when running inside an App-Deploy pod, there is no KUBECONFIG present,\n", - "therefore azdata login needs to use the -e (endpoint) approach to log in." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "    inside_kubernetes_cluster = True\n", - "    print(\"This notebook is running inside a Kubernetes cluster\")\n", - "else:\n", - "    inside_kubernetes_cluster = False\n", - "    print(\"This notebook is not running inside a Kubernetes cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Establish if cluster is Active Directory enabled\n", - "\n", - "An Active Directory enabled cluster will have a `dns` pod. Clusters\n", - "that are not Active Directory enabled do not have a `dns` pod." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dns_pod = run(f'kubectl get pods -n {namespace} -o name -l app=dns', return_output=True)\n", - "\n", - "if len(dns_pod) > 0:\n", - "    is_ad_enabled_cluster = True\n", - "    print(f\"Cluster {namespace} is an Active Directory enabled cluster\")\n", - "else:\n", - "    is_ad_enabled_cluster = False\n", - "    print(f\"Cluster {namespace} is not an Active Directory enabled cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Login with azdata" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.environ[\"ACCEPT_EULA\"] = \"yes\"\n", - "\n", - "if inside_kubernetes_cluster:\n", - "    if is_ad_enabled_cluster:\n", - "        try:\n", - "            azdata_login_username = run(f'kubectl get secret/{azdata_login_secret_name} -n {namespace} -o jsonpath={{.data.azdata_login_username}}', return_output=True, base64_decode=True)\n", - "            azdata_login_domain_name = run(f'kubectl get secret/{azdata_login_secret_name} -n {namespace} -o jsonpath={{.data.azdata_login_domain_name}}', return_output=True, base64_decode=True)\n", - "            azdata_login_password = run(f'kubectl get secret/{azdata_login_secret_name} -n {namespace} -o jsonpath={{.data.azdata_login_password}}', return_output=True, base64_decode=True)\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [SOP013 - Create secret for azdata login (inside cluster)](../common/sop013-create-secret-for-azdata-login.ipynb) to resolve this issue.'))\n", - "            raise\n", - "\n", - "        # Use the ! 
'shell command' to kinit (the run function doesn't work with the \"echo |\")\n", - " #\n", - " print(f'Performing kinit as {azdata_login_username}@{azdata_login_domain_name}')\n", - " !echo {azdata_login_password} | kinit {azdata_login_username}@{azdata_login_domain_name}\n", - "\n", - " azdata_login_password = \"\"\n", - " else:\n", - " try:\n", - " os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/{azdata_login_secret_name} -n {namespace} -o jsonpath={{.data.azdata_login_username}}', return_output=True, base64_decode=True)\n", - " os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/{azdata_login_secret_name} -n {namespace} -o jsonpath={{.data.azdata_login_password}}', return_output=True, base64_decode=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP013 - Create secret for azdata login (inside cluster)](../common/sop013-create-secret-for-azdata-login.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - " run(f'azdata login --endpoint {os.environ[\"CONTROLLER_ENDPOINT_URL\"]} --auth {\"ad\" if is_ad_enabled_cluster else \"basic\"}')\n", - "\n", - " if not is_ad_enabled_cluster:\n", - " del os.environ[\"AZDATA_PASSWORD\"]\n", - "\n", - "else:\n", - " if not is_ad_enabled_cluster and (\"AZDATA_USERNAME\" not in os.environ or \"AZDATA_PASSWORD\" not in os.environ):\n", - " os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - " os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - " run(f'azdata login --namespace {namespace} --auth {\"ad\" if is_ad_enabled_cluster else \"basic\"}')\n", - "\n", - " if not is_ad_enabled_cluster:\n", - " del os.environ[\"AZDATA_PASSWORD\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [SOP033 - azdata logout](../common/sop033-azdata-logout.ipynb)\n", - "\n", - "- [SOP011 - Set kubernetes configuration\n", - " context](../common/sop011-set-kubernetes-context.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb deleted file mode 100644 index 0f207122..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb +++ /dev/null @@ -1,409 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP033 - azdata logout\n", - "======================\n", - "\n", - "Use the azdata command line interface to logout of a Big Data Cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
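As a hedged usage sketch (assumed invocations, not cells from the original notebook), the `run` helper defined below is typically called either for its side effects or to capture output as a string:

    run('azdata logout')                                               # streams output; known transient faults are retried automatically
    contexts = run('kubectl config get-contexts', return_output=True)  # captures stdout instead of printing it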
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop033-azdata-logout.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
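#        (Hedged illustration of the retry path above, not executed here: a stderr line is compared,
#         by substring, against the strings registered for the binary in retry_hints; on a match,
#         run() calls itself with an incremented retry_count until MAX_RETRIES (5) is reached, e.g.:
#
#            retry_hints['azdata'] = ['Failed to get state for cluster']   # registered at the end of this cell
#            # stderr 'ERROR: Failed to get state for cluster' -> substring match -> RETRY: 1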
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to log out" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata logout')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [SOP028 - azdata login](../common/sop028-azdata-login.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb b/Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb deleted file mode 100644 index a4b21363..00000000 --- a/Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb +++ /dev/null @@ -1,269 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP034 - Wait for BDC to be Healthy\n", - "===================================\n", - "\n", - "Blocks until the Big Data Cluster is healthy, or the specified timeout\n", - "expires.\n", - "\n", - "The min\\_pod\\_count parameter indicates that the health check will not\n", - "pass until at least this number of pods exists in the cluster. 
If any\n", - "existing pods beyond this minimum are unhealthy, the cluster is not\n", - "healthy.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "timeout = 600 # amount of time to wait for the cluster to become healthy - default 10 minutes\n", - "check_interval = 30 # amount of time between health checks - default 30 seconds\n", - "min_pod_count = 10 # minimum number of healthy pods required to assert health" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "  Azure Data Studio."
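For example (a hedged sketch, with `mssql-cluster` as a hypothetical namespace name), the lookup in the next cell can be pinned ahead of time with:

    import os
    os.environ['AZDATA_NAMESPACE'] = 'mssql-cluster'   # hypothetical namespace name

Otherwise, adjust the `[0]` index in the next cell to select the intended Big Data Cluster.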
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The Kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper functions for waiting for the cluster to become healthy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "import threading\n", - "import time\n", - "import sys\n", - "import os\n", - "from IPython.display import Markdown\n", - "\n", - "isRunning = True\n", - "\n", - "def all_containers_ready(pod):\n", - " \"\"\"Returns True if all the containers within the given pod are ready\n", - "\n", - " Arguments:\n", - " pod {V1Pod} -- pod object returned by the Kubernetes API.\n", - " \"\"\"\n", - "\n", - " return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n", - "\n", - "\n", - "def pod_is_ready(pod):\n", - " \"\"\"Tests that the pod and all of its containers are ready\n", - "\n", - " Arguments:\n", - " pod {V1Pod} -- pod object returned by the Kubernetes API.\n", - " \"\"\"\n", - "\n", - " return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n", - "\n", - "\n", - "def waitReady():\n", - " \"\"\"Waits for all pods and containers to become ready.\n", - " \"\"\"\n", - " while isRunning:\n", - " try:\n", - " time.sleep(check_interval)\n", - " pods = get_pods()\n", - " allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n", - "\n", - " if allReady:\n", - " return True\n", - " else:\n", - " display(Markdown(get_pod_failures(pods)))\n", - " display(Markdown(f\"Cluster not healthy, rechecking in {check_interval} seconds.\"))\n", - " except Exception as ex:\n", - " last_error_message = str(ex)\n", - " display(Markdown(last_error_message))\n", - " time.sleep(check_interval)\n", - "\n", - "def get_pod_failures(pods=None):\n", - " \"\"\"Returns a status message for any pods that are not ready.\n", - " \"\"\"\n", - " results = \"\"\n", - " if not pods:\n", - " pods = get_pods()\n", - "\n", - " for pod in pods.items:\n", - " if \"job-name\" not in pod.metadata.labels:\n", - " if pod.status and pod.status.container_statuses:\n", - " for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n", - " results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2}<br/>\".format(container.name, pod.metadata.name, container.state)\n", - " else:\n", - " results = results + \"Pod {0} is not ready.<br/>\".format(pod.metadata.name)\n", - " return results\n", - "\n", - "\n", - "def get_pods():\n", - " \"\"\"Returns the pods in the BDC namespace, or across all namespaces if no namespace is set\n", - " \"\"\"\n", - " pods = None\n", - " if namespace is not None:\n", - " display(Markdown(f'Checking namespace {namespace}'))\n", - " pods = api.list_namespaced_pod(namespace, _request_timeout=30)\n", - " else:\n", - " display(Markdown('Checking all namespaces'))\n", - " pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n", - " return pods\n", - "\n", - "def wait_for_cluster_healthy():\n", - " global isRunning # without this, the assignments below would only create a local variable\n", - " isRunning = True\n", - " mt = threading.Thread(target=waitReady)\n", - " mt.start()\n", - " mt.join(timeout=timeout)\n", - "\n", - " isRunning = False # signal the worker thread to stop, whether or not it finished in time\n", - "\n", - " if mt.is_alive():\n", - " raise SystemExit(\"Timed out waiting for the cluster to become healthy.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait for the cluster to become healthy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "wait_for_cluster_healthy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "timeout": "900" - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/readme.md b/Big-Data-Clusters/CU8/Public/content/diagnose/readme.md deleted file mode 100644 index f1374238..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/readme.md +++ /dev/null @@ -1,29 +0,0 @@ -# Diagnose notebooks - -- A collection of notebooks for diagnosing situations and states within a Big Data Cluster.
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG027 - Observe cluster deployment ](tsg027-observe-bdc-create.ipynb) - -- [TSG078 - Is cluster healthy ](tsg078-is-cluster-healthy.ipynb) - -- [TSG029 - Find dumps in the cluster](tsg029-find-dumps-in-the-cluster.ipynb) - -- [TSG032 - CPU and Memory usage for all containers ](tsg032-get-cpu-and-memory-for-all-containers.ipynb) - -- [TSG060 - Persistent Volume disk space for all BDC PVCs ](tsg060-get-disk-space-for-all-pvcs.ipynb) - -- [TSG087 - Use hadoop fs CLI on nmnode pod ](tsg087-use-hadoop-fs.ipynb) - -- [TSG037 - Determine master pool pod hosting primary replica ](tsg037-determine-primary-master-replica.ipynb) - -- [TSG055 - Time Curl to Sparkhead ](tsg055-time-curl-to-sparkhead.ipynb) - -- [TSG079 - Generate `controller` core dump ](tsg079-generate-controller-core-dump.ipynb) - -- [TSG086 - Run `top` in all containers ](tsg086-run-top-for-all-containers.ipynb) - -- [TSG108 - View the controller upgrade config map ](tsg108-controller-failed-to-upgrade.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb deleted file mode 100644 index a52fe8b6..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb +++ /dev/null @@ -1,743 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG027 - Observe cluster deployment\n", - "===================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "To troubleshoot SQL Server big data cluster create issues the following\n", - "commands are often useful for pinpointing underlying causes.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "tail_lines = 1000" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg027-observe-bdc-create.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes version information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl version -o yaml')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the AZDATA version information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata --version')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes nodes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get nodes')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes namespaces\n", - "\n", - "See the namespace for the new cluster, it should be displayed in the\n", - "list of Kubernetes namespaces. Creating the namespace is one of the\n", - "first actions performed by \u2018azdata cluster create\u2019" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get namespace')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the System pods for the big data cluster\n", - "\n", - "Show the Kubernetes system pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get pods -n kube-system -o wide')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes pods for the big data cluster\n", - "\n", - "Show the SQL Server big data cluster pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pods -n {namespace} -o wide')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes services for the big data cluster\n", - "\n", - "Show the SQL Server big data cluster services" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get service -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes events for the big data cluster\n", - "\n", - "Show the SQL Server big data cluster events" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get events -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the `controller` pod\n", - "\n", - "Describe the `controller` pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "\n", - "run(f'kubectl describe pod/{controller} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller` container logs\n", - "\n", - "Get the `controller` container logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl logs pod/{controller} -c controller -n {namespace} --tail={tail_lines}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller` database `mssql-server` container logs\n", - "\n", - "Get the `controller` database `mssql-server` container logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl logs pod/controldb-0 -c mssql-server -n {namespace} --tail={tail_lines}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller` `security-support` container logs\n", - "\n", - "Get the `controller` `security-support` container logs" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl logs pod/{controller} -c security-support -n {namespace} --tail={tail_lines}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller` `fluentbit` container logs\n", - "\n", - "Get the `controller` `fluentbit` container logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl logs pod/{controller} -c fluentbit -n {namespace} --tail={tail_lines}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the `controller watchdog` pod\n", - "\n", - "Describe the `controller watchdog` pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "controlwd = None\n", - "\n", - "try:\n", - " controlwd = run(f'kubectl get pod --selector=app=controlwd -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " run(f'kubectl describe pod/{controlwd} -n {namespace}')\n", - "except:\n", - " print(\"Skipping 'controller watchdog', it has not been created yet\" )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `controller watchdog` container logs\n", - "\n", - "Get the `controller watchdog` container logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if controlwd is not None:\n", - " run(f'kubectl logs pod/{controlwd} -n {namespace} --tail={tail_lines}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG001 - Run azdata copy-logs](../log-files/tsg001-copy-logs.ipynb)\n", - "\n", - "- [TSG002 -\n", - " CrashLoopBackoff](../diagnose/tsg002-crash-loop-backoff.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb deleted file mode 100644 index f3fd1d2f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb +++ /dev/null @@ -1,180 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG029 - Find dumps in the cluster\n", - "==================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Look for coredumps and minidumps from processes like SQL Server or\n", - "controller in a big data cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " 
config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get all relevant pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace, label_selector='app in (compute-0, data-0, storage-0, master, controller, controldb)', field_selector='status.phase==Running')\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "print('Scanning pods: ' + ', '.join(pod_names))\n", - "\n", - "command = 'find /var/opt /var/log -path /var/opt/mssql-extensibility/data -prune -o -print | grep -E \"core\\\\.sqlservr|core\\\\.controller|SQLD|\\\\.mdmp$|\\\\.dmp$|\\\\.gdmp$\"'\n", - "all_dumps = ''\n", - "\n", - "for name in pod_names:\n", - " print('Searching pod: ' + name)\n", - " container = 'mssql-server'\n", - " if 'control-' in name:\n", - " container = 'controller'\n", - "\n", - " try:\n", - " dumps=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - " except Exception as e:\n", - " print(f'Unable to connect to pod: {name} due to {str(e.__class__)}. 
Skipping dump check for this pod...')\n", - " else:\n", - " if dumps:\n", - " all_dumps += '*Pod: ' + name + '*\\n'\n", - " all_dumps += dumps + '\\n'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate\n", - "\n", - "Validate no dump files were found." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if len(all_dumps) > 0:\n", - " raise SystemExit('FAIL - dump files found:\\n' + all_dumps)\n", - "\n", - "print('SUCCESS - no dump files were found.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb deleted file mode 100644 index 2f0d7091..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb +++ /dev/null @@ -1,158 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG032 - CPU and Memory usage for all containers\n", - "================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
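As a concrete illustration of the note above, a minimal sketch (assuming a reachable cluster and the same Python Kubernetes client used throughout this notebook) that lists every namespace and flags the Big Data Cluster candidates, which helps when the `[0]` first-match choice is ambiguous:

```python
# Hedged sketch: enumerate namespaces and flag the ones carrying the
# MSSQL_CLUSTER label, so the right value for [0] (or AZDATA_NAMESPACE)
# can be chosen deliberately.
from kubernetes import client, config

config.load_kube_config()  # use load_incluster_config() when running inside a pod
v1 = client.CoreV1Api()

for ns in v1.list_namespace().items:
    labels = ns.metadata.labels or {}
    flag = "  <-- Big Data Cluster" if "MSSQL_CLUSTER" in labels else ""
    print(ns.metadata.name + flag)
```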
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get per process usage stats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = \"\"\"echo \"CPU %\\t MEM %\\t MEM\\t PROCESS\" &&\n", - "ps aux |\n", - "awk '\n", - " {mem[$11] += int($6/1024)};\n", - " {cpuper[$11] += $3};\n", - " {memper[$11] += $4};\n", - "END {\n", - " for (i in mem) {\n", - " print cpuper[i] \"%\\t\", memper[i] \"%\\t\", mem[i] \"MB\\t\", i\n", - " }\n", - "}' |\n", - "sort -k3nr\n", - "\"\"\"\n", - "\n", - "pod_list = api.list_namespaced_pod(namespace)\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "for pod in pod_list.items:\n", - " container_names = [container.name for container in pod.spec.containers]\n", - "\n", - " for container in container_names:\n", - " print (f\"CONTAINER: {container} / POD: {pod.metadata.name}\")\n", - " try:\n", - " print(stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True))\n", - " except Exception:\n", - " print (f\"Failed to get CPU/Memory for container: {container} in POD: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb deleted file mode 100644 index b5a3bda2..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb +++ /dev/null @@ -1,546 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG037 - Determine master pool pod hosting primary replica\n", - "==========================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Determine the pod that hosts the primary replica for the Big Data\n", - "Cluster when master pool high availability is enabled.\n", - "\n", - "For BDC deployed with High availability, the master 
pool has at least\n", - "three master pods (availability group replicas). SQL Server in the\n", - "master pool is deployed as a contained availability group, in which the\n", - "availability group has its own logical master database, so there are\n", - "two connection contexts that can be made to SQL Server in the master\n", - "pool:\n", - "\n", - "1. Connection to the availability group using the sql-server-master\n", - " endpoint; this connects you to the availability group master\n", - " database.\n", - "2. Connection to the SQL instance; this\n", - " [connection](https://docs.microsoft.com/en-us/sql/big-data-cluster/deployment-high-availability?view=sqlallproducts-allversions#instance-connect)\n", - " takes you to the instance master database, which you may need for\n", - " certain operations such as setting server-level configurations or\n", - " manually adding a database to the availability group (for example,\n", - " when the database was created through a restore workflow).\n", - "\n", - "`Note`: for a BDC deployed without high availability, this notebook\n", - "always returns master-0, since there is only one master pod in the\n", - "master pool.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg037-determine-primary-master-replica.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find Pod hosting the primary replica\n", - "\n", - "Run the T-SQL command using `azdata sql query`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata sql query -q \"SELECT @@SERVERNAME [pod hosting primary replica]\" -o table')\n", - "del os.environ[\"AZDATA_PASSWORD\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb deleted file mode 100644 index 89905a57..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb +++ /dev/null @@ -1,544 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG055 - Time Curl to Sparkhead\n", - "===============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "If `azdata bdc status show` fails with:\n", - "\n", - "> StatefulSet sparkhead is not healthy: {{Pod sparkhead-0 is not\n", - "> healthy: {Container hadoop-livy-sparkhistory is unhealthy: {Found\n", - "> error properties: {Property: sparkhistory.readiness, Details: \u2018Timed\n", - "> out getting health status after 5000 milliseconds.\u2019}}}}}: unhealthy\n", - "> Pod sparkhead-0 is not healthy: {Container hadoop-livy-sparkhistory is\n", - "> unhealthy: {Found error properties: {Property: sparkhistory.readiness,\n", - "> Details: \u2018Timed out getting health status after 5000\n", - "> milliseconds.\u2019}}}: unhealthy spark: unhealthy\" StatefulSet sparkhead\n", - "> is not healthy: {{Pod sparkhead-0 is not healthy: {Container\n", - "> hadoop-livy-sparkhistory is unhealthy: {Found error properties:\n", - "> {Property: sparkhistory.readiness, Details: \u2018Timed out getting health\n", - "> status after 5000 milliseconds.\u2019}}}}}: unhealthy Pod sparkhead-0 is\n", - "> not healthy: {Container hadoop-livy-sparkhistory is unhealthy: {Found\n", - "> error properties: {Property: sparkhistory.readiness, Details: \u2018Timed\n", - "> out getting health status after 5000 milliseconds.\u2019}}}: unhealthy\n", - "\n", - "It can be a useful diagnosis step to understand what the Curl response\n", - "time is from the `controller` pod to the `sparkhead` pod.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary is None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary is None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg055-time-curl-to-sparkhead.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourselves.
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine whether the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " 
raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get name of the \u2018Running\u2019 `controller` `pod`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place the name of the 'Running' controller pod in variable `controller`\n", - "\n", - "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n", - "\n", - "print(f\"Controller pod name: {controller}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Time `curl` in `controller` `pod` to `sparkhead`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"time curl --cacert /run/secrets/certificates/rootca/cluster-ca-certificate.crt https://sparkhead-svc:18480\"')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "rules": [ - [ - "TSG078", - "code", - "stream", - "name", - "stdout", - "text", - ".*StatefulSet sparkhead is not healthy.*Timed out getting health status" - ] - ] - } - } - } -} \ No newline at end of file 
diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb deleted file mode 100644 index 5b1e11dc..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb +++ /dev/null @@ -1,558 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG060 - Persistent Volume disk space for all BDC PVCs\n", - "======================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Connect to each container and get the disk space used/available for each\n", - "Persistent Volume (PV) mapped to each Persistent Volume Claim (PVC) of a\n", - "Big Data Cluster (BDC).\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters\n", - "\n", - "Set the space-used percentage; if the disk space used crosses this\n", - "threshold, this notebook will raise an exception." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "SPACED_USED_PERCENT_THRESHOLD = 80" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary is None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary is None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg060-get-disk-space-for-all-pvcs.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Connect to each container that mounts a PVC and run the `df` Linux command line tool\n", - "\n", - "For each pod:\n", - "\n", - "1. Get the claim\_names from the volumes which have a PVC\n", - "2. Join that to the containers whose volume\_mounts reference that claim\_name\n", - "3. Get the \u2018mount\_path\u2019 from the \u2018volume\_mount\u2019\n", - "4. Exec into the container and run the \u2018df\u2019 tool.\n", - "\n", - "This technique seems to work across kubeadm and AKS, but does require\n", - "\u2018kubectl exec\u2019 into each container (which requires permission and some\n", - "time)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "low_diskspace = False\n", - "\n", - "for pod in pods.items:\n", - " for volume in pod.spec.volumes:\n", - " if volume.persistent_volume_claim is not None:\n", - " for container in pod.spec.containers:\n", - " for volume_mount in container.volume_mounts:\n", - " if volume_mount.name == volume.name:\n", - " pvc = api.read_namespaced_persistent_volume_claim(name=volume.persistent_volume_claim.claim_name, namespace=namespace)\n", - " print (f\"Disk Space for {pod.metadata.name}/{container.name} PVC: {volume.persistent_volume_claim.claim_name} bound to PV: {pvc.spec.volume_name} ({pvc.status.capacity}) Storage Class: {pvc.spec.storage_class_name}\")\n", - " try:\n", - " output=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, container=container.name, command=['/bin/sh', '-c', f'df {volume_mount.mount_path} -h'], stderr=True, stdout=True)\n", - " except Exception as err:\n", - " print(err)\n", - " else:\n", - " print(output)\n", - "\n", - " # Get the same output as a CSV, so we can check the space used\n", - " output=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, container=container.name, command=['/bin/sh', '-c', f\"\"\"df {volume_mount.mount_path} -h -P | awk '{{print $1\",\"$2\",\"$3\",\"$4\",\"$5\",\"$6\" \"$7}}'\"\"\"], stderr=True, stdout=True)\n", - " \n", - " s = output.split(\",\")\n", - "\n", - " # s[9] is the use-percentage value on the df data row (the 6-column header consumes s[0]..s[4], with s[5] spanning the line break); strip the trailing '%'\n", - " space_used = int(s[9][:-1])\n", - "\n", - " if space_used > SPACED_USED_PERCENT_THRESHOLD:\n", - " low_diskspace = True\n", - "\n", - " # NOTE: This string is used to match an `expert rule` (SOP013)\n", - " #\n", - " print(f\"WARNING: LOW DISK
SPACE! ({pod.metadata.name}/{container.name})\")\n", - " print(\"^^^^^^^^^^^^^^^^^^^^^^^^^\")\n", - "\n", - "if low_diskspace:\n", - " raise SystemExit(f\"Disk space on one or more Persistent Volumes is greater than {SPACED_USED_PERCENT_THRESHOLD}%\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb deleted file mode 100644 index 291edd20..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb +++ /dev/null @@ -1,555 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG078 - Is cluster healthy\n", - "===========================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary is None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary is None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg078-is-cluster-healthy.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." 
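For reference, a minimal sketch of what the `base64_decode=True` path amounts to, assuming `kubectl` is on the `PATH` and `namespace` is set as above (the `get_secret_value` helper is illustrative, not part of the notebook):

    import base64
    import subprocess

    def get_secret_value(namespace, key):
        # kubectl prints the base64-encoded value stored under .data.<key>;
        # Kubernetes secrets are stored base64-encoded, so decode before use.
        encoded = subprocess.check_output(
            ["kubectl", "get", "secret/controller-login-secret",
             "-n", namespace, "-o", f"jsonpath={{.data.{key}}}"])
        return base64.b64decode(encoded).decode("utf-8")

    # e.g. os.environ["AZDATA_USERNAME"] = get_secret_value(namespace, "username")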
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the cluster health" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "\n", - "try:\n", - " health = run(f\"azdata bdc status show --all -o json\", return_output=True)\n", - "except: # WORKAROUND: ReadTimeoutError due to --all on large clusters, therefore remove. Waiting for azdata TIMEOUT override.\n", - " health = run(f\"azdata bdc status show -o json\", return_output=True)\n", - "\n", - "print(health)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze the cluster health" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "health_json = json.loads(health)\n", - "\n", - "cluster_all_healthy = True\n", - "\n", - "for service in health_json[\"result\"][\"services\"]:\n", - " if service[\"healthStatus\"] != \"healthy\":\n", - " cluster_all_healthy = False\n", - " print (f'{service[\"serviceName\"]}: {service[\"healthStatus\"]}')\n", - " for resource in service[\"resources\"]:\n", - " if resource[\"healthStatus\"] != \"healthy\":\n", - " cluster_all_healthy = False\n", - " print(f'{str(resource[\"details\"])}: {resource[\"healthStatus\"]}')\n", - " if resource[\"instances\"] is not None:\n", - " for instance in resource[\"instances\"]:\n", - " if instance[\"healthStatus\"] != \"healthy\":\n", - " cluster_all_healthy = False\n", - " print(f'{str(instance[\"details\"])}: {instance[\"healthStatus\"]}')\n", - "\n", - "if not cluster_all_healthy:\n", - " raise SystemExit('The cluster is not 100% healthy')\n", - "else:\n", - " print(\"Cluster is healthy!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG061 - Get tail of all container logs for pods in BDC\n", - " namespace](../log-files/tsg061-tail-bdc-container-logs.ipynb)\n", - "\n", - "- [TSG062 - Get tail of all previous container logs for pods in BDC\n", - " namespace](../log-files/tsg062-tail-bdc-previous-container-logs.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "expanded_rules": [ - [ - 5, - "../diagnose/tsg055-time-curl-to-sparkhead.ipynb", - "code", - "stream", - "name", - "stdout", - "text", - ".*StatefulSet sparkhead is not healthy.*Timed out getting health status", - 0 - ] - ] - } - } - } -} \ No 
newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb deleted file mode 100644 index 5dbab70b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb +++ /dev/null @@ -1,500 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG079 - Generate `controller` core dump\n", - "========================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg079-generate-controller-core-dump.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set current directory to temporary directory" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "path = tempfile.gettempdir()\n", - "\n", - "os.chdir(path)\n", - "\n", - "print(f\"Current directory set to: {path}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate core dump" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata bdc debug dump -n {namespace} -c controller')\n", - "\n", - "print(f'The dump file is in: {os.path.join(path, \"output\", \"dump\")}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb deleted file mode 100644 index 6d4a337f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb +++ /dev/null @@ -1,146 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG086 - Run `top` in all containers\n", - "====================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run top in each container" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmd = \"top -b -n 1\"\n", - "\n", - "pod_list = api.list_namespaced_pod(namespace)\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "for pod in pod_list.items:\n", - " container_names = [container.name for container in pod.spec.containers]\n", - "\n", - " for container in container_names:\n", - " print (f\"CONTAINER: {container} / POD: {pod.metadata.name}\")\n", - " try:\n", - " print(stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True))\n", - " except Exception as err:\n", - " print (f\"Failed to get run 'top' for container: {container} in pod: {pod.metadata.name}. 
Error: {err}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb deleted file mode 100644 index 94b86a7d..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb +++ /dev/null @@ -1,489 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG087 - Use hadoop fs CLI on nmnode pod\n", - "========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Connect directly to the namenode and use the comprehensive `hadoop fs`\n", - "CLI\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg087-use-hadoop-fs.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "  Azure Data Studio."
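As a minimal sketch of the discovery step described above, assuming the `kubernetes` Python module is installed and a valid kube config is available, the label-based lookup looks like this:

    from kubernetes import client, config

    config.load_kube_config()  # or load_incluster_config() when running in a pod

    # BDC namespaces carry the MSSQL_CLUSTER label; [0] above picks the first match
    api = client.CoreV1Api()
    for ns in api.list_namespace(label_selector="MSSQL_CLUSTER").items:
        print(ns.metadata.name)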
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Connect to the namenode pod and run hadoop fs CLI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl exec nmnode-0-0 -n {namespace} -c hadoop -- hadoop fs -ls /')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb b/Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb deleted file mode 100644 index e5d1f849..00000000 --- a/Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb +++ /dev/null @@ -1,490 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG108 - View the controller upgrade config map\n", - "===============================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When running a Big Data Cluster upgrade using `azdata bdc upgrade`:\n", - "\n", - "`azdata bdc upgrade --name <namespace> --tag <docker_image_tag>`\n", - "\n", - "It may fail with:\n", - "\n", - "> Upgrading cluster to version 15.0.4003.10029\\_2\n", - ">\n", - "> NOTE: Cluster upgrade can take a significant amount of time depending\n", - "> on configuration, network speed, and the number of nodes in the\n", - "> cluster.\n", - ">\n", - "> Upgrading Control Plane. Control plane upgrade failed. Failed to\n", - "> upgrade controller.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "Use these steps to troubleshoot the problem.\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
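Two asides at this junction. First, on TSG087's final cell above: the same `kubectl exec` pattern supports other read-only `hadoop fs` checks. A hedged sketch, assuming TSG087's `run` helper, its `namespace` variable, and the default namenode pod and container names:

```python
# Illustrative sketch only (assumes TSG087's run() helper, the 'namespace'
# variable, and the default namenode pod/container names used above).
run(f'kubectl exec nmnode-0-0 -n {namespace} -c hadoop -- hadoop fs -df -h /')
run(f'kubectl exec nmnode-0-0 -n {namespace} -c hadoop -- hadoop fs -ls -R /tmp')
```

Second, TSG108 (beginning here) is wired into the expert-rules mechanism: its notebook metadata (later in this patch) carries an `expanded_rules` entry whose element [7] is a regular expression matched against output lines and whose element [1] names the follow-on notebook to HINT. A self-contained sketch of that matching, using the rule values from TSG108's metadata (the sample output line is hypothetical):

```python
# Illustrative sketch of the matching done by apply_expert_rules().
# Rule layout: [priority, notebook, cell_type, output_type, name, value, details_name, regex, ...]
import re

rule = [5, "../repair/tsg109-upgrade-stalled.ipynb", "code", "stream",
        "name", "stdout", "text", ".\\*upgrade has timed out", 0]

expression = rule[7].replace("\\*", "*")  # undo the escaped '*' -> '.*upgrade has timed out'
line = "ERROR: controller upgrade has timed out"  # hypothetical stdout line

if re.match(expression, line, re.DOTALL):
    print(f"HINT: Use {rule[1]} to resolve this issue.")
```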
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg108-controller-failed-to-upgrade.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
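When more than one cluster exists, listing every labelled namespace first makes the choice of `[0]` in the jsonpath concrete. A minimal sketch using the `run` helper defined above:

```python
# Illustrative sketch only: show all namespaces carrying the MSSQL_CLUSTER
# label, so the right index can be chosen in the jsonpath query below.
run('kubectl get namespace --selector=MSSQL_CLUSTER -o name')
```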
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### View the upgrade configmap" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get configmap -n {namespace} controller-upgrade-configmap -o yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG109 - Set upgrade\n", - " timeouts](../repair/tsg109-upgrade-stalled.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true, - "expert": { - "expanded_rules": [ - [ - 5, - "../repair/tsg109-upgrade-stalled.ipynb", - "code", - "stream", - "name", - "stdout", - "text", - ".\\*upgrade has timed out", - 0 - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/readme.md b/Big-Data-Clusters/CU8/Public/content/install/readme.md deleted file mode 100644 index 5072e50b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/readme.md +++ /dev/null @@ -1,37 +0,0 @@ -# Installation notebooks - -- A set of notebooks used for installing and uninstalling command line tools and packages needed to manage SQL Server Big Data Clusters. 
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [SOP036 - Install kubectl command line interface ](sop036-install-kubectl.ipynb) - -- [SOP037 - Uninstall kubectl command line interface ](sop037-uninstall-kubectl.ipynb) - -- [SOP059 - Install Kubernetes Python module ](sop059-install-kubernetes-module.ipynb) - -- [SOP060 - Uninstall kubernetes module ](sop060-uninstall-kubernetes-module.ipynb) - -- [SOP062 - Install ipython-sql and pyodbc modules ](sop062-install-ipython-sql-module.ipynb) - -- [SOP063 - Install azdata CLI (using package manager) ](sop063-packman-install-azdata.ipynb) - -- [SOP064 - Uninstall azdata CLI (using package manager) ](sop064-packman-uninstall-azdata.ipynb) - -- [SOP054 - Install azdata CLI (using pip) ](sop054-install-azdata.ipynb) - -- [SOP055 - Uninstall azdata CLI (using pip) ](sop055-uninstall-azdata.ipynb) - -- [SOP038 - Install azure command line interface ](sop038-install-az.ipynb) - -- [SOP039 - Uninstall azure command line interface ](sop039-uninstall-az.ipynb) - -- [SOP040 - Upgrade pip in ADS Python sandbox ](sop040-upgrade-pip.ipynb) - -- [SOP069 - Install ODBC for SQL Server ](sop069-install-odbc-driver-for-sql-server.ipynb) - -- [SOP012 - Install unixodbc for Mac ](sop012-brew-install-odbc-for-sql-server.ipynb) - -- [SOP010 - Upgrade a big data cluster ](sop010-upgrade-bdc.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb deleted file mode 100644 index 5ed231ed..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb +++ /dev/null @@ -1,491 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP010 - Upgrade a big data cluster\n", - "===================================\n", - "\n", - "Upgrade a Big Data Cluster using `azdata`.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "docker_image_tag = f\"\" # i.e. 15.0.4003.10029_2\n", - "\n", - "docker_repository = None\n", - "docker_username = None\n", - "docker_password = None\n", - "\n", - "print('PARAMETERS:')\n", - "print('')\n", - "print(f'docker_image_tag = {docker_image_tag}')\n", - "print('')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
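For reference, one plausible way to fill SOP010's parameters cell above; the tag must be a real CU image tag, and all values here are examples only:

```python
# Hypothetical example values for SOP010's parameters cell (illustrative only).
docker_image_tag = "15.0.4003.10029_2"  # the CU8 tag quoted in TSG108's description
docker_repository = None                # None keeps the currently configured repository
docker_username = None                  # only needed for a private repository
docker_password = None                  # only needed for a private repository
```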
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop010-upgrade-bdc.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Upgrade the cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "if docker_username is not None:\n", - " os.environ[\"DOCKER_USERNAME\"] = docker_username\n", - " os.environ[\"DOCKER_PASSWORD\"] = docker_password\n", - "\n", - "run(f'azdata bdc upgrade --name {namespace} --tag {docker_image_tag} {\"\" if docker_repository is None else f\"--repository {docker_repository}\"}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG108 - View the controller upgrade config\n", - " map](../diagnose/tsg108-controller-failed-to-upgrade.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb deleted file mode 100644 index b1936026..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb +++ /dev/null @@ -1,397 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP012 - Install unixodbc for Mac\n", - "=================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "`azdata` may fail to install on Mac with the following error.\n", - "\n", - "> ERROR:\n", - "> dlopen(/Users/user/.local/lib/python3.6/site-packages/pyodbc.cpython-36m-darwin.so,\n", - "> 2): Library not loaded: /usr/local/opt/unixodbc/lib/libodbc.2.dylib\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
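The upgrade cell above assembles the CLI call from those parameters, appending `--repository` only when one is supplied. A standalone sketch of that composition, with hypothetical values:

```python
# Illustrative sketch of the command string SOP010's upgrade cell builds.
namespace = "mssql-cluster"             # hypothetical BDC namespace
docker_image_tag = "15.0.4003.10029_2"  # example CU8 tag
docker_repository = None                # or e.g. "private/mssql/bdc" (hypothetical)

repo_arg = "" if docker_repository is None else f"--repository {docker_repository}"
print(f"azdata bdc upgrade --name {namespace} --tag {docker_image_tag} {repo_arg}".strip())
# -> azdata bdc upgrade --name mssql-cluster --tag 15.0.4003.10029_2
```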
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop012-brew-install-odbc-for-sql-server.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "    return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the escaping applied upstream, which put a \\ in front of each *\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install `unixodbc`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('brew install unixodbc')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb deleted file mode 100644 index d0e10983..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb +++ /dev/null @@ -1,416 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP036 - Install kubectl command line interface\n", - "===============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
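The `load_rules`/`apply_expert_rules` pair repeated in each of these notebooks reads an 8-element rule list out of the notebook's own metadata and regex-matches each stderr line against element [7]. A minimal sketch of that matching step, assuming the index layout shown above; the sample rule and stderr text are illustrative, not taken from a shipped notebook:

```python
import re

# Hypothetical rule, using the index layout from apply_expert_rules():
# [priority, notebook, cell_type, output_type, name_key, name_value, details_key, expression]
rule = [200, "tsg078-is-cluster-healthy.ipynb", "code", "error",
        "ename", "SystemExit", "evalue", "Shell command:.*returned non-zero exit code"]

stderr_line = "Shell command:\n\n\tazdata bdc status show\n\nreturned non-zero exit code: 1."

expression = rule[7].replace("\\*", "*")  # undo the escaping applied when rules were expanded

# re.DOTALL lets '.*' span the embedded newlines in multi-line stderr output
if re.match(expression, stderr_line, re.DOTALL):
    print(f"HINT: Use [{rule[1]}]({rule[1]}) to resolve this issue.")
```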
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop036-install-kubectl.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "    return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the escaping applied upstream, which put a \\ in front of each *\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install Kubernetes CLI\n", - "\n", - "To get the latest version number for `kubectl` for Windows, open this\n", - "file:\n", - "\n", - "- https://storage.googleapis.com/kubernetes-release/release/stable.txt\n", - "\n", - "NOTE: For Windows, `kubectl.exe` is installed in the folder containing\n", - "the `python.exe` (`sys.executable`), which will be in the path for\n", - "notebooks run in ADS."
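The install cell that follows pins `v1.17.0` for the Windows download; resolving the current tag from `stable.txt` at run time is a small extension. A sketch, assuming outbound HTTPS access and the same bucket layout as the pinned download below:

```python
import urllib.request

# stable.txt holds a single release tag, e.g. "v1.19.3"
with urllib.request.urlopen(
        "https://storage.googleapis.com/kubernetes-release/release/stable.txt") as response:
    version = response.read().decode("utf-8").strip()

# Same URL layout as the pinned v1.17.0 download in the cell below
url = (f"https://storage.googleapis.com/kubernetes-release/release/"
       f"{version}/bin/windows/amd64/kubectl.exe")
print(url)  # pass to: curl -L <url> -o kubectl.exe
```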
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import platform\n", - "\n", - "from pathlib import Path\n", - "\n", - "if platform.system() == \"Darwin\":\n", - " run('brew update')\n", - " run('brew install kubernetes-cli')\n", - "elif platform.system() == \"Windows\":\n", - " path = Path(sys.executable)\n", - " cwd = os.getcwd()\n", - " os.chdir(path.parent)\n", - " run('curl -L https://storage.googleapis.com/kubernetes-release/release/v1.17.0/bin/windows/amd64/kubectl.exe -o kubectl.exe')\n", - " os.chdir(cwd)\n", - "elif platform.system() == \"Linux\":\n", - " run('sudo apt-get update')\n", - " run('sudo apt-get install -y kubectl')\n", - "else:\n", - " raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb deleted file mode 100644 index 1450d4ee..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb +++ /dev/null @@ -1,409 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP037 - Uninstall kubectl command line interface\n", - "=================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop037-uninstall-kubectl.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Uninstall Kubernetes CLI\n", - "\n", - "NOTE: For Windows, `kubectl.exe` was installed in the folder containing\n", - "the `python.exe` (`sys.executable`), it will be removed from this\n", - "folder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import platform\n", - "\n", - "from pathlib import Path\n", - "\n", - "if platform.system() == \"Darwin\":\n", - "    run('brew uninstall kubernetes-cli')\n", - "elif platform.system() == \"Windows\":\n", - "    path = Path(sys.executable)\n", - "    cwd = os.getcwd()\n", - "    os.chdir(path.parent)\n", - "    run('cmd /c del kubectl.exe')\n", - "    os.chdir(cwd)\n", - "elif platform.system() == \"Linux\":\n", - "    run('sudo apt-get remove -y kubectl')\n", - "else:\n", - "    raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb deleted file mode 100644 index 22d231eb..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb +++ /dev/null @@ -1,406 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP038 - Install azure command line interface\n", - "=============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - "    NOTES:\n", - "\n", - "    1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop038-install-az.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the escaping applied upstream, which put a \\ in front of each *\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install az CLI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"python --version\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('python -m pip install azure-cli')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb deleted file mode 100644 index b2c26da2..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb +++ /dev/null @@ -1,397 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP039 - Uninstall azure command line interface\n", - "===============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
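The hint tables directly above are keyed by the first token the user typed (`python`, `kubectl`, ...), and `run()` scans each stderr line for a hint's substring. A short sketch of that lookup, reusing one of the `error_hints` entries defined above; the sample stderr line is illustrative:

```python
# Same shape as the tables defined above: exe name -> [substring, SOP title, SOP path]
error_hints = {
    "python": [
        ["WARNING: You are using pip version",
         "SOP040 - Upgrade pip in ADS Python sandbox",
         "../install/sop040-upgrade-pip.ipynb"],
    ]
}

line_decoded = "WARNING: You are using pip version 19.0.3; a newer release is available."
user_provided_exe_name = "python"

# Plain substring match, exactly as run() does with str.find()
if user_provided_exe_name in error_hints:
    for error_hint in error_hints[user_provided_exe_name]:
        if line_decoded.find(error_hint[0]) != -1:
            print(f"HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.")
```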
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop039-uninstall-az.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
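The load_rules step above reads the notebook's own JSON so it can pull the "expert rules" out of the notebook metadata. A minimal standalone sketch of that lookup, assuming the metadata.azdata.expert.expanded_rules layout used above (load_expert_rules is a hypothetical name, not the notebook's helper):

    import json

    def load_expert_rules(notebook_path):
        """Return the sorted 'expanded_rules' list from a notebook's metadata, or [] if absent."""
        try:
            with open(notebook_path, encoding="utf8") as f:
                j = json.load(f)
        except OSError:
            return []  # notebook renamed or moved; nothing to apply

        rules = (j.get("metadata", {})
                  .get("azdata", {})
                  .get("expert", {})
                  .get("expanded_rules", []))
        rules.sort()  # element [0] is the priority; lowest value runs first
        return rules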
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Uninstall az CLI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('python -m pip uninstall azure-cli --yes')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb deleted file mode 100644 index 6fb55376..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb +++ /dev/null @@ -1,399 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP040 - Upgrade pip in ADS Python sandbox\n", - "==========================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common 
functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop040-upgrade-pip.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
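apply_expert_rules, defined just below, is the consumer of those rules: each stderr line is tested against a rule's regular expression, and a match injects a HINT to the follow-on notebook. A simplified, self-contained sketch, assuming the 8-element rule layout described in the comments (the sample rule is illustrative only):

    import re

    # Assumed layout: [priority, notebook, cell_type, output_type,
    #                  output_type_name, output_type_value, details_name, expression]
    rules = [[0, "sop040-upgrade-pip.ipynb", "code", "stream",
              "name", "stdout", "text", r".*You are using pip version.*"]]

    def apply_expert_rules(line):
        """Print a HINT for every rule whose regex matches the given stderr line."""
        for rule in rules:
            notebook = rule[1]
            expression = rule[7].replace("\\*", "*")  # un-escape any escaped *
            if re.match(expression, line, re.DOTALL):
                print(f"HINT: Use {notebook} to resolve this issue.")

    apply_expert_rules("WARNING: You are using pip version 19.0.3; consider upgrading.")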
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Upgrade pip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "run(f'python -m pip install --upgrade pip')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb deleted file mode 100644 index dc15b52f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb +++ /dev/null @@ -1,430 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP054 - Install azdata CLI (using pip)\n", - "=======================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - 
"### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop054-install-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
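The base64_decode flag in the return path above lets a caller capture base64-encoded command output (kubectl, for example, returns secret values base64-encoded) and receive plain text back. A minimal sketch of just that decode step, with a hypothetical captured value:

    import base64

    def decode_output(output, base64_decode=False):
        """Mirror run()'s return path: optionally base64-decode captured stdout."""
        if base64_decode:
            return base64.b64decode(output).decode("utf-8")
        return output

    print(decode_output("aGVsbG8=", base64_decode=True))  # -> hello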
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': [], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: 
\"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'python': [], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install azdata CLI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python --version')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip install -r https://aka.ms/azdata')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display azdata version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"azdata --version\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Related (SOP063, SOP054)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb deleted file mode 100644 index 823be48e..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb +++ /dev/null @@ -1,424 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP055 - Uninstall azdata CLI (using pip)\n", - "=========================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop055-uninstall-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
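The stderr loop above deliberately tolerates bytes that are not valid UTF-8 (such as the \xa0 in the az error quoted in the comment) instead of crashing the whole cell. That decode-with-fallback step, as a standalone sketch (decode_stderr_line is a hypothetical name):

    def decode_stderr_line(raw: bytes) -> str:
        """Decode a raw stderr line as UTF-8; on failure, warn and return an empty string."""
        try:
            return raw.decode()
        except UnicodeDecodeError:
            print("WARNING: Unable to decode stderr line, printing raw bytes:")
            print(raw)
            return ""

    print(decode_stderr_line(b"ERROR: token expired\n"), end="")
    decode_stderr_line(b"\xa0 invalid start byte\n")  # warns, returns ""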
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Uninstall azdata CLI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "run(f'python -m pip uninstall -r https://aka.ms/azdata -y')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip list\n", - "\n", - "Verify there are no azdata modules in the list" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip list')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Related (SOP055, SOP064)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb 
b/Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb deleted file mode 100644 index 67955b4b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb +++ /dev/null @@ -1,415 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP059 - Install Kubernetes Python module\n", - "=========================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop059-install-kubernetes-module.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip install the kubernetes module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "run(f'python -m pip install kubernetes>=10.0.0')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip list installed modules" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip list')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb deleted file mode 100644 index 2afefcf4..00000000 ---
a/Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb +++ /dev/null @@ -1,415 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP060 - Uninstall kubernetes module\n", - "====================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop060-uninstall-kubernetes-module.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip uninstall the kubernetes module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "run(f'python -m pip uninstall kubernetes -y')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip list installed modules" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip list')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb deleted file mode 100644 index 425ad484..00000000 ---
a/Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb +++ /dev/null @@ -1,429 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP062 - Install ipython-sql and pyodbc modules\n", - "===============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop062-install-ipython-sql-module.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': []}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n", - "install_hint = {'python': []}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip install the ipython-sql module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip install ipython-sql')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip install the pyodbc module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip install pyodbc')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pip list installed modules" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'python -m pip list')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git
a/Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb deleted file mode 100644 index a07d3578..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb +++ /dev/null @@ -1,457 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP063 - Install azdata CLI (using package manager)\n", - "===================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - "                #\n", - "                #   UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - "                #\n", - "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - "                print(line)\n", - "                line_decoded = \"\"\n", - "                pass\n", - "            else:\n", - "\n", - "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - "                # print this empty \"ERR:\" as it is confusing.\n", - "                #\n", - "                if line_decoded == \"\":\n", - "                    continue\n", - "                \n", - "                print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - "                    exit_code_workaround = 1\n", - "\n", - "                # inject HINTs to next TSG/SOP based on output in stderr\n", - "                #\n", - "                if user_provided_exe_name in error_hints:\n", - "                    for error_hint in error_hints[user_provided_exe_name]:\n", - "                        if line_decoded.find(error_hint[0]) != -1:\n", - "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - "                # apply expert rules (to run follow-on notebooks), based on output\n", - "                #\n", - "                if rules is not None:\n", - "                    apply_expert_rules(line_decoded)\n", - "\n", - "                # Check if this is a transient error; if so, automatically retry (recursively)\n", - "                #\n", - "                if user_provided_exe_name in retry_hints:\n", - "                    for retry_hint in retry_hints[user_provided_exe_name]:\n", - "                        if line_decoded.find(retry_hint) != -1:\n", - "                            if retry_count < MAX_RETRIES:\n", - "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - "                                retry_count = retry_count + 1\n", - "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - "                                if return_output:\n", - "                                    if base64_decode:\n", - "                                        import base64\n", - "\n", - "                                        return base64.b64decode(output).decode('utf-8')\n", - "                                    else:\n", - "                                        return output\n", - "\n", - "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - "    # WORKAROUND: We avoid the infinite hang above in the `azdata notebook run` failure case by inferring success (from stdout output), so\n", - "    # don't wait here if success is already known from above\n", - "    #\n", - "    if wait: \n", - "        if p.returncode != 0:\n", - "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - "    else:\n", - "        if exit_code_workaround != 0:\n", - "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - "    if return_output:\n", - "        if base64_decode:\n", - "            import base64\n", - "\n", - "            return base64.b64decode(output).decode('utf-8')\n", - "        else:\n", - "            return output\n", - "\n", - "def load_json(filename):\n", - "    \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - "    with open(filename, encoding=\"utf8\") as json_file:\n", - "        return json.load(json_file)\n", - "\n", - "def load_rules():\n", - "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - "    #\n", - "    try:\n", - "        j = load_json(\"sop063-packman-install-azdata.ipynb\")\n", - "    except:\n", - "        pass # If the user has renamed the book, we can't load ourselves. 
NOTE: Is there a way in Jupyter to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # A '*' in the rule was escaped as '\\*'; undo that here.\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'python': [], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: 
\"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'python': [], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install azdata CLI using OS specific package manager" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import platform\n", - "\n", - "from pathlib import Path\n", - "\n", - "if platform.system() == \"Darwin\":\n", - " run('brew tap microsoft/azdata-cli-release')\n", - " run('brew update')\n", - " run('brew install azdata-cli')\n", - "elif platform.system() == \"Windows\":\n", - " # Download the .msi to %TEMP%\n", - " #\n", - " run(f'curl -L https://aka.ms/azdata-msi -o {os.path.join(os.environ[\"TEMP\"], \"azdata-cli.msi\")}')\n", - "\n", - " # Install using the .msi.\n", - " #\n", - " # NOTES:\n", - " # 1. This will pop up the User Access Control dialog, press 'Yes'\n", - " # 2. The installer dialog will appear (it may start as a background window)\n", - " #\n", - " run(f'cmd /c {os.path.join(os.environ[\"TEMP\"], \"azdata-cli.msi\")} /passive')\n", - "\n", - " # Clean up\n", - " #\n", - " run(f'cmd /c del {os.path.join(os.environ[\"TEMP\"], \"azdata-cli.msi\")}')\n", - "elif platform.system() == \"Linux\":\n", - " webbrowser.open('https://docs.microsoft.com/en-us/sql/big-data-cluster/deploy-install-azdata-linux-package')\n", - "else:\n", - " raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display azdata version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"azdata --version\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### NOTE: Restart existing Azure Data Studio instances\n", - "\n", - "Other tabs in this instance of Azure Data Studio, and other instances of\n", - "Azure Data Studio will not have their `PATH` updated until they\u2019re\n", - "restarted. 
**Azure Data Studio must be restarted for it to get the\n", - "updated `PATH` settings.**\n", - "\n", - "### Related (SOP064, SOP055)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb deleted file mode 100644 index ebda8c63..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb +++ /dev/null @@ -1,431 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP064 - Uninstall azdata CLI (using package manager)\n", - "=====================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - "        kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - "    Need to actually pass in as '\"':\n", - "\n", - "        kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - "    The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - "    \n", - "        `iter(p.stdout.readline, b'')`\n", - "\n", - "    The shlex.split call does the right thing for each platform; just use the '\"' pattern in place of a '\n", - "    \"\"\"\n", - "    MAX_RETRIES = 5\n", - "    output = \"\"\n", - "    retry = False\n", - "\n", - "    global first_run\n", - "    global rules\n", - "\n", - "    if first_run:\n", - "        first_run = False\n", - "        rules = load_rules()\n", - "\n", - "    # When running `azdata sql query` on Windows, replace any \n in \"\"\" strings with \" \", otherwise we see:\n", - "    #\n", - "    #   ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - "    #\n", - "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - "        cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - "    # shlex.split is required on bash and for Windows paths with spaces\n", - "    #\n", - "    cmd_actual = shlex.split(cmd)\n", - "\n", - "    # Store this (i.e. kubectl, python etc.) to support binary context-aware error_hints and retries\n", - "    #\n", - "    user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - "    # When running python, use the python in the ADS sandbox ({sys.executable})\n", - "    #\n", - "    if cmd.startswith(\"python \"):\n", - "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - "    # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - "    # with:\n", - "    #\n", - "    #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - "    #\n", - "    # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - "    #\n", - "    if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - "        os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - "    #\n", - "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - "    # To aid supportability, determine which binary file will actually be executed on the machine\n", - "    #\n", - "    which_binary = None\n", - "\n", - "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n", - "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n", - "    # of CURL exists on the machine, use that one.  (Unfortunately the curl.exe in System32 is almost
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sop064-packman-uninstall-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # A '*' in the rule was escaped as '\\*'; undo that here.\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. 
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Uninstall azdata CLI using OS-specific package manager" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import platform\n", - "import webbrowser\n", - "\n", - "from pathlib import Path\n", - "\n", - "if platform.system() == \"Darwin\":\n", - "    run('brew uninstall azdata-cli')\n", - "elif platform.system() == \"Windows\":\n", - "\n", - "    # Get the product guid to be able to do the .msi uninstall (this can take 2 or 3 minutes)\n", - "    #\n", - "    product_guid = run(\"\"\"powershell -Command \"$product = get-wmiobject Win32_Product | Where {$_.Name -match 'Azure Data CLI'}; $product.IdentifyingNumber\" \"\"\", return_output=True)\n", - "\n", - "    print(f\"The product guid is: {product_guid}\")\n", - "\n", - "    # Uninstall using the product guid\n", - "    #\n", - "    # NOTES:\n", - "    #   1. This will pop up the User Access Control dialog, press 'Yes'\n", - "    #   2. The installer dialog will appear (it may start as a background window)\n", - "    #\n", - "    run(f'msiexec /uninstall {product_guid} /passive')\n", - "\n", - "elif platform.system() == \"Linux\":\n", - "    webbrowser.open('https://docs.microsoft.com/en-us/sql/big-data-cluster/deploy-install-azdata-linux-package')\n", - "else:\n", - "    raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Related (SOP063, SOP054)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb b/Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb deleted file mode 100644 index 55299bb1..00000000 --- a/Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP069 - Install ODBC for SQL Server\n", - "====================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Some subcommands in `azdata` require the SQL Server ODBC driver. 
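A quick\n", - "way to check whether the driver is already registered is to list the\n", - "installed ODBC drivers, for example (this sketch assumes the `pyodbc`\n", - "Python module is available):\n", - "\n", - "    import pyodbc\n", - "\n", - "    print(pyodbc.drivers())  # look for 'ODBC Driver 17 for SQL Server' in the list\n", - "\n", - "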
If the\n", - "driver is not installed, the following error is given:\n", - "\n", - "> ERROR: Error processing command: \u201cInterfaceError\u201d (\u2018IM002\u2019, \u2018\\[IM002\\]\n", - "> \\[Microsoft\\]\\[ODBC Driver Manager\\] Data source name not found and no\n", - "> default driver specified (0) (SQLDriverConnect)\u2019)\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Install ODBC Driver 17 for SQL Server" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import platform\n", - "import webbrowser\n", - "\n", - "if platform.system() == \"Windows\":\n", - " webbrowser.open('https://www.microsoft.com/en-us/download/details.aspx?id=56567')\n", - "else:\n", - " webbrowser.open('https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md b/Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md deleted file mode 100644 index 742fdeba..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md +++ /dev/null @@ -1,49 +0,0 @@ -# Logs notebooks - -- A set of notebooks to gather and analyze logs from a SQL Server Big Data Cluster. The analysis process will SUGGEST follow on TSGs to run for known issue found in the logs - -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG046 - Knox gateway logs ](tsg046-get-knox-logs.ipynb) - -- [TSG036 - Controller logs ](tsg036-get-controller-logs.ipynb) - -- [TSG034 - Livy logs ](tsg034-get-livy-logs.ipynb) - -- [TSG035 - Spark History logs ](tsg035-get-sparkhistory-logs.ipynb) - -- [TSG030 - SQL Server errorlog files ](tsg030-get-errorlog-from-all-pods.ipynb) - -- [TSG031 - SQL Server PolyBase logs ](tsg031-get-polybase-logs-for-all-pods.ipynb) - -- [TSG095 - Hadoop namenode logs ](tsg095-get-namenode-logs.ipynb) - -- [TSG090 - Yarn nodemanager logs ](tsg090-get-nodemanager-logs.ipynb) - -- [TSG088 - Hadoop datanode logs ](tsg088-get-datanode-logs.ipynb) - -- [TSG096 - Zookeeper logs ](tsg096-get-zookeeper-logs.ipynb) - -- [TSG073 - InfluxDB logs ](tsg073-get-influxdb-logs.ipynb) - -- [TSG076 - Elastic Search logs ](tsg076-get-elastic-search-logs.ipynb) - -- [TSG077 - Kibana logs ](tsg077-get-kibana-logs.ipynb) - -- [TSG092 - Supervisord log tail for all containers in BDC ](tsg092-get-all-supervisord-log-tails.ipynb) - -- [TSG093 - Agent log tail for all containers in BDC ](tsg093-get-all-agent-log-tails.ipynb) - -- [TSG094 - Grafana logs ](tsg094-get-grafana-logs.ipynb) - -- [TSG117 - App-Deploy Proxy Nginx Logs ](tsg117-get-approxy-nginx-logs.ipynb) - -- [TSG120 - Provisioner log tail for all containers in BDC ](tsg120-get-all-provisioner-log-tails.ipynb) - -- [TSG121 - Supervisor mssql-server logs ](tsg121-get-all-supervisor-mssql-logs.ipynb) - -- [TSG122 - Hive Metastore logs ](tsg122-get-hive-metastore-logs.ipynb) - -- [TSG123 - Hive logs ](tsg123-get-hive-logs.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb 
b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb deleted file mode 100644 index e4e6e942..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb +++ /dev/null @@ -1,289 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG030 - SQL Server errorlog files\n", - "==================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"mssql-server\"\n", - "log_files = [ \"/var/opt/mssql/log/errorlog\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{35}Error:\"),\n", - "    re.compile(\".{35}Login failed for user '##\"),\n", - "    re.compile(\".{35}SqlDumpExceptionHandler\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "from IPython.display import Markdown\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set the \\[0\\] index in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
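, - "\n", - "For example, to pin the namespace before launching Azure Data Studio (the\n", - "namespace name `mssql-cluster` here is only a placeholder):\n", - "\n", - "    export AZDATA_NAMESPACE=mssql-cluster\n", - "\n", - "(On Windows, use `set AZDATA_NAMESPACE=mssql-cluster` instead.)"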
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - "    the connection file; therefore we are unable to establish the filename\n", - "    \"\"\"\n", - "    connection_file = os.path.basename(ipykernel.get_connection_file())\n", - "    \n", - "    # If the runtime has the kernel_id in the connection filename, use it to\n", - "    # get the real notebook name at runtime; otherwise, use the notebook \n", - "    # filename from build time.\n", - "    try: \n", - "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - "    except:\n", - "        pass\n", - "    else:\n", - "        for servers in list(notebookapp.list_running_servers()):\n", - "            try:\n", - "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - "            except:\n", - "                pass\n", - "            else:\n", - "                for nn in json.loads(response.text):\n", - "                    if nn['kernel']['id'] == kernel_id:\n", - "                        return nn['path']\n", - "\n", - "def load_json(filename):\n", - "    with open(filename, encoding=\"utf8\") as json_file:\n", - "        return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - "    file_name = get_notebook_name()\n", - "\n", - "    if file_name is None:\n", - "        return None\n", - "    else:\n", - "        j = load_json(file_name)\n", - "\n", - "        if \"azdata\" not in j[\"metadata\"] or \\\n", - "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "            return []\n", - "        else:\n", - "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules is None:\n", - "    print(\"\")\n", - "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - "    print(rules)\n", - "    hints = 0\n", - "    if len(rules) > 0:\n", - "        for entry in entries_for_analysis:\n", - "            for rule in rules:\n", - "                if entry.find(rule[0]) != -1:\n", - "                    print(entry)\n", - "\n", - "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - "                    hints = hints + 1\n", - "\n", - "    print(\"\")\n", - "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb deleted file mode 100644 index d1c5d218..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb +++ /dev/null @@ -1,287 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG031 - SQL Server PolyBase logs\n", - "=================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"mssql-server\"\n", - "log_files = [ \"/var/opt/mssql/log/polybase/MSSQLSERVER_*_errors.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\"(.*)MppSqlException\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "from IPython.display import Markdown\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set the \\[0\\] index in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - "    the connection file; therefore we are unable to establish the filename\n", - "    \"\"\"\n", - "    connection_file = os.path.basename(ipykernel.get_connection_file())\n", - "    \n", - "    # If the runtime has the kernel_id in the connection filename, use it to\n", - "    # get the real notebook name at runtime; otherwise, use the notebook \n", - "    # filename from build time.\n", - "    try: \n", - "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - "    except:\n", - "        pass\n", - "    else:\n", - "        for servers in list(notebookapp.list_running_servers()):\n", - "            try:\n", - "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - "            except:\n", - "                pass\n", - "            else:\n", - "                for nn in json.loads(response.text):\n", - "                    if nn['kernel']['id'] == kernel_id:\n", - "                        return nn['path']\n", - "\n", - "def load_json(filename):\n", - "    with open(filename, encoding=\"utf8\") as json_file:\n", - "        return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - "    file_name = get_notebook_name()\n", - "\n", - "    if file_name is None:\n", - "        return None\n", - "    else:\n", - "        j = load_json(file_name)\n", - "\n", - "        if \"azdata\" not in j[\"metadata\"] or \\\n", - "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "            return []\n", - "        else:\n", - "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules is None:\n", - "    print(\"\")\n", - "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - "    print(rules)\n", - "    hints = 0\n", - "    if len(rules) > 0:\n", - "        for entry in entries_for_analysis:\n", - "            for rule in rules:\n", - "                if entry.find(rule[0]) != -1:\n", - "                    print(entry)\n", - "\n", - "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - "                    hints = hints + 1\n", - "\n", - "    print(\"\")\n", - "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb deleted file mode 100644 index e9fd6151..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb +++ /dev/null @@ -1,291 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG034 - Livy logs\n", - "==================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = 'hadoop-livy-sparkhistory'\n", - "log_files = [ '/var/log/supervisor/log/livy*' ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{17} WARN \"),\n", - "    re.compile(\".{17} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "from IPython.display import Markdown\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set the \\[0\\] index in the cell below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb deleted file mode 100644 index 8d39f58b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb +++ /dev/null @@ -1,291 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG035 - Spark History logs\n", - "===========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = 'hadoop-livy-sparkhistory'\n", - "log_files = [ \"/var/log/supervisor/log/sparkhistory*\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio.\n",
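- "\n",
- "For example, a sketch that lists every candidate namespace first (it assumes\n",
- "the 'api' client created by the Kubernetes client cell above):\n",
- "\n",
- "    for ns in api.list_namespace(label_selector='MSSQL_CLUSTER').items:\n",
- "        print(ns.metadata.name)\n"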
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb deleted file mode 100644 index 81b38383..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb +++ /dev/null @@ -1,327 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG036 - Controller logs\n", - "========================\n", - "\n", - "Get the tail of the current day's controller logs.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "from datetime import datetime\n", - "\n", - "tail_lines = 500\n", - "\n", - "# The controller log files are kept in a yyyy-mm-dd folder structure\n", - "#\n", - "d = datetime.utcnow()\n", - "date = \"{0}-{1:02d}-{2:02d}\".format(d.year, d.month, d.day)\n", - "folder = f\"/var/log/controller/{date}\"\n", - "\n", - "pod = None # All\n", - "container = 'controller'\n", - "log_files = [ f'{folder}/controller.log', f'{folder}/kube.log', f'{folder}/controller.out', f'{folder}/access.log' ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{26} WARN \"),\n", - "    re.compile(\".{26} ERROR \")\n", - "]\n", - "\n", - "print(\"Log files to get:\")\n", - "print(log_files)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable
AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG027 - Observe cluster\n", - " deployment](../diagnose/tsg027-observe-bdc-create.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "log_analyzer_rules": [ - [ - "doc is missing key: /data", - "TSG038", - "TSG038 - BDC create failures due to - doc is missing key", - "../repair/tsg038-doc-is-missing-key-error.ipynb" - ], - [ - "Failed when starting controller service. System.TimeoutException:\nOperation timed out after 10 minutes", - "TSG057", - "TSG057 - Failed when starting controller service. 
System.TimeoutException", - "../repair/tsg057-failed-when-starting-controller.ipynb" - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb deleted file mode 100644 index 61e7c0e1..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb +++ /dev/null @@ -1,328 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG046 - Knox gateway logs\n", - "==========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Knox returns a 500 error to the client and strips the details (the stack\n", - "trace) that point to the underlying cause. Use this TSG to get the Knox\n", - "gateway logs from the cluster.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = 'knox'\n", - "log_files = [ \"/var/log/knox/gateway.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio.\n",
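- "\n",
- "For example, a sketch that selects the namespace by an explicit name rather\n",
- "than by \\[0\\] ('mssql-cluster' is a placeholder; 'api' comes from the cell\n",
- "above):\n",
- "\n",
- "    items = api.list_namespace(label_selector='MSSQL_CLUSTER').items\n",
- "    namespace = next(ns.metadata.name for ns in items\n",
- "                     if ns.metadata.name == 'mssql-cluster')\n"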
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "rules": [ - [ - "SAM008", - "code", - "stream", - "name", - "stdout", - "text", - ".\\*ERROR: 500" - ] - ], - "log_analyzer_rules": [ - [ - "Invalid object name \u2018roles\\_permissions\u2019", - "TSG039", - "TSG039 - Invalid object name 'role_permissions'", - "../repair/tsg039-invalid-object-name-role-permissions.ipynb" - ], - [ - "Name node is in safe mode", - "TSG024", - "TSG024 - Namenode is in safe mode", - "../repair/tsg024-name-node-is-in-safe-mode.ipynb" - ], - [ - "Connection exception dispatching request", - "TSG034", - "TSG034 - Livy logs", - "../log-analyzers/tsg034-get-livy-logs.ipynb" - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb deleted file mode 100644 index 1a7c7f74..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG073 - InfluxDB logs\n", - "======================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"influxdb\"\n", - "log_files = [ \"/var/log/supervisor/log/influxdb*.log\" ]\n", - "\n", - "expressions_to_analyze = []" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for
the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb deleted file mode 100644 index 4c995d01..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG076 - Elastic Search logs\n", - "============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"elasticsearch\"\n", - "log_files = [ \"/var/log/supervisor/log/elasticsearch*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(r\".{26}\\\\[WARN \\\\]\"),\n", - "    re.compile(r\".{26}\\\\[ERROR\\\\]\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio.\n",
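- "\n",
- "For example, a sketch that inspects the labels on a candidate namespace to\n",
- "confirm it belongs to a Big Data Cluster ('mssql-cluster' is a placeholder;\n",
- "'api' comes from the cell above):\n",
- "\n",
- "    print(api.read_namespace('mssql-cluster').metadata.labels)\n"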
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb deleted file mode 100644 index 0ea56bf0..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG077 - Kibana logs\n", - "====================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"kibana\"\n", - "log_files = [ \"/var/log/supervisor/log/kibana*.log\" ]\n", - "\n", - "expressions_to_analyze = [ ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio.\n",
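- "\n",
- "For example, a sketch that fails fast instead of silently picking the first\n",
- "match when several Big Data Clusters are found ('api' comes from the cell\n",
- "above):\n",
- "\n",
- "    items = api.list_namespace(label_selector='MSSQL_CLUSTER').items\n",
- "    if len(items) != 1:\n",
- "        raise ValueError(f'expected 1 BDC namespace, found {len(items)}')\n",
- "    namespace = items[0].metadata.name\n"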
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb deleted file mode 100644 index b2b67ecf..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb +++ /dev/null @@ -1,290 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG088 - Hadoop datanode logs\n", - "=============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop\"\n", - "log_files = [ \"/var/log/supervisor/log/datanode*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
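The cell that follows this NOTE silently takes `items[0]` from the label-selector query. A minimal standalone sketch of the same lookup, extended to list every labelled namespace first so the choice is visible; it assumes a reachable cluster, and `matches` is an illustrative name, not part of the notebook:

```python
import os
from kubernetes import client, config

config.load_kube_config()  # as in the cell above; use load_incluster_config() inside a pod
api = client.CoreV1Api()

# Every BDC namespace carries the MSSQL_CLUSTER label, so this returns one
# item per big data cluster deployed in the Kubernetes cluster.
matches = [ns.metadata.name for ns in api.list_namespace(label_selector='MSSQL_CLUSTER').items]
print(f"Namespaces labelled MSSQL_CLUSTER: {matches}")

# AZDATA_NAMESPACE wins when set; otherwise the notebook takes matches[0],
# which is only safe when a single big data cluster exists.
namespace = os.environ.get("AZDATA_NAMESPACE", matches[0] if matches else None)
print(f"Namespace this notebook would use: {namespace}")
```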
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Hadoop datanode logs from the hadoop container\n", - "\n", - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb deleted file mode 100644 index ebeda02f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG090 - Yarn nodemanager logs\n", - "==============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop\"\n", - "log_files = [ \"/var/log/supervisor/log/nodemanager*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
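Every notebook in this family funnels through the same `stream(...)` exec call to tail files in place. Here is that call in isolation, a sketch assuming kubeconfig access; the pod name and namespace below are placeholders, not values from the notebook:

```python
from kubernetes import client, config
from kubernetes.stream import stream

config.load_kube_config()
api = client.CoreV1Api()

pod_name  = "nmnode-0-0"     # placeholder pod name
namespace = "mssql-cluster"  # placeholder namespace
log_file  = "/var/log/supervisor/log/nodemanager*.log"

# The exec API runs a command inside the container; wrapping it in
# '/bin/sh -c' lets the container's shell expand the wildcard in log_file.
output = stream(api.connect_get_namespaced_pod_exec,
                pod_name, namespace,
                command=['/bin/sh', '-c', f'tail -n 50 {log_file}'],
                container="hadoop", stderr=True, stdout=True)
print(output)
```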
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb deleted file mode 100644 index be73e5aa..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG092 - Supervisord log tail for all containers in BDC\n", - "=======================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = None # All containers\n", - "log_files = [ \"/var/log/supervisor/supervisord.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
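The `expressions_to_analyze` patterns rely on the fixed-width timestamp that log4j-style loggers emit: `.{23}` consumes exactly a `2020-10-19 09:32:32,123` prefix so the match lands on the level column. A self-contained sketch of the same filter; the sample lines are invented for illustration:

```python
import re

expressions_to_analyze = [
    re.compile(".{23} WARN "),
    re.compile(".{23} ERROR "),
]

# Invented log4j-style lines: 23 characters of timestamp, then the level.
sample_lines = [
    "2020-10-19 09:32:32,123 INFO  startup complete",
    "2020-10-19 09:32:33,456 WARN  disk usage at 81%",
    "2020-10-19 09:32:34,789 ERROR connection refused",
]

# re.match anchors at the start of the line, exactly as in the notebook loop,
# so only lines whose level column is WARN or ERROR are collected.
entries_for_analysis = [line for line in sample_lines
                        if any(expr.match(line) for expr in expressions_to_analyze)]
print(entries_for_analysis)  # the WARN and ERROR lines only
```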
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb deleted file mode 100644 index 1362cb4a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb +++ /dev/null @@ -1,262 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG093 - Agent log tail for all containers in BDC\n", - "=================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "tail_lines = 100\n", - "line_offset = 27 # Skip the date/time at start of line\n", - "\n", - "cmd = f'tail -n {tail_lines} /var/log/agent/agent.log'\n", - "\n", - "coalesce_duplicates = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log in all pod containers\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
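TSG093 differs from its siblings by folding runs of duplicate lines. The loop below is the same coalescing logic as in the large cell further on, run offline against invented agent.log text so its behavior is visible: `line_offset` skips the timestamp so messages differing only in time compare equal, and the `(xN)` line repeats the first occurrence's text with the total count:

```python
line_offset = 27  # width of the date/time prefix in agent.log lines

# Invented log text: three identical heartbeats, then a different message.
logs = "\n".join([
    "2020/10/19 09:32:32.000001 [A] heartbeat ok",
    "2020/10/19 09:32:33.000001 [A] heartbeat ok",
    "2020/10/19 09:32:34.000001 [A] heartbeat ok",
    "2020/10/19 09:32:35.000001 [A] disk check failed",
])

previous_line = ""
duplicates = 1
for line in logs.split('\n'):
    if line[line_offset:] != previous_line[line_offset:]:
        if duplicates != 1:
            # Emit the coalesced count for the run that just ended.
            print(f"\t{previous_line} (x{duplicates})")
        print(f"\t{line}")
        duplicates = 1
    else:
        duplicates += 1
        continue  # previous_line is deliberately left at the first occurrence
    previous_line = line
```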
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Markdown\n", - "\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "pod_list = api.list_namespaced_pod(namespace)\n", - "pod_names = [pod.metadata.name 
for pod in pod_list.items]\n", - "\n", - "for pod in pod_list.items:\n", - "    container_names = [container.name for container in pod.spec.containers]\n", - "    for container in container_names:\n", - "        print (f\"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")\n", - "        try:\n", - "            logs=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)\n", - "\n", - "            if coalesce_duplicates:\n", - "                previous_line = \"\"\n", - "                duplicates = 1\n", - "                for line in logs.split('\\n'):\n", - "                    if line[line_offset:] != previous_line[line_offset:]:\n", - "                        if duplicates != 1:\n", - "                            print(f\"\\t{previous_line} (x{duplicates})\")\n", - "                        print(f\"\\t{line}\")\n", - "\n", - "                        for rule in rules:\n", - "                            if line[line_offset:].find(rule[0]) != -1:\n", - "                                display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - "\n", - "                        duplicates = 1\n", - "                    else:\n", - "                        duplicates = duplicates + 1\n", - "                        continue\n", - "\n", - "                    previous_line = line\n", - "            else:\n", - "                print(logs)\n", - "\n", - "        except Exception:\n", - "            print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "log_analyzer_rules": [ - [ - "Failed to get file names from controller with Error", - "TSG040", - "TSG040 - Failed to get file names from controller with Error", - "../repair/tsg040-failed-get-file-names-controller.ipynb" - ], - [ - "Please increase sysctl fs.aio-max-nr", - "TSG041", - "TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)", - "../repair/tsg041-increase-fs-aio-max-nr.ipynb" - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb deleted file mode 100644 index 7828859b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG094 - Grafana logs\n", - "=====================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"grafana\"\n", - "log_files = [ \"/var/log/supervisor/log/grafana*.log\" ]\n", - "\n", - "expressions_to_analyze = []" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        
config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - "    except IndexError:\n", - "        from IPython.display import Markdown\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - "    if pod is None or p.metadata.name == pod:\n", - "        for c in p.spec.containers:\n", - "            if container is None or c.name == container:\n", - "                for log_file in log_files:\n", - "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - "                    try:\n", - "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - "                    except Exception:\n", - "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - "                    else:\n", - "                        for line in output.split('\\n'):\n", - "                            for expression in expressions_to_analyze:\n", - "                                if expression.match(line):\n", - "                                    entries_for_analysis.append(line)\n", - "                            print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further 
analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb deleted file mode 100644 index ac33d838..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG095 - Hadoop namenode logs\n", - "=============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop\"\n", - "log_files = [ \"/var/log/supervisor/log/namenode*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
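For reference, each entry in `log_analyzer_rules` (read by `get_notebook_rules()` from the notebook's `azdata` -> `expert` metadata) is a four-element list: the substring to search for, a TSG number, a link title, and a relative notebook path; the analysis cell uses indices 0, 2 and 3. A sketch using the TSG040 rule that appears in TSG093's metadata, with an invented log entry:

```python
# Rule format: [substring to find, TSG id, link title, link target].
rules = [
    ["Failed to get file names from controller with Error",
     "TSG040",
     "TSG040 - Failed to get file names from controller with Error",
     "../repair/tsg040-failed-get-file-names-controller.ipynb"],
]

# Invented log entry for illustration.
entries_for_analysis = [
    "2020-10-19 09:32:32,123 ERROR Failed to get file names from controller with Error 404",
]

# Same matching as the analysis cell: plain substring search, then a Markdown
# hint that links rule[2] (title) to rule[3] (relative notebook path).
for entry in entries_for_analysis:
    for rule in rules:
        if entry.find(rule[0]) != -1:
            print(entry)
            print(f"HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.")
```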
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb deleted file mode 100644 index 77fd1f5b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG096 - Zookeeper logs\n", - "=======================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop\"\n", - "log_files = [ \"/var/log/supervisor/log/zkfc*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - "    re.compile(\".{23} WARN \"),\n", - "    re.compile(\".{23} ERROR \")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            from IPython.display import Markdown\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
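The NOTE above gives two escape hatches for multi-cluster environments. The second can also be applied from inside a running kernel, since the next cell only consults `os.environ`; a sketch, where `mssql-cluster` is an illustrative namespace name:

```python
import os

# Equivalent of exporting AZDATA_NAMESPACE before starting Azure Data Studio,
# but done inside the kernel so only this notebook session is affected.
os.environ["AZDATA_NAMESPACE"] = "mssql-cluster"  # illustrative namespace

# The first escape hatch is to edit the lookup itself, e.g. take items[1]
# instead of items[0] to select the second labelled namespace explicitly.
```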
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
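The tail cell above boils down to one call: run `tail` inside the target container and capture its stdout over the Kubernetes exec API. A reusable sketch of that call, assuming `api` is the `CoreV1Api` instantiated earlier; the pod name in the commented example is illustrative, while the `hadoop` container and zkfc log path are the ones this notebook targets.

```python
from kubernetes.stream import stream

def tail_file(api, namespace, pod_name, container_name, path, lines=500):
    """Return the last `lines` lines of `path` from one container."""
    return stream(api.connect_get_namespaced_pod_exec,
                  pod_name, namespace,
                  command=["/bin/sh", "-c", f"tail -n {lines} {path}"],
                  container=container_name,
                  stderr=True, stdout=True, stdin=False, tty=False)

# e.g. (pod name illustrative):
# print(tail_file(api, namespace, "sparkhead-0", "hadoop",
#                 "/var/log/supervisor/log/zkfc*.log"))
```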
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb deleted file mode 100644 index f63a260c..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb +++ /dev/null @@ -1,297 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG117 - App-Deploy Proxy Nginx Logs\n", - "====================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 200\n", - "\n", - "pod = None # All\n", - "container = \"app-service-proxy\"\n", - "log_files = [ \"/var/log/nginx/error.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - " re.compile(\".{23}[error]\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
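One subtlety in the parameters above: in a regular expression, unescaped square brackets form a character class, so `.{23}[error]` matches any single `e`, `o`, or `r` after 23 characters rather than the literal `[error]` tag. With nginx's standard `YYYY/MM/DD HH:MM:SS [level]` prefix that still catches error lines, but it can also catch lines at other levels. If the intent is the literal tag, it needs escaping; a quick check (log lines are illustrative):

```python
import re

error_line = "2020/10/19 09:32:32 [error] 42#42: connect() failed"
warn_line  = "2020/10/19 09:32:32 [warn ] an upstream response is buffered"

loose  = re.compile(r".{23}[error]")    # character class: any of e, o, r
strict = re.compile(r".{20}\[error\]")  # the literal tag (starts at offset 20)

print(bool(loose.match(error_line)), bool(strict.match(error_line)))  # True True
print(bool(loose.match(warn_line)),  bool(strict.match(warn_line)))   # True False
```

The same square-bracket subtlety applies to the `[WARN ]` and `[ERROR]` patterns in the supervisor and Hive analyzers further down.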
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
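The rule pass at the end is plain substring matching over the collected entries. A toy rerun of it, using the rule shape `[match_text, tsg_id, title, link]` and the TSG113 rule this notebook carries in its metadata (shown below); the sample log entry is illustrative:

```python
rules = [["(111: Connection refused) while connecting to upstream",
          "TSG113", "TSG113 - Run App-Deploy", "../monitor-bdc/tsg113-run-app.ipynb"]]

entries_for_analysis = [
    "2020/10/19 09:32:32 [error] 42#42: (111: Connection refused) while connecting to upstream",
]

hints = 0
for entry in entries_for_analysis:
    for rule in rules:
        if rule[0] in entry:  # same test as entry.find(rule[0]) != -1
            print(entry)
            print(f"HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.")
            hints += 1
print(f"{len(entries_for_analysis)} log entries analyzed, {hints} hint(s) raised.")
```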
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "log_analyzer_rules": [ - [ - "(111: Connection refused) while connecting to upstream", - "TSG113", - "TSG113 - Run App-Deploy", - "../monitor-bdc/tsg113-run-app.ipynb" - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb deleted file mode 100644 index 6a650212..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb +++ /dev/null @@ -1,249 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG120 - Provisioner log tail for all containers in BDC\n", - "=======================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "tail_lines = 100\n", - "line_offset = 27 # Skip the date/time at start of line\n", - "\n", - "cmd = f'tail -n {tail_lines} /var/log/provisioner/provisioner.log'\n", - "\n", - "coalesce_duplicates = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log in all pod containers\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
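The `line_offset` and `coalesce_duplicates` parameters above drive a dedup pass in the combined cell further below: consecutive lines that are identical once the leading date/time is skipped are collapsed into one line with a repeat count. The same idea expressed with `itertools.groupby`, as a self-contained sketch; timestamps are illustrative and sized so the 27-character offset lands just past the prefix:

```python
from itertools import groupby

line_offset = 27  # mirrors the parameter above: skip the date/time prefix

log = [
    "2020-10-19 09:32:32.000001 Start provisioner",
    "2020-10-19 09:32:33.000001 Waiting for volume",
    "2020-10-19 09:32:34.000001 Waiting for volume",
    "2020-10-19 09:32:35.000001 Volume bound",
]

for _, group in groupby(log, key=lambda line: line[line_offset:]):
    group = list(group)
    suffix = f" (x{len(group)})" if len(group) > 1 else ""
    print(group[0] + suffix)
```

Output keeps the first timestamp of each run, with `Waiting for volume` printed once and `(x2)` appended.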
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Markdown\n", - "\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "pod_list = api.list_namespaced_pod(namespace)\n", - "pod_names = [pod.metadata.name 
for pod in pod_list.items]\n", - "\n", - "for pod in pod_list.items:\n", - " container_names = [container.name for container in pod.spec.containers]\n", - " for container in container_names:\n", - " print (f\"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")\n", - " try:\n", - " logs=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)\n", - "\n", - " if coalesce_duplicates:\n", - " previous_line = \"\"\n", - " duplicates = 1\n", - " for line in logs.split('\\n'):\n", - " if line[line_offset:] != previous_line[line_offset:]:\n", - " if duplicates != 1:\n", - " print(f\"\\t{previous_line} (x{duplicates})\")\n", - " print(f\"\\t{line}\")\n", - "\n", - " for rule in rules:\n", - " if line[line_offset:].find(rule[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{rule[2]}](rule[3]) to resolve this issue.'))\n", - "\n", - " duplicates = 1\n", - " else:\n", - " duplicates = duplicates + 1\n", - " continue\n", - "\n", - " previous_line = line\n", - " else:\n", - " print(logs)\n", - "\n", - " except Exception:\n", - " print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "expert": { - "log_analyzer_rules": [] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb deleted file mode 100644 index 7a566053..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb +++ /dev/null @@ -1,291 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG121 - Supervisor mssql-server logs\n", - "=====================================\n", - "\n", - "These supervisor mssql-server logs can contain some more information\n", - "from Polybase, not available in errorlog or the polybase logs.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"mssql-server\"\n", - "log_files = [ \"/var/log/supervisor/log/mssql-server-*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - " re.compile(\".{26}[WARN ]\"),\n", - " re.compile(\".{26}[ERROR]\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " 
except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log 
entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb deleted file mode 100644 index a92ba9ec..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG122 - Hive Metastore logs\n", - "============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop-hivemetastore\"\n", - "log_files = [ \"/var/log/supervisor/log/hivemetastorehttp*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - " re.compile(\".{26}[WARN ]\"),\n", - " re.compile(\".{26}[ERROR]\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
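Note the wildcard in `log_files` above. The Kubernetes exec API runs the given argv directly, with no shell in between, so the glob would reach `tail` as a literal `*` if it were not wrapped in `/bin/sh -c` as the tail cell below does; the container's shell expands it, and `tail` given several files prefixes each with a `==> file <==` header. A rough local equivalent of that expansion (the path is illustrative and will match nothing outside a BDC container):

```python
import glob

# Local stand-in for `tail -n 500 /var/log/supervisor/log/hivemetastorehttp*.log`
for path in glob.glob("/var/log/supervisor/log/hivemetastorehttp*.log"):
    with open(path, errors="replace") as f:
        tail = f.readlines()[-500:]  # tail -n 500 equivalent
    print(f"==> {path} <==")
    print("".join(tail), end="")
```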
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). 
{hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb deleted file mode 100644 index c6e5cf30..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG123 - Hive logs\n", - "==================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "tail_lines = 500\n", - "\n", - "pod = None # All\n", - "container = \"hadoop-hivemetastore\"\n", - "log_files = [ \"/var/log/supervisor/log/hive-*.log\" ]\n", - "\n", - "expressions_to_analyze = [\n", - " re.compile(\".{26}[WARN ]\"),\n", - " re.compile(\".{26}[ERROR]\")\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
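The `get_notebook_rules` pattern repeated through these analyzers walks nested metadata with chained `not in` checks. Once the filename is known, the same lookup collapses to chained `dict.get` calls; a sketch (it deliberately skips the `None` case, which only arises when the filename cannot be resolved at runtime):

```python
import json

def rules_from_notebook(path):
    """log_analyzer_rules from an .ipynb file's metadata, or [] if absent."""
    with open(path, encoding="utf8") as f:
        metadata = json.load(f).get("metadata", {})
    return (metadata.get("azdata", {})
                    .get("expert", {})
                    .get("log_analyzer_rules", []))

# e.g. for the app-proxy analyzer earlier in this patch:
# rules_from_notebook("tsg117-get-approxy-nginx-logs.ipynb")
# -> [['(111: Connection refused) while connecting to upstream', 'TSG113', ...]]
```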
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get tail for log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Display the last 'tail_lines' of files in 'log_files' list\n", - "\n", - "pods = api.list_namespaced_pod(namespace)\n", - "\n", - "entries_for_analysis = []\n", - "\n", - "for p in pods.items:\n", - " if pod is None or p.metadata.name == pod:\n", - " for c in p.spec.containers:\n", - " if container is None or c.name == container:\n", - " for log_file in log_files:\n", - " print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n", - " try:\n", - " output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n", - " except Exception:\n", - " print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n", - " else:\n", - " for line in output.split('\\n'):\n", - " for expression in expressions_to_analyze:\n", - " if expression.match(line):\n", - " entries_for_analysis.append(line)\n", - " print(line)\n", - "print(\"\")\n", - "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Analyze log entries and suggest relevant Troubleshooting Guides" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Analyze log entries and suggest further relevant troubleshooting guides\n", - "from IPython.display import Markdown\n", - "import os\n", - "import json\n", - "import requests\n", - "import ipykernel\n", - "import datetime\n", - "\n", - "from urllib.parse import urljoin\n", - "from notebook import notebookapp\n", - "\n", - "def get_notebook_name():\n", - " \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS) \n", - " have the kernel_id in the filename of the connection file. If so, the \n", - " notebook name at runtime can be determined using `list_running_servers`.\n", - " Other runtimes (e.g. 
azdata) do not have the kernel_id in the filename of\n", - " the connection file, therefore we are unable to establish the filename\n", - " \"\"\"\n", - " connection_file = os.path.basename(ipykernel.get_connection_file())\n", - " \n", - " # If the runtime has the kernel_id in the connection filename, use it to\n", - " # get the real notebook name at runtime, otherwise, use the notebook \n", - " # filename from build time.\n", - " try: \n", - " kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", - " except:\n", - " pass\n", - " else:\n", - " for servers in list(notebookapp.list_running_servers()):\n", - " try:\n", - " response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", - " except:\n", - " pass\n", - " else:\n", - " for nn in json.loads(response.text):\n", - " if nn['kernel']['id'] == kernel_id:\n", - " return nn['path']\n", - "\n", - "def load_json(filename):\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def get_notebook_rules():\n", - " \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", - " file_name = get_notebook_name()\n", - "\n", - " if file_name == None:\n", - " return None\n", - " else:\n", - " j = load_json(file_name)\n", - "\n", - " if \"azdata\" not in j[\"metadata\"] or \\\n", - " \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", - " \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - " return []\n", - " else:\n", - " return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", - "\n", - "rules = get_notebook_rules()\n", - "\n", - "if rules == None:\n", - " print(\"\")\n", - " print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n", - "else:\n", - " print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n", - " print(rules)\n", - " hints = 0\n", - " if len(rules) > 0:\n", - " for entry in entries_for_analysis:\n", - " for rule in rules:\n", - " if entry.find(rule[0]) != -1:\n", - " print (entry)\n", - "\n", - " display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", - " hints = hints + 1\n", - "\n", - " print(\"\")\n", - " print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/readme.md b/Big-Data-Clusters/CU8/Public/content/log-files/readme.md deleted file mode 100644 index 10585c95..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/readme.md +++ /dev/null @@ -1,19 +0,0 @@ -# Logs notebooks - -- A set of notebooks to gather logs from a SQL Server Big Data Cluster. 
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG001 - Run azdata copy-logs ](tsg001-copy-logs.ipynb) - -- [TSG091 - Get the azdata CLI logs ](tsg091-get-azdata-logs.ipynb) - -- [TSG083 - Run kubectl cluster-info dump ](tsg083-run-kubectl-cluster-info-dump.ipynb) - -- [TSG061 - Get tail of all container logs for pods in BDC namespace ](tsg061-tail-bdc-container-logs.ipynb) - -- [TSG062 - Get tail of all previous container logs for pods in BDC namespace ](tsg062-tail-bdc-previous-container-logs.ipynb) - -- [TSG084 - Internal Query Processor Error ](tsg084-internal-query-process-error.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb deleted file mode 100644 index c891fd93..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb +++ /dev/null @@ -1,492 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG001 - Run azdata copy-logs\n", - "=============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
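The quoting NOTES above are easy to sanity-check locally; this is what `shlex.split` actually produces for a quoted argument and for the `'"'` pattern the NOTES recommend (commands are illustrative):

```python
import shlex

# Quoted segments stay together in the argv list handed to Popen:
print(shlex.split('azdata sql query -q "SELECT @@VERSION"'))
# ['azdata', 'sql', 'query', '-q', 'SELECT @@VERSION']

# The '"' pattern survives as an embedded double quote inside a single token:
print(shlex.split("""x=='"'data-pool'"'"""))
# ['x=="data-pool"']
```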
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg001-copy-logs.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run copy-logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tempfile\n", - "import shutil\n", - "\n", - "target_folder = os.path.join(tempfile.gettempdir(), \"copy-logs\", namespace)\n", - "\n", - "if os.path.isdir(target_folder):\n", - "    shutil.rmtree(target_folder)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### View the `--help` options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata bdc debug copy-logs --help')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the `copy-logs`\n", - "\n", - "NOTES:\n", - "\n", - "1. The `--timeout` option does not work on Windows.\n", - "2. Use `--skip-compress` on Windows if no utility is available to uncompress\n", - "    .tar.gz files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata bdc debug copy-logs --namespace {namespace} --target-folder {target_folder} --exclude-dumps --skip-compress --verbose')\n", - "\n", - "print(f'The logs are available at: {target_folder}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb deleted file mode 100644 index e4f32008..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb +++ /dev/null @@ -1,205 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG061 - Get tail of all container logs for pods in BDC namespace\n", - "=================================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "since_seconds = 60 * 60 * 1 # the last hour\n", - "coalesce_duplicates = True" - ] - },
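For orientation, the `since_seconds` parameter above plays the same role as kubectl's `--since` flag. A minimal sketch of the equivalent CLI call for a single container, assuming `kubectl` is on the path; the namespace, pod, and container names here are placeholders, not values taken from this book:

```python
import subprocess

# Roughly the CLI equivalent of the Python-client call made later in this
# notebook; 'mssql-cluster', 'master-0', and 'mssql-server' are placeholders.
subprocess.run(["kubectl", "logs", "master-0", "-c", "mssql-server",
                "-n", "mssql-cluster", "--since=3600s"],
               check=True)
```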
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - "    except IndexError:\n", - "        from IPython.display import Markdown\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - },
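The next section folds repeated log lines when `coalesce_duplicates` is set. A standalone sketch of that folding follows; the 27-character slice assumes the fixed-width timestamp prefix on BDC container log lines (for example `2020-10-19 09:32:32.0000000`), and the sample lines are invented:

```python
# Fold runs of identical messages (ignoring timestamps) into "message (xN)".
sample = [
    "2020-10-19 09:32:30.0000000 waiting for endpoint",
    "2020-10-19 09:32:31.0000000 waiting for endpoint",
    "2020-10-19 09:32:32.0000000 endpoint ready",
]

previous, duplicates = "", 1
for line in sample + [""]:            # empty sentinel flushes the last run
    if line[27:] != previous[27:]:    # compare the text after the timestamp
        if previous:
            print(previous if duplicates == 1 else f"{previous} (x{duplicates})")
        duplicates = 1
    else:
        duplicates += 1
    previous = line
```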
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get logs for all containers in Big Data Cluster namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace)\n", - "\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "print('Scanning pods: ' + ', '.join(pod_names))\n", - "\n", - "for pod in pod_list.items:\n", - "    print(\"*** %s\\t%s\\t%s\" % (pod.metadata.name,\n", - "                              pod.status.phase,\n", - "                              pod.status.pod_ip))\n", - "\n", - "    container_names = [container.name for container in pod.spec.containers]\n", - "\n", - "    for container in container_names:\n", - "        print (f\"POD: {pod.metadata.name} / CONTAINER: {container}\")\n", - "        try:\n", - "            logs = api.read_namespaced_pod_log(pod.metadata.name, namespace, container=container, since_seconds=since_seconds)\n", - "\n", - "            if coalesce_duplicates:\n", - "                previous_line = \"\"\n", - "                duplicates = 1\n", - "                for line in logs.split('\\n'):\n", - "                    if line[27:] != previous_line[27:]:\n", - "                        if duplicates != 1:\n", - "                            print(f\"\\t{previous_line} (x{duplicates})\")\n", - "                        print(f\"\\t{line}\")\n", - "                        duplicates = 1\n", - "                    else:\n", - "                        duplicates = duplicates + 1\n", - "\n", - "                    previous_line = line\n", - "            else:\n", - "                print(logs)\n", - "        except Exception:\n", - "            print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG062 - Get tail of all previous container logs for pods in BDC\n", - "    namespace](../log-files/tsg062-tail-bdc-previous-container-logs.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "side_effects": true, - "azdata": { - "diagnostic": { - "categories": [ - "kubernetes" - ] - }, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb deleted file mode 100644 index 588698a6..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb +++ /dev/null @@ -1,200 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG062 - Get tail of all previous container logs for pods in BDC namespace\n", - "==========================================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "tail_lines = 10000\n", - "coalesce_duplicates = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- 
set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get logs for previous instance of all containers in Big Data Cluster namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace)\n", - "\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "print('Scanning pods: ' + ', '.join(pod_names))\n", - "\n", - "for pod in pod_list.items:\n", - " print(\"*** %s\\t%s\\t%s\" % (pod.metadata.name,\n", - " pod.status.phase,\n", - " pod.status.pod_ip))\n", - "\n", - " container_names = [container.name for container in pod.spec.containers]\n", - "\n", - " for container in container_names:\n", - " print (f\"POD: {pod.metadata.name} / CONTAINER: {container}\")\n", - " try:\n", - " logs = api.read_namespaced_pod_log(pod.metadata.name, namespace, container=container, tail_lines=tail_lines, previous=True)\n", - "\n", - " if coalesce_duplicates:\n", - " previous_line = \"\"\n", - " duplicates = 1\n", - " for line in logs.split('\\n'):\n", - " if line[27:] != previous_line[27:]:\n", - " if duplicates != 1:\n", - " print(f\"\\t{previous_line} (x{duplicates})\")\n", - " print(f\"\\t{line}\")\n", - " duplicates = 1\n", - " else:\n", - " duplicates = duplicates + 1\n", - "\n", - " previous_line = line\n", - " else:\n", - " print(logs)\n", - " except Exception:\n", - " print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG061 - Get tail of all container logs for pods in BDC\n", - " namespace](../log-files/tsg061-tail-bdc-container-logs.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb 
b/Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb deleted file mode 100644 index f5d185bf..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG083 - Run kubectl cluster-info dump\n", - "======================================\n", - "\n", - "NOTE: This kubectl command can produce a lot of output, and may take\n", - "some time (and produce a large notebook!). For Kubernetes clusters that\n", - "have been up for a long time, consider running this command outside of a\n", - "notebook.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg083-run-kubectl-cluster-info-dump.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run cluster-info dump" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl cluster-info dump')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file
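Given TSG083's warning that the dump can be very large, one option is to run it outside the notebook and write to a directory instead. A sketch under those assumptions; `--output-directory` and `--namespaces` are standard `kubectl cluster-info dump` flags, and 'mssql-cluster' is a placeholder namespace:

```python
import os
import subprocess
import tempfile

# Write the dump to a temp directory rather than into the notebook output.
out_dir = os.path.join(tempfile.gettempdir(), "cluster-info-dump")
subprocess.run(["kubectl", "cluster-info", "dump",
                "--namespaces", "mssql-cluster",
                "--output-directory", out_dir],
               check=True)
print(f"cluster-info dump written to: {out_dir}")
```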
diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg084-internal-query-process-error.ipynb b/Big-Data-Clusters/CU8/Public/content/log-files/tsg084-internal-query-process-error.ipynb deleted file mode 100644 index 3869f93b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg084-internal-query-process-error.ipynb +++ /dev/null @@ -1,66 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG084 - Internal Query Processor Error\n", - "=======================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When running a Polybase query, the following error may be returned:\n", - "\n", - "> Msg 8680, Level 16, State 30, Line 1 Internal Query Processor Error:\n", - "> The query processor encountered an unexpected error during the\n", - "> processing of a remote query phase. OLE DB provider \u201cMSOLEDBSQL\u201d for\n", - "> linked server \u201c(null)\u201d returned message \u201cUnspecified error\u201d. Msg 7421,\n", - "> Level 16, State 2, Line 1 Cannot fetch the rowset from OLE DB provider\n", - "> \u201cMSOLEDBSQL\u201d for linked server \u201c(null)\u201d. .\n", - "\n", - "To get more information, use the following DMV query. The `details`\n", - "column will contain more information. All the rows for a single\n", - "\u2018execution\\_id\u2019 (QIDnnnnnn) are related to a single query execution.\n", - "\n", - "### Steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "-- Set @query to the exact text of the query that returned the error\n", - "DECLARE @query NVARCHAR(max) = ''\n", - "\n", - "SELECT e.*\n", - "FROM sys.dm_exec_distributed_requests dr\n", - "CROSS APPLY sys.dm_exec_sql_text(sql_handle) st\n", - "JOIN sys.dm_exec_compute_node_errors e ON dr.execution_id = e.execution_id\n", - "WHERE st.text = @query\n", - "ORDER BY create_time DESC" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "sql", - "display_name": "SQL" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file
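TSG091 below prints only the current `azdata.log`. A small sketch that also enumerates the rotated logs it mentions (`azdata.log.1`, `azdata.log.2`, ...), newest first, assuming the default `~/.azdata/logs` location the notebook uses:

```python
import os
from pathlib import Path

# List every azdata log file, including rotated ones, newest first.
log_dir = Path.home() / ".azdata" / "logs"
for log in sorted(log_dir.glob("azdata.log*"), key=os.path.getmtime, reverse=True):
    print(f"{log} ({log.stat().st_size} bytes)")
```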
diff --git a/Big-Data-Clusters/CU8/Public/content/log-files/tsg091-get-azdata-logs.ipynb b/Big-Data-Clusters/CU8/Public/content/log-files/tsg091-get-azdata-logs.ipynb deleted file mode 100644 index d9bfeb30..00000000 --- a/Big-Data-Clusters/CU8/Public/content/log-files/tsg091-get-azdata-logs.ipynb +++ /dev/null @@ -1,58 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG091 - Get the azdata CLI logs\n", - "================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Get the azdata logs from the local machine\n", - "\n", - "Gets the contents of the most recent log. There may be old logs in\n", - "azdata.log.1, azdata.log.2 etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "\n", - "home = str(Path.home())\n", - "\n", - "# Print the current log, stripping the trailing newline from each line\n", - "with open(os.path.join(home, \".azdata\", \"logs\", \"azdata.log\"), \"r\") as file:\n", - "    for line in file:\n", - "        print(line.rstrip(\"\\n\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/readme.md b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/readme.md deleted file mode 100644 index 229ecc67..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/readme.md +++ /dev/null @@ -1,29 +0,0 @@ -# SQL Server Big Data Cluster monitoring notebooks - -- This chapter contains a set of notebooks useful for getting information and status about a SQL Server big data cluster using the `azdata` command line interface (CLI). - -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG014 - Show BDC endpoints ](tsg014-azdata-bdc-endpoint-list.ipynb) - -- [TSG012 - Show BDC Status ](tsg012-azdata-bdc-status.ipynb) - -- [TSG069 - Show Big Data Cluster Gateway status ](tsg069-azdata-bdc-gateway-status.ipynb) - -- [TSG049 - Show BDC Controller status ](tsg049-azdata-bdc-control-status.ipynb) - -- [TSG033 - Show BDC SQL status ](tsg033-azdata-bdc-sql-status.ipynb) - -- [TSG068 - Show BDC HDFS status ](tsg068-azdata-bdc-hdfs-status.ipynb) - -- [TSG017 - Show BDC Configuration ](tsg017-azdata-bdc-config-show.ipynb) - -- [TSG004 - Show BDC Apps ](tsg004-show-app-list.ipynb) - -- [TSG003 - Show BDC Spark sessions ](tsg003-show-spark-sessions.ipynb) - -- [TSG013 - Show file list in Storage Pool (HDFS) ](tsg013-azdata-bdc-hdfs-ls.ipynb) - -- [TSG070 - Query SQL master pool ](tsg070-use-azdata-sql-query.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg003-show-spark-sessions.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg003-show-spark-sessions.ipynb deleted file mode 100644 index 49d4ccbe..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg003-show-spark-sessions.ipynb +++ /dev/null @@ -1,475 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG003 - Show BDC Spark sessions\n", - "================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg003-show-spark-sessions.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run azdata to list spark sessions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc spark session list')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg004-show-app-list.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg004-show-app-list.ipynb deleted file mode 100644 index fcb85df7..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg004-show-app-list.ipynb +++ /dev/null @@ -1,520 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG004 - Show BDC Apps\n", - "======================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg004-show-app-list.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n",
- "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expression for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any '*' that was escaped in the rule expression\n", - "\n",
- " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n",
- "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n",
- "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n",
- "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n",
- "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [
- "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] in the code below to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [
- "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run azdata to retrieve the list of applications" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata app list')" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run azdata to retrieve the list of applications as JSON output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "apps = run('azdata app list -o json', return_output=True)\n", - "apps = json.loads(apps)" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generate the `kubectl exec` command line for each app-deploy pod\n", - "\n", - "It is often useful to `kubectl exec` into an app-deploy pod to\n", - "troubleshoot issues. This cell will generate the command line for each\n", - "app-deploy pod.\n", - "\n", - "The application is run from a folder in:\n", - "\n", - "/var/opt/mlserver/workdir/\n", - "\n", - "The application is staged into the `workdir` folder from:\n", - "\n", - "/var/opt/app/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [
- "for result in apps[\"result\"]:\n", - " pods = run(f'kubectl get pods -n {namespace} -l app=app-{result[\"name\"]}-{result[\"version\"]} -o custom-columns=NAME:.metadata.name --no-headers', return_output=True)\n", - "\n", - " pods = pods.split('\\n')\n", - "\n", - " print(f'{result[\"name\"]}-{result[\"version\"]}:')\n", - " for pod in pods[:-1]: # There is a trailing \\n, which adds an empty element\n", - " print(f'\\tCONNECT: kubectl exec {pod} -n {namespace} -c mlserver -it -- bash')\n", - " \n", - " print('')" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe each app\n", - "\n", - "It is often useful to know the \u2018internal\_name\u2019 of the app-deploy; this\n", - "is used as the prefix for the pod names." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for result in apps[\"result\"]:\n", - " run(f'azdata app describe --name {result[\"name\"]} --version {result[\"version\"]}')" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg012-azdata-bdc-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg012-azdata-bdc-status.ipynb deleted file mode 100644 index ee912c72..00000000 --- 
a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg012-azdata-bdc-status.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG012 - Show BDC Status\n", - "========================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg012-azdata-bdc-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n",
- "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expression for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Un-escape any '*' that was escaped in the rule expression\n", - "\n",
- " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n",
- "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n",
- "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n",
- "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show big data cluster status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc status show --all')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg013-azdata-bdc-hdfs-ls.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg013-azdata-bdc-hdfs-ls.ipynb deleted file mode 100644 index a350af9a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg013-azdata-bdc-hdfs-ls.ipynb +++ /dev/null @@ -1,495 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG013 - Show file list in Storage Pool (HDFS)\n", - "==============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "path = \"/\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg013-azdata-bdc-hdfs-ls.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place them in the required AZDATA\_USERNAME and AZDATA\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to list files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata bdc hdfs ls --path {path}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg014-azdata-bdc-endpoint-list.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg014-azdata-bdc-endpoint-list.ipynb deleted file mode 100644 index 3e06b647..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg014-azdata-bdc-endpoint-list.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG014 - Show BDC endpoints\n", - "===========================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
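Before the same helper boilerplate repeats for this notebook, it is worth distilling the namespace-and-credentials pattern the hidden cells above implement: resolve the big data cluster namespace from its `MSSQL_CLUSTER` label, then read the controller secret and base64-decode it into the `AZDATA_USERNAME`/`AZDATA_PASSWORD` environment variables. A minimal standalone sketch of that flow, assuming `kubectl` is on the PATH and the default `controller-login-secret` exists (the `kubectl` helper function here is illustrative, not part of the notebooks):

```python
import base64
import os
import subprocess

def kubectl(args: str) -> str:
    """Run kubectl and return stripped stdout; raise on a non-zero exit code."""
    return subprocess.run(
        ["kubectl"] + args.split(),
        check=True, capture_output=True, text=True,
    ).stdout.strip()

# Resolve the BDC namespace from its label (first match, as the cell above does).
namespace = kubectl(
    "get namespace --selector=MSSQL_CLUSTER -o jsonpath={.items[0].metadata.name}"
)

# Secret data comes back base64-encoded, so decode it before use.
for key in ("username", "password"):
    encoded = kubectl(
        f"get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.{key}}}"
    )
    os.environ[f"AZDATA_{key.upper()}"] = base64.b64decode(encoded).decode("utf-8")

print(f"Namespace: {namespace}; controller credentials stored for azdata login.")
```

The `[0]` index in the jsonpath mirrors the notebook's behaviour of picking the first labelled namespace, which is why users with multiple clusters are told to set `AZDATA_NAMESPACE` instead.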
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg014-azdata-bdc-endpoint-list.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to list the big data cluster endpoints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc endpoint list')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg017-azdata-bdc-config-show.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg017-azdata-bdc-config-show.ipynb deleted file mode 100644 index 80beae22..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg017-azdata-bdc-config-show.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG017 - Show BDC Configuration\n", - "===============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
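One behaviour of these helper functions that is easy to miss in the wall of code: any stderr line containing a phrase from `retry_hints` triggers an automatic, recursive re-run of the command, capped at `MAX_RETRIES` (5). A compact sketch of just that decision, using one hint phrase taken from the table above (the `should_retry` function is an illustrative re-statement, not the shipped API):

```python
MAX_RETRIES = 5
retry_hints = {"azdata": ["Failed to get state for cluster"]}

def should_retry(cmd: str, stderr_line: str, retry_count: int) -> bool:
    """True when the stderr line matches a transient hint and retries remain."""
    exe = cmd.split()[0].lower()  # hints are keyed by executable name
    for hint in retry_hints.get(exe, []):
        if hint in stderr_line and retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {hint})")
            return True
    return False

# A first failure is retried; after MAX_RETRIES attempts it is surfaced instead.
assert should_retry("azdata bdc endpoint list", "Failed to get state for cluster", 0)
assert not should_retry("azdata bdc endpoint list", "Failed to get state for cluster", 5)
```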
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg017-azdata-bdc-config-show.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show the big data cluster configuration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc config show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg033-azdata-bdc-sql-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg033-azdata-bdc-sql-status.ipynb deleted file mode 100644 index 181cf5c1..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg033-azdata-bdc-sql-status.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG033 - Show BDC SQL status\n", - "============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
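The `load_rules`/`apply_expert_rules` pair above drives the "expert rules" system: each rule is a list whose element 7 holds a regular expression and whose element 1 names the follow-on notebook, and any stderr line matching the expression produces a HINT link. A stand-alone sketch with a made-up rule in that shape (note in passing that the original's debug print formats a '{4}' placeholder with only four arguments, so enabling `debug_logging` on a match would raise an IndexError):

```python
import re

# One illustrative rule in the expanded_rules shape:
# index 1 = follow-on notebook, index 7 = regular expression over stderr.
rules = [
    [0, "../repair/tsg110-azdata-returns-apierror.ipynb", "code", "stream",
     "name", "stderr", "text", 'Error processing command: "ApiError.*'],
]

def hints_for(line: str):
    """Yield a HINT link for every rule whose expression matches the line."""
    for rule in rules:
        notebook, expression = rule[1], rule[7].replace("\\*", "*")
        if re.match(expression, line, re.DOTALL):
            yield f"HINT: Use [{notebook}]({notebook}) to resolve this issue."

for hint in hints_for('Error processing command: "ApiError (500)'):
    print(hint)
```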
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg033-azdata-bdc-sql-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show the big data cluster sql status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc sql status show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg049-azdata-bdc-control-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg049-azdata-bdc-control-status.ipynb deleted file mode 100644 index 4477ce07..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg049-azdata-bdc-control-status.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG049 - Show BDC Controller status\n", - "===================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
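Before the full helper below, a minimal sketch of the transient-fault retry pattern these notebooks are built around may help. This is a simplification for illustration only (the real `run` also streams stdout, resolves the binary path, and maps stderr to HINT links), and TRANSIENT holds sample markers rather than the shipped retry_hints list:

import subprocess

MAX_RETRIES = 5

# Sample stderr substrings assumed, for illustration, to mark a transient fault.
TRANSIENT = ["Failed to get state for cluster", "Login timeout expired"]

def run_with_retry(cmd, retry_count=0):
    """Run cmd; recursively retry while stderr matches a transient marker."""
    p = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    for marker in TRANSIENT:
        if marker in p.stderr and retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {marker})")
            return run_with_retry(cmd, retry_count + 1)
    return p.stdout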
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg049-azdata-bdc-control-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show the big data cluster controller status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc control status show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg068-azdata-bdc-hdfs-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg068-azdata-bdc-hdfs-status.ipynb deleted file mode 100644 index d35134f7..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg068-azdata-bdc-hdfs-status.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG068 - Show BDC HDFS status\n", - "=============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
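The quoting rule described in the `run` docstring above can be verified in isolation; this standalone snippet (not part of the original notebook) shows what the '"' pattern does to a command line:

import shlex

# posix-mode shlex collapses the '"' pattern into a literal double quote
# inside a single token, so the argument arrives intact on every platform
# instead of hanging the reader loop on Windows.
print(shlex.split("echo '\"'quoted'\"'"))
# -> ['echo', '"quoted"']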
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg068-azdata-bdc-hdfs-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show the Big Data Cluster HDFS status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc hdfs status show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg069-azdata-bdc-gateway-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg069-azdata-bdc-gateway-status.ipynb deleted file mode 100644 index 210dc507..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg069-azdata-bdc-gateway-status.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG069 - Show Big Data Cluster Gateway status\n", - "=============================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
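As a hedged sketch of the expert-rules mechanism defined above: each rule's regular expression (element [7]) is matched against a stderr line and, on a match, a HINT link to the follow-on notebook (element [1]) is emitted. The rule layout mirrors the indices read by apply_expert_rules; the sample rule itself is illustrative, not taken from the shipped notebook metadata:

import re

rules = [
    [100, "../repair/tsg110-azdata-returns-apierror.ipynb", "code",
     "error", "ename", "SystemExit", "evalue", ".*ApiError.*"],
]

def hint_for(line):
    # Lowest priority value (element [0]) is evaluated first, as in load_rules().
    for rule in sorted(rules):
        if re.match(rule[7], line, re.DOTALL):
            return f"HINT: Use {rule[1]} to resolve this issue."
    return None

print(hint_for('Error processing command: "ApiError ..."'))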
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg069-azdata-bdc-gateway-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', 
'../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use azdata to show the Big Data Cluster gateway status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata bdc gateway status show')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg070-use-azdata-sql-query.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg070-use-azdata-sql-query.ipynb deleted file mode 100644 index 6fd07239..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-bdc/tsg070-use-azdata-sql-query.ipynb +++ /dev/null @@ -1,514 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG070 - Query SQL master pool\n", - "==============================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "query = \"select * from sys.dm_cluster_endpoints\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
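The `retry_hints`, `error_hints`, and `install_hint` tables above drive the `run` helper's suggestions: every stderr line is scanned for known substrings, and a match emits a clickable HINT to the follow-on SOP/TSG. A minimal sketch of that lookup, with a one-entry table trimmed from the full tables above (illustrative only, not the notebook's own cell):

```python
# Sketch of the stderr HINT lookup used by the `run` helper.
# Table shape matches the notebooks: tool -> [substring, title, path] triples.
error_hints = {
    "azdata": [
        ["The token is expired", "SOP028 - azdata login",
         "../common/sop028-azdata-login.ipynb"],
    ],
}

def hints_for(tool, stderr_line):
    """Return (title, path) for each known error substring found in the line."""
    return [(title, path)
            for substring, title, path in error_hints.get(tool, [])
            if substring in stderr_line]

# An expired-token failure points back at the login notebook:
print(hints_for("azdata", "ERROR: The token is expired, please log in again"))
```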
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg070-use-azdata-sql-query.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use `azdata` to query the `SQL Master Pool`\n", - "\n", - "Get the current `@@VERSION` and `@@SERVERNAME` (which is the `pod` name\n", - "of the current primary replica in an HA configuration)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('azdata sql query -q \"select @@VERSION [version], @@SERVERNAME [primary replica pod]\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the query" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'azdata sql query -q \"{query}\"')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/readme.md b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/readme.md deleted file mode 100644 index 45656cdb..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/readme.md +++ /dev/null @@ -1,51 +0,0 @@ -# SQL Server Big Data Cluster Kubernetes monitoring notebooks - -- This chapter contains a set of notebooks useful for getting information and status about the Kubernetes cluster hosting a SQL Server Big Data Cluster (BDC). 
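The controller-login cell in TSG070 above reads the username and password out of a Kubernetes secret; `kubectl` returns secret data base64-encoded, which is why the cell passes `base64_decode=True`. A condensed sketch of that step, with a plain `subprocess` call standing in for the notebook's `run` helper (a simplification for brevity, not the helper itself):

```python
import base64
import subprocess

def get_secret_field(namespace, secret, field):
    """Read one field of a Kubernetes secret and base64-decode it."""
    encoded = subprocess.check_output(
        ["kubectl", "get", f"secret/{secret}", "-n", namespace,
         "-o", f"jsonpath={{.data.{field}}}"],
        text=True)
    return base64.b64decode(encoded).decode("utf-8")

# e.g. get_secret_field(namespace, "controller-login-secret", "username")
```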
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG021 - Get cluster info (Kubernetes) ](tsg021-get-k8s-cluster-info.ipynb) - -- [TSG008 - Get version information (Kubernetes) ](tsg008-get-k8s-version-info.ipynb) - -- [TSG081 - Get namespaces (Kubernetes) ](tsg081-get-kubernetes-namespaces.ipynb) - -- [TSG009 - Get nodes (Kubernetes) ](tsg009-get-nodes.ipynb) - -- [TSG006 - Get system pod status ](tsg006-view-system-pod-status.ipynb) - -- [TSG007 - Get BDC pod status ](tsg007-view-bdc-pod-status.ipynb) - -- [TSG015 - View BDC services (Kubernetes) ](tsg015-view-k8s-services-for-bdc.ipynb) - -- [TSG097 - Get BDC stateful sets (Kubernetes) ](tsg097-get-statefulsets.ipynb) - -- [TSG098 - Get BDC replicasets (Kubernetes) ](tsg098-get-replicasets.ipynb) - -- [TSG099 - Get BDC daemonsets (Kubernetes) ](tsg099-get-daemonsets.ipynb) - -- [TSG023 - Get all BDC objects (Kubernetes) ](tsg023-run-kubectl-get-all.ipynb) - -- [TSG063 - Get storage classes (Kubernetes) ](tsg063-get-storage-classes.ipynb) - -- [TSG072 - Get Persistent Volumes (Kubernetes) ](tsg072-get-persistent-volumes.ipynb) - -- [TSG064 - Get BDC Persistent Volume Claims ](tsg064-get-persistent-volume-claims.ipynb) - -- [TSG065 - Get BDC secrets (Kubernetes) ](tsg065-get-secrets-for-bdc-namespace.ipynb) - -- [TSG066 - Get BDC events (Kubernetes) ](tsg066-get-kubernetes-events.ipynb) - -- [TSG020 - Describe nodes (Kubernetes) ](tsg020-describe-all-nodes.ipynb) - -- [TSG016 - Describe BDC pods ](tsg016-describe-all-pods-in-bdc-namespace.ipynb) - -- [TSG089 - Describe BDC non-running pods ](tsg089-describe-non-running-pods-in-bdc.ipynb) - -- [TSG010 - Get configuration contexts ](tsg010-get-kubernetes-contexts.ipynb) - -- [TSG022 - Get external IP address for kubeadm host ](tsg022-get-external-ip-of-kubeadm-host.ipynb) - -- [TSG042 - Get `node name` and external mounts for `Data` and `Logs` `PVCs` ](tsg042-get-hosting-node-and-data-log-mount.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg006-view-system-pod-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg006-view-system-pod-status.ipynb deleted file mode 100644 index 7a471bba..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg006-view-system-pod-status.ipynb +++ /dev/null @@ -1,415 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG006 - Get system pod status\n", - "==============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "View the status of all system pods.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
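The `run` helper defined next documents a quoting pitfall: passing a bare `'` inside a `kubectl` jsonpath filter hangs the stdout reader on Windows, so the notebooks recommend the `'"'` pattern instead. A quick demonstration of what `shlex.split` (the same splitter `run` uses) makes of that pattern:

```python
import shlex

# shlex collapses each '"' pair into a literal double quote, so the filter
# reaches kubectl as =="data-pool" (within one argument) on every platform.
cmd = ("kubectl get nodes -o jsonpath="
       "{.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}")
print(shlex.split(cmd)[-1])
# jsonpath={.items[?(@.metadata.annotations.pv-candidate=="data-pool")].metadata.name}
```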
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg006-view-system-pod-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the kube-system pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get pods -n kube-system -o wide')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG007 - Get BDC pod\n", - " status](../monitor-k8s/tsg007-view-bdc-pod-status.ipynb)\n", - "\n", - "- [TSG009 - Get nodes\n", - " (Kubernetes)](../monitor-k8s/tsg009-get-nodes.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - 
"display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg007-view-bdc-pod-status.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg007-view-bdc-pod-status.ipynb deleted file mode 100644 index dbc30132..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg007-view-bdc-pod-status.ipynb +++ /dev/null @@ -1,462 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG007 - Get BDC pod status\n", - "===========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "View the big data cluster pods status.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg007-view-bdc-pod-status.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster use the kubectl command line\n", - "interface .\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
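The cell below implements exactly that resolution order. Condensed, with a plain `subprocess` call standing in for the `run` helper (a sketch, not the notebook's own code):

```python
import os
import subprocess

def bdc_namespace():
    """AZDATA_NAMESPACE wins; otherwise take the first namespace labelled MSSQL_CLUSTER."""
    if "AZDATA_NAMESPACE" in os.environ:
        return os.environ["AZDATA_NAMESPACE"]
    return subprocess.check_output(
        ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
         "-o", "jsonpath={.items[0].metadata.name}"],
        text=True)
```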
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the big data cluster pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pods -n {namespace} -o wide')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG006 - Get system pod\n", - " status](../monitor-k8s/tsg006-view-system-pod-status.ipynb)\n", - "\n", - "- [TSG009 - Get nodes\n", - " (Kubernetes)](../monitor-k8s/tsg009-get-nodes.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg008-get-k8s-version-info.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg008-get-k8s-version-info.ipynb deleted file mode 100644 index f223c180..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg008-get-k8s-version-info.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG008 - Get version information (Kubernetes)\n", - "=============================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the Kubernetes cluster-info\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
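The `run` helper defined in the next cell folds output streaming, hint injection, and retries into one function. As a rough sketch of just its transient-retry idea, assuming only that `kubectl` is on the PATH (the hint fragment below is a placeholder, not the full message the notebooks match on):

```python
import subprocess

MAX_RETRIES = 5

# Placeholder transient-stderr fragments; the notebooks key the real ones
# off the executable name, e.g. retry_hints['kubectl'].
transient_hints = ["did not properly respond after a period of time"]

def run_with_retry(cmd, retry_count=0):
    """Run cmd once; retry (bounded) when stderr matches a transient hint."""
    p = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if p.returncode != 0 and retry_count < MAX_RETRIES:
        for hint in transient_hints:
            if hint in p.stderr:
                print(f"RETRY: {retry_count} (due to: {hint})")
                return run_with_retry(cmd, retry_count + 1)
    return p

print(run_with_retry("kubectl version -o yaml").stdout)
```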
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg008-get-k8s-version-info.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes version info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl version -o yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg009-get-nodes.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg009-get-nodes.ipynb deleted file mode 100644 index a32328c2..00000000 
--- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg009-get-nodes.ipynb +++ /dev/null @@ -1,415 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG009 - Get nodes (Kubernetes)\n", - "===============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the kubernetes nodes details\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg009-get-nodes.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes nodes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get nodes')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG006 - Get system pod\n", - " status](../monitor-k8s/tsg006-view-system-pod-status.ipynb)\n", - "\n", - "- [TSG007 - Get BDC pod\n", - " status](../monitor-k8s/tsg007-view-bdc-pod-status.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": 
"Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg010-get-kubernetes-contexts.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg010-get-kubernetes-contexts.ipynb deleted file mode 100644 index 061a916c..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg010-get-kubernetes-contexts.ipynb +++ /dev/null @@ -1,413 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG010 - Get configuration contexts\n", - "===================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the kubernetes contexts\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg010-get-kubernetes-contexts.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes config contexts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl config get-contexts')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [SOP011 - Set kubernetes configuration\n", - " context](../common/sop011-set-kubernetes-context.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } 
- } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg015-view-k8s-services-for-bdc.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg015-view-k8s-services-for-bdc.ipynb deleted file mode 100644 index b0c41161..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg015-view-k8s-services-for-bdc.ipynb +++ /dev/null @@ -1,443 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG015 - View BDC services (Kubernetes)\n", - "=======================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg015-view-k8s-services-for-bdc.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the escaping of '*' (stored as '\\\\*' in the rule metadata)\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \[0\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
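For readers running these steps outside Azure Data Studio, here is a minimal standalone sketch of the lookup the next cell performs (assuming `kubectl` is installed and pointed at the right cluster; `MSSQL_CLUSTER` is the label a SQL Server Big Data Cluster places on its namespace):

```python
# Minimal sketch of the label-based namespace lookup, mirroring the cell below.
import os
import subprocess

def get_bdc_namespace():
    # An explicit AZDATA_NAMESPACE always takes precedence over auto-detection.
    if "AZDATA_NAMESPACE" in os.environ:
        return os.environ["AZDATA_NAMESPACE"]
    return subprocess.check_output(
        ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
         "-o", "jsonpath={.items[0].metadata.name}"],
        text=True).strip()
```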
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use kubectl to view the services" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get svc -n {namespace}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace.ipynb deleted file mode 100644 index 63898f1d..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace.ipynb +++ /dev/null @@ -1,493 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG016 - Describe BDC pods\n", - "==========================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
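One behaviour worth calling out before the code: the `run` helper defined in the next cell automatically retries a command when its stderr contains a string known to indicate a transient fault. A stripped-down sketch of just that retry loop, with the retry string abridged from this notebook's own `retry_hints` entry for `kubectl`:

```python
# Stripped-down sketch of run()'s retry-on-transient-stderr pattern.
import subprocess

MAX_RETRIES = 5
RETRY_HINTS = ["A connection attempt failed"]  # substrings that mark transient faults

def run_with_retry(cmd, retry_count=0):
    p = subprocess.run(cmd, capture_output=True, text=True)
    for hint in RETRY_HINTS:
        if hint in p.stderr and retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {hint})")
            return run_with_retry(cmd, retry_count + 1)
    return p.stdout

# e.g. run_with_retry(["kubectl", "get", "pods"])
```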
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg016-describe-all-pods-in-bdc-namespace.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " 
raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe all pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace)\n", - "\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "print('Describing pods: ' + ', '.join(pod_names))\n", - "\n", - "for pod in pod_list.items:\n", - " try:\n", - " run(f'kubectl describe pod/{pod.metadata.name} -n {namespace}')\n", - " except Exception:\n", - " print (f\"Failed to describe pod: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg020-describe-all-nodes.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg020-describe-all-nodes.ipynb deleted file mode 100644 index b15f5b7f..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg020-describe-all-nodes.ipynb +++ /dev/null @@ -1,443 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG020- Describe nodes (Kubernetes)\n", - "===================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper 
functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg020-describe-all-nodes.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "            \"azdata\" in j[\"metadata\"] and \\\n", - "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Undo the escaping of '*' (stored as '\\\\*' in the rule metadata)\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \[0\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\_NAMESPACE, before starting\n", - "    Azure Data Studio."
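As an aside, the sibling notebook TSG016 performs this same label-based lookup through the Kubernetes Python client rather than shelling out to `kubectl`; a minimal sketch of that approach (assuming the `kubernetes` package is installed and a valid kubeconfig is present):

```python
# The same MSSQL_CLUSTER namespace lookup via the Kubernetes Python client,
# as done in TSG016. Assumes `pip install kubernetes`.
from kubernetes import client, config

config.load_kube_config()  # honours the current kubectl context
api = client.CoreV1Api()
namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name
print(namespace)
```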
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe all nodes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl describe nodes')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg021-get-k8s-cluster-info.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg021-get-k8s-cluster-info.ipynb deleted file mode 100644 index d6c03af0..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg021-get-k8s-cluster-info.ipynb +++ /dev/null @@ -1,402 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG021 - Get cluster info (Kubernetes)\n", - "======================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the Kubernetes cluster-info\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
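Before the code: the common functions defined in the next cell include `apply_expert_rules`, which compares each stderr line against rule rows loaded from the notebook metadata and renders a clickable HINT on a match. A self-contained sketch of one such match; the row layout follows the indices `apply_expert_rules` uses, but the sample row itself is illustrative rather than real metadata:

```python
# Illustrative expert-rule row, laid out as apply_expert_rules() indexes it:
# [priority, notebook, cell_type, output_type, name, value, details_name, regex]
import re

rule = [0, "../repair/tsg056-kubectl-no-connection-could-be-made.ipynb",
        "code", "stream", "name", "stdout", "text",
        ".*No connection could be made.*"]

line = "No connection could be made because the target machine actively refused it"

if re.match(rule[7], line, re.DOTALL):
    # In the notebook this prints as a clickable Markdown HINT.
    print(f"HINT: Use [{rule[1]}]({rule[1]}) to resolve this issue.")
```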
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg021-get-k8s-cluster-info.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped the *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes cluster information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl cluster-info')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg022-get-external-ip-of-kubeadm-host.ipynb
b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg022-get-external-ip-of-kubeadm-host.ipynb deleted file mode 100644 index a289b198..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg022-get-external-ip-of-kubeadm-host.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG022 - Get external IP address for kubeadm host\n", - "=================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the external IP address of the host of kubeadm\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg022-get-external-ip-of-kubeadm-host.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
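# [Editor's sketch, not part of the original notebook.] The transient-fault
# handling above, reduced to a runnable minimum: when a known-transient
# message appears on stderr, run() calls itself with an incremented retry
# counter, capped at MAX_RETRIES. The command and message here are
# illustrative assumptions.
import subprocess

MAX_RETRIES = 5
TRANSIENT = "A connection attempt failed"

def run_with_retry(cmd, retry_count=0):
    p = subprocess.run(cmd, capture_output=True, text=True)
    if TRANSIENT in p.stderr and retry_count < MAX_RETRIES:
        print(f"RETRY: {retry_count} (due to: {TRANSIENT!r})")
        return run_with_retry(cmd, retry_count + 1)
    return p.returncode

run_with_retry(["kubectl", "version", "--client"])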
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get IP address" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get node --selector=\"node-role.kubernetes.io/master\" -o=jsonpath=\"{.items[0].status.addresses[0].address}\"')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg023-run-kubectl-get-all.ipynb 
b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg023-run-kubectl-get-all.ipynb deleted file mode 100644 index 96a4f281..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg023-run-kubectl-get-all.ipynb +++ /dev/null @@ -1,465 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG023 - Get all BDC objects (Kubernetes)\n", - "=========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get a summary of all Kubernetes resources for the system namespace and\n", - "the Big Data Cluster namespace\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg023-run-kubectl-get-all.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
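# [Editor's sketch, not part of the original notebook.] How the error_hints
# table defined below is consumed: the first element of each entry is a
# substring searched for in every stderr line; the second and third become
# the title and link of the HINT that is displayed.
hint_table = {
    "kubectl": [
        ["no such host",
         "TSG010 - Get configuration contexts",
         "../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb"],
    ],
}

def hints_for(exe, stderr_line):
    return [(title, link)
            for substring, title, link in hint_table.get(exe, [])
            if substring in stderr_line]

assert hints_for("kubectl", "dial tcp: lookup x: no such host") == [
    ("TSG010 - Get configuration contexts",
     "../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb")]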
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped the *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl get all for the system namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"kubectl get all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command-line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
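A minimal sketch (an editor's addition, assuming only that kubectl is on the path) of how to list every candidate namespace when more than one Big Data Cluster exists, so the right name can be chosen or exported as AZDATA_NAMESPACE before the cell below runs:

    import subprocess

    # List all namespaces carrying the MSSQL_CLUSTER label; no [0] index,
    # so every candidate is returned rather than just the first.
    names = subprocess.run(
        ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
         "-o", "jsonpath={.items[*].metadata.name}"],
        capture_output=True, text=True, check=True).stdout.split()
    print(names)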
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl get all for the Big Data Cluster namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get all -n {namespace}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg042-get-hosting-node-and-data-log-mount.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg042-get-hosting-node-and-data-log-mount.ipynb deleted file mode 100644 index f89391d7..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg042-get-hosting-node-and-data-log-mount.ipynb +++ /dev/null @@ -1,560 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG042 - Get `node name` and external mounts for `Data` and `Logs` `PVCs`\n", - "=========================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get `node name` hosting `pod` along with the `Data` and `Logs` external\n", - "mounts.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "pod_name = \"master-0\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg042-get-hosting-node-and-data-log-mount.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
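# [Editor's sketch, not part of the original notebook.] The lookup chain the
# cells below perform with kubectl jsonpath queries, expressed instead with
# the kubernetes Python client: pod volume -> PVC claim name -> PV bound to
# that claim -> external mount. The pod and namespace names are hypothetical
# placeholders.
from kubernetes import client, config

config.load_kube_config()
api = client.CoreV1Api()

pod = api.read_namespaced_pod("master-0", "mssql-cluster")  # hypothetical names
claim = next(v.persistent_volume_claim.claim_name
             for v in pod.spec.volumes
             if v.name == "data" and v.persistent_volume_claim)
pv = next(p for p in api.list_persistent_volume().items
          if p.spec.claim_ref and p.spec.claim_ref.name == claim)
print(pv.spec.local.path if pv.spec.local else pv.spec.azure_disk.disk_uri)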
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " 
raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get `PersistentVolumeClaim` reference for `Data` and `Logs`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_claim = run(f'kubectl get pod {pod_name} --namespace {namespace} --output jsonpath=\"{{range .spec.volumes[?(@.name==\\'data\\')]}} {{.persistentVolumeClaim.claimName}} {{end}}\"', return_output=True)\n", - "logs_claim = run(f'kubectl get pod {pod_name} --namespace {namespace} --output jsonpath=\"{{range .spec.volumes[?(@.name==\\'logs\\')]}} {{.persistentVolumeClaim.claimName}} {{end}}\"', return_output=True)\n", - "\n", - "print (f\"Data claim: {data_claim}. 
Logs claim: {logs_claim}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get Kubernetes agent `node` hosting the Pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pod {pod_name} --namespace {namespace} --output jsonpath=\"{{.spec.nodeName}}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get external mount for `Data`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pv --namespace {namespace} --output jsonpath=\"{{range .items[?(@.spec.claimRef.name==\\'{data_claim.strip()}\\')]}}{{.spec.local.path}} {{.spec.azureDisk.diskURI}}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get external mount for `Logs`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pv --namespace {namespace} --output jsonpath=\"{{range .items[?(@.spec.claimRef.name==\\'{logs_claim.strip()}\\')]}}{{.spec.local.path}} {{.spec.azureDisk.diskURI}}\"')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg063-get-storage-classes.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg063-get-storage-classes.ipynb deleted file mode 100644 index ae7168e6..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg063-get-storage-classes.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG063 - Get storage classes (Kubernetes)\n", - "=========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the Kubernetes storage classes available in the cluster\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg063-get-storage-classes.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", -    "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", -    "\n", -    "        if debug_logging:\n", -    "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", -    "\n", -    "        if re.match(expression, line, re.DOTALL):\n", -    "\n", -    "            if debug_logging:\n", -    "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", -    "\n", -    "            match_found = True\n", -    "\n", -    "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", -    "\n", -    "\n", -    "\n", -    "print('Common functions defined successfully.')\n", -    "\n", -    "# Hints for binary (transient fault) retry, (known) error and install guide\n", -    "#\n", -    "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", -    "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", -    "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" -   ] -  }, -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "### Show the Kubernetes storage classes" -   ] -  }, -  { -   "cell_type": "code", -   "execution_count": null, -   "metadata": {}, -   "outputs": [], -   "source": [ -    "run('kubectl get sc')" -   ] -  }, -  { -   "cell_type": "code", -   "execution_count": null, -   "metadata": {}, -   "outputs": [], -   "source": [ -    "print('Notebook execution complete.')" -   ] -  } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { -  "kernelspec": { -   "name": "python3", -   "display_name": "Python 3" -  }, -  "azdata": { -   "side_effects": false -  } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg064-get-persistent-volume-claims.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg064-get-persistent-volume-claims.ipynb deleted file mode 100644 index 53fe2d75..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg064-get-persistent-volume-claims.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "TSG064 - Get BDC Persistent Volume Claims\n", -    "=========================================\n", -    "\n", -    "Description\n", -    "-----------\n", -    "\n", -    "Show the persistent volume claims (PVCs) for the Big Data Cluster\n", -    "\n", -    "Steps\n", -    "-----\n", -    "\n", -    "### Common functions\n", -    "\n", -    "Define helper functions used in this notebook."
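The `apply_expert_rules` helper above (it is repeated in each notebook's helper cell, including the one that follows) reads each rule positionally: index 0 is the sort/priority key, index 1 the follow-on notebook to HINT, and index 7 the regular expression matched against stderr. As a hedged illustration only, with invented field values rather than metadata from any shipped notebook, an `expanded_rules` entry would be shaped like this:

```python
# Hypothetical expanded_rules entry, shaped to match the positional reads
# in apply_expert_rules; every value here is illustrative, not real metadata.
rule = [
    200,                                                            # rule[0]: priority; rules.sort() runs lowest first
    "../repair/tsg056-kubectl-no-connection-could-be-made.ipynb",   # rule[1]: notebook to HINT
    "code",                                                         # rule[2]: cell_type
    "stream",                                                       # rule[3]: output_type (stream or error)
    "name",                                                         # rule[4]: output_type_name (ename or name)
    "stdout",                                                       # rule[5]: output_type_value (SystemExit or stdout)
    "text",                                                         # rule[6]: details_name (evalue or text)
    ".*actively refused.*",                                         # rule[7]: regex applied to each stderr line
]
```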
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg064-get-persistent-volume-claims.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", -    "    else:\n", -    "        if \"metadata\" in j and \\\n", -    "            \"azdata\" in j[\"metadata\"] and \\\n", -    "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", -    "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", -    "\n", -    "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", -    "\n", -    "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", -    "\n", -    "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", -    "\n", -    "            return rules\n", -    "\n", -    "def apply_expert_rules(line):\n", -    "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", -    "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", -    "\n", -    "    global rules\n", -    "\n", -    "    for rule in rules:\n", -    "        notebook = rule[1]\n", -    "        cell_type = rule[2]\n", -    "        output_type = rule[3] # i.e. stream or error\n", -    "        output_type_name = rule[4] # i.e. ename or name \n", -    "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", -    "        details_name = rule[6]  # i.e. evalue or text \n", -    "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", -    "\n", -    "        if debug_logging:\n", -    "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", -    "\n", -    "        if re.match(expression, line, re.DOTALL):\n", -    "\n", -    "            if debug_logging:\n", -    "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", -    "\n", -    "            match_found = True\n", -    "\n", -    "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", -    "\n", -    "\n", -    "\n", -    "print('Common functions defined successfully.')\n", -    "\n", -    "# Hints for binary (transient fault) retry, (known) error and install guide\n", -    "#\n", -    "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", -    "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", -    "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" -   ] -  }, -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "### Get the Kubernetes namespace for the big data cluster\n", -    "\n", -    "Get the namespace of the Big Data Cluster using the kubectl command line\n", -    "interface.\n", -    "\n", -    "**NOTE:**\n", -    "\n", -    "If there is more than one Big Data Cluster in the target Kubernetes\n", -    "cluster, then either:\n", -    "\n", -    "-   set \\[0\\] to the correct value for the big data cluster.\n", -    "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", -    "    Azure Data Studio."
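For the second option in the NOTE above, a minimal sketch: set the variable before the lookup cell below runs, so the label-based search is skipped entirely. The namespace name used here is a placeholder, not a known value from this cluster.

```python
# Sketch only: pin the target namespace so the lookup cell below uses it
# directly. "mssql-cluster" is a placeholder namespace name.
import os
os.environ["AZDATA_NAMESPACE"] = "mssql-cluster"
```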
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl to display the PVCs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get pvc -n {namespace}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg065-get-secrets-for-bdc-namespace.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg065-get-secrets-for-bdc-namespace.ipynb deleted file mode 100644 index 750c4fa5..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg065-get-secrets-for-bdc-namespace.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG065 - Get BDC secrets (Kubernetes)\n", - "=====================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "View the big data cluster secrets\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
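The helper cell below repeats the `run` function's docstring NOTES on a Windows quoting pitfall for `jsonpath` filters. A standard-library sketch of why the `'"'` pattern is needed: `shlex.split` strips bare single quotes, while the doubled pattern carries quotes through to the final `kubectl` argument.

```python
# Demonstrates the docstring's quoting note: shlex.split eats bare single
# quotes, while the '"' pattern leaves double quotes in the final argument.
import shlex

bare   = "kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}"
quoted = "kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}"

print(shlex.split(bare)[-1])    # ...pv-candidate==data-pool...   (quotes stripped)
print(shlex.split(quoted)[-1])  # ...pv-candidate=="data-pool"... (quotes survive)
```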
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg065-get-secrets-for-bdc-namespace.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", -    "    else:\n", -    "        if \"metadata\" in j and \\\n", -    "            \"azdata\" in j[\"metadata\"] and \\\n", -    "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", -    "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", -    "\n", -    "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", -    "\n", -    "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", -    "\n", -    "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", -    "\n", -    "            return rules\n", -    "\n", -    "def apply_expert_rules(line):\n", -    "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", -    "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", -    "\n", -    "    global rules\n", -    "\n", -    "    for rule in rules:\n", -    "        notebook = rule[1]\n", -    "        cell_type = rule[2]\n", -    "        output_type = rule[3] # i.e. stream or error\n", -    "        output_type_name = rule[4] # i.e. ename or name \n", -    "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", -    "        details_name = rule[6]  # i.e. evalue or text \n", -    "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", -    "\n", -    "        if debug_logging:\n", -    "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", -    "\n", -    "        if re.match(expression, line, re.DOTALL):\n", -    "\n", -    "            if debug_logging:\n", -    "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", -    "\n", -    "            match_found = True\n", -    "\n", -    "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", -    "\n", -    "\n", -    "\n", -    "print('Common functions defined successfully.')\n", -    "\n", -    "# Hints for binary (transient fault) retry, (known) error and install guide\n", -    "#\n", -    "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", -    "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", -    "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" -   ] -  }, -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "### Get the Kubernetes namespace for the big data cluster\n", -    "\n", -    "Get the namespace of the Big Data Cluster using the kubectl command line\n", -    "interface.\n", -    "\n", -    "**NOTE:**\n", -    "\n", -    "If there is more than one Big Data Cluster in the target Kubernetes\n", -    "cluster, then either:\n", -    "\n", -    "-   set \\[0\\] to the correct value for the big data cluster.\n", -    "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", -    "    Azure Data Studio."
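For the first option in the NOTE above, a hedged sketch: if two labelled namespaces exist and the second is the intended cluster, change the `[0]` index in the lookup that follows to `[1]`.

```python
# Sketch of the NOTE's first option (assumes the `run` helper defined above):
# select the second MSSQL_CLUSTER-labelled namespace rather than the first.
namespace = run('kubectl get namespace --selector=MSSQL_CLUSTER '
                '-o jsonpath={.items[1].metadata.name}', return_output=True)
```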
- ] -  }, -  { -   "cell_type": "code", -   "execution_count": null, -   "metadata": { -    "tags": [ -     "hide_input" -    ] -   }, -   "outputs": [], -   "source": [ -    "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", -    "\n", -    "if \"AZDATA_NAMESPACE\" in os.environ:\n", -    "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", -    "else:\n", -    "    try:\n", -    "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", -    "    except:\n", -    "        from IPython.display import Markdown\n", -    "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", -    "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", -    "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", -    "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", -    "        raise\n", -    "\n", -    "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" -   ] -  }, -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "### Show the big data cluster Kubernetes Secret Store entries" -   ] -  }, -  { -   "cell_type": "code", -   "execution_count": null, -   "metadata": {}, -   "outputs": [], -   "source": [ -    "run(f'kubectl get secrets -n {namespace}')" -   ] -  }, -  { -   "cell_type": "code", -   "execution_count": null, -   "metadata": {}, -   "outputs": [], -   "source": [ -    "print('Notebook execution complete.')" -   ] -  } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { -  "kernelspec": { -   "name": "python3", -   "display_name": "Python 3" -  }, -  "azdata": { -   "side_effects": false -  } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg066-get-kubernetes-events.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg066-get-kubernetes-events.ipynb deleted file mode 100644 index 3af50426..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg066-get-kubernetes-events.ipynb +++ /dev/null @@ -1,480 +0,0 @@ -{ - "cells": [ -  { -   "cell_type": "markdown", -   "metadata": {}, -   "source": [ -    "TSG066 - Get BDC events (Kubernetes)\n", -    "====================================\n", -    "\n", -    "Description\n", -    "-----------\n", -    "\n", -    "View the Kubernetes events for the big data cluster\n", -    "\n", -    "Steps\n", -    "-----\n", -    "\n", -    "### Common functions\n", -    "\n", -    "Define helper functions used in this notebook."
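TSG066's event-listing step sits beyond this hunk, but given the notebook's stated purpose, a time-ordered listing would look something like the sketch below. It assumes the same `run` helper and `namespace` variable that the sibling notebooks above define; `--sort-by` is a standard kubectl flag.

```python
# Hedged sketch: list big data cluster events oldest-first; assumes `run`
# and `namespace` are defined by cells like those in TSG064/TSG065 above.
run(f'kubectl get events -n {namespace} --sort-by=.metadata.creationTimestamp')
```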
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg066-get-kubernetes-events.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Unescape the '*' that was stored escaped in the rule expression\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster, as shown in\n", - " the sketch below.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
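The cell that follows automates this lookup. The same label-selector query can also be run standalone; a sketch assuming `kubectl` is on the PATH (change the `items` index, e.g. `[1]`, to pick the second big data cluster when several share the Kubernetes cluster):

    import subprocess

    # jsonpath picks the first namespace carrying the MSSQL_CLUSTER label
    cmd = ("kubectl get namespace --selector=MSSQL_CLUSTER "
           "-o jsonpath={.items[0].metadata.name}")
    namespace = subprocess.check_output(cmd.split(), text=True)
    print(f"BDC namespace: {namespace}")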
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes events for the Big Data Cluster namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get events -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes events for the system namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get events -n kube-system')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes events in the default namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get events')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg072-get-persistent-volumes.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg072-get-persistent-volumes.ipynb deleted file mode 100644 index 7d34c861..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg072-get-persistent-volumes.ipynb +++ /dev/null @@ -1,402 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG072 - Get Persistent Volumes (Kubernetes)\n", - "============================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Show the persistent volumes (PVs) for the Kubernetes cluster. Persistent\n", - "volumes are non-namespaced objects.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
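Beyond the plain `kubectl get pv` cell later in this notebook, a custom-columns listing surfaces capacity and claim bindings in one view. A sketch assuming `kubectl` is on the PATH; the column JSONPaths follow the standard PersistentVolume schema and the column names are arbitrary:

    import subprocess

    # NAME/CAPACITY/STATUS/CLAIM follow the PersistentVolume object layout
    cols = ("NAME:.metadata.name,CAPACITY:.spec.capacity.storage,"
            "STATUS:.status.phase,CLAIM:.spec.claimRef.name")
    subprocess.run(["kubectl", "get", "pv", "-o", f"custom-columns={cols}"], check=True)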
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg072-get-persistent-volumes.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Unescape the '*' that was stored escaped in the rule expression\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl to display the PVs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"kubectl get pv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb deleted file mode 100644
index 0066e226..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb +++ /dev/null @@ -1,421 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG081 - Get namespaces (Kubernetes)\n", - "====================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Get the kubernetes namespaces\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg081-get-kubernetes-namespaces.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Unescape the '*' that was stored escaped in the rule expression\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes namespaces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get namespace')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Show the Kubernetes namespaces with labels\n", - "\n", - "Kubernetes namespaces containing a SQL Server Big Data Cluster have the\n", - "label \u2018MSSQL\\_CLUSTER\u2019" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get namespaces -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,LABELS:.metadata.labels')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [],
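Rather than scanning the LABELS column printed by the custom-columns cell above, the selector flag filters namespaces server-side. A sketch assuming `kubectl` is on the PATH:

    import subprocess

    # list only the namespaces carrying the MSSQL_CLUSTER label
    subprocess.run(["kubectl", "get", "namespace",
                    "--selector=MSSQL_CLUSTER", "--show-labels"], check=True)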
"source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg089-describe-non-running-pods-in-bdc.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg089-describe-non-running-pods-in-bdc.ipynb deleted file mode 100644 index 86abd30b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg089-describe-non-running-pods-in-bdc.ipynb +++ /dev/null @@ -1,511 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG089 - Describe BDC non-running pods\n", - "======================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "phase = 'Running'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg089-describe-non-running-pods-in-bdc.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." 
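As an aside, the three hint dictionaries defined in the common functions above share one simple contract: a substring seen on stderr maps to a linked TSG/SOP notebook. A minimal sketch of that lookup follows; the stderr line is invented, while the dictionary entry is copied from the cell above.

```python
# Minimal sketch of the error-hint lookup the `run` function performs on
# each stderr line; the sample line below is illustrative, not a real run.
error_hints = {'kubectl': [['no such host',
                            'TSG010 - Get configuration contexts',
                            '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb']]}

def hints_for(exe, line):
    """Return (title, link) pairs whose trigger text appears in a stderr line."""
    return [(title, link)
            for trigger, title, link in error_hints.get(exe, [])
            if trigger in line]

print(hints_for('kubectl', 'Unable to connect: no such host'))
# -> [('TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb')]
```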
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - "    except IndexError:\n", - "        from IPython.display import Markdown\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print('The Kubernetes namespace for your Big Data Cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe all non-running pods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace)\n", - "\n", - "pod_names = [pod.metadata.name for pod in pod_list.items]\n", - "\n", - "for pod in pod_list.items:\n", - "    if (pod.status.phase != phase):\n", - "        run(f'kubectl describe pod/{pod.metadata.name} -n {namespace}')\n", - "    else:\n", - "        print(f\"SKIPPING: {pod.metadata.name}, its status is equal to {phase} ({pod.status.phase})\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg097-get-statefulsets.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg097-get-statefulsets.ipynb deleted file mode 100644 index 3a72b46d..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg097-get-statefulsets.ipynb +++ /dev/null @@ -1,446 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG097 - Get BDC stateful sets (Kubernetes)\n", - "===========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Shows the stateful sets in the Big Data Cluster Kubernetes namespace.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
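The positional comments inside `apply_expert_rules` above imply the shape of an 'expanded_rules' entry. A hedged illustration follows: the priority value, pattern, and matched line are invented for this sketch, and only the notebook path refers to a real file in this book.

```python
import re

# Hypothetical 'expanded_rules' entry, shaped after the positional comments
# in apply_expert_rules: rule[1] is the notebook to suggest, rule[7] the regex.
rule = [0,                                   # priority (rules.sort() runs lowest first)
        '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb',
        'code', 'stream', 'name', 'stderr', 'text',
        '.*actively refused.*']             # regex applied to each stderr line

line = 'No connection could be made because the target machine actively refused it'
if re.match(rule[7], line, re.DOTALL):
    print(f'HINT: Use {rule[1]} to resolve this issue.')
```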
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg097-get-statefulsets.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print(f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expression for any of the 'expert rules'; if so,\n", - "       inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name\n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6] # i.e. evalue or text\n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Rule expansion escaped each literal *, putting a \\ in front of it; undo that here\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command-line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "  Azure Data Studio."
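The note above boils down to a two-step resolution order. A standalone condensation of it, assuming `kubectl` is on the PATH and using `subprocess` directly instead of the `run` helper defined earlier:

```python
import os
import subprocess

def bdc_namespace():
    """Resolve the target BDC namespace the way these notebooks do:
    AZDATA_NAMESPACE wins; otherwise take the first namespace carrying
    the MSSQL_CLUSTER label (alphabetically first by default)."""
    if "AZDATA_NAMESPACE" in os.environ:
        return os.environ["AZDATA_NAMESPACE"]
    return subprocess.check_output(
        ["kubectl", "get", "namespace", "--selector=MSSQL_CLUSTER",
         "-o", "jsonpath={.items[0].metadata.name}"]).decode()
```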
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl to display the stateful sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get statefulset -n {namespace} -o wide\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg098-get-replicasets.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg098-get-replicasets.ipynb deleted file mode 100644 index 44e3cffb..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg098-get-replicasets.ipynb +++ /dev/null @@ -1,446 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG098 - Get BDC replicasets (Kubernetes)\n", - "=========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Shows the replica sets in the Big Data Cluster Kubernetes namespace.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
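The quoting note in the `run` docstring earlier is easiest to see by feeding `shlex.split` the example command directly. This sketch only demonstrates tokenization; nothing is executed.

```python
import shlex

# The '"' pattern from the docstring: shlex.split keeps the whole jsonpath
# expression as a single argv element and collapses the '"' pattern into a
# literal quote character, so Popen receives it unmangled on every platform.
cmd = ("kubectl get nodes -o jsonpath="
       "{.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}")
print(shlex.split(cmd)[-1])
# jsonpath={.items[?(@.metadata.annotations.pv-candidate=="data-pool")].metadata.name}
```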
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg098-get-replicasets.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print(f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expression for any of the 'expert rules'; if so,\n", - "       inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name\n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6] # i.e. evalue or text\n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Rule expansion escaped each literal *, putting a \\ in front of it; undo that here\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command-line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "  Azure Data Studio."
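Stripped of the stderr scanning, the transient-fault handling in `run` above is a bounded recursive retry. A simplified shape of it, using an exception instead of a matched stderr string to signal the transient fault (`flaky` stands in for the real process launch):

```python
# Simplified sketch of the retry logic: recursion bounded by MAX_RETRIES,
# re-running the same work when a known transient failure appears.
MAX_RETRIES = 5

def run_with_retry(flaky, retry_count=0):
    try:
        return flaky()
    except ConnectionError as e:
        if retry_count < MAX_RETRIES:
            print(f"RETRY: {retry_count} (due to: {e})")
            return run_with_retry(flaky, retry_count + 1)
        raise

attempts = {"n": 0}
def flaky():
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("connected host has failed to respond")
    return "ok"

print(run_with_retry(flaky))  # retries twice, then returns "ok"
```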
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl to display the replica sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get replicaset -n {namespace} -o wide\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg099-get-daemonsets.ipynb b/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg099-get-daemonsets.ipynb deleted file mode 100644 index 59105004..00000000 --- a/Big-Data-Clusters/CU8/Public/content/monitor-k8s/tsg099-get-daemonsets.ipynb +++ /dev/null @@ -1,449 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG099 - Get BDC daemonsets (Kubernetes)\n", - "========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Shows the daemon sets in the Big Data Cluster Kubernetes namespace.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook."
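TSG099 below shells out to `kubectl`; for comparison, a sketch of the same daemon-set listing done in-process with the Kubernetes Python module (see SOP059). It assumes a reachable kubeconfig, and the namespace default here is a placeholder for the value the namespace cell resolves.

```python
import os
from kubernetes import client, config

# 'mssql-cluster' is a placeholder; in the notebook, 'namespace' is resolved
# from AZDATA_NAMESPACE or the MSSQL_CLUSTER label.
namespace = os.environ.get("AZDATA_NAMESPACE", "mssql-cluster")

config.load_kube_config()
apps = client.AppsV1Api()
for ds in apps.list_namespaced_daemon_set(namespace).items:
    s = ds.status
    print(f"{ds.metadata.name}: desired={s.desired_number_scheduled} ready={s.number_ready}")
```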
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg099-get-daemonsets.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - "            # print(f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in matches the regular expression for any of the 'expert rules'; if so,\n", - "       inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name\n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6] # i.e. evalue or text\n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Rule expansion escaped each literal *, putting a \\ in front of it; undo that here\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command-line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE before starting\n", - "  Azure Data Studio."
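One aside on the `base64_decode` branch of `run` above: it exists because `kubectl` emits secret data base64-encoded. A hedged sketch of that round trip follows; the secret name and data key are hypothetical, and the namespace default is a placeholder.

```python
import base64
import os
import subprocess

# 'controller-login-secret' and '.data.password' are hypothetical; substitute
# a secret and key that exist in your cluster. This mirrors what
# run(..., return_output=True, base64_decode=True) does internally.
namespace = os.environ.get("AZDATA_NAMESPACE", "mssql-cluster")
encoded = subprocess.check_output(
    ["kubectl", "get", "secret", "controller-login-secret",
     "-n", namespace, "-o", "jsonpath={.data.password}"]).decode()
print(base64.b64decode(encoded).decode("utf-8"))
```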
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run kubectl to display the daemon sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get daemonset -n {namespace} -o wide\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/readme.md b/Big-Data-Clusters/CU8/Public/content/readme.md deleted file mode 100644 index b60eaf89..00000000 --- a/Big-Data-Clusters/CU8/Public/content/readme.md +++ /dev/null @@ -1,43 +0,0 @@ -# Operations and Support Jupyter Book - SQL Server 2019 Big Data Clusters (CU8) - -This `Jupyter Book` of executable notebooks (.ipynb) is a companion for `SQL Server 2019` to assist in operating and supporting `Big Data Clusters`. - -Each notebook is designed to check for its own dependencies. A 'run all cells' will either complete successfully or will raise an exception with a hyperlinked 'HINT' to another notebook to resolve the missing dependency. Follow the 'HINT' hyperlink to the subsequent notebook, press 'run all cells', and upon success return to the original notebook and 'run all cells' again. - -If 'run all cells' still fails once all dependencies are installed, each notebook will analyze the results and, where possible, produce a hyperlinked 'HINT' to another notebook to further aid in resolving the issue. - -## Coping with environmental change - -The notebooks in this book are designed to abstract away environmental aspects: - - 1. Running outside or inside the Big Data Cluster - The overlay network addresses will be used when a notebook is run inside the cluster, and when run outside the cluster, the addresses returned from `azdata bdc endpoint list` will be used. - 2.
AZDATA_OPENSHIFT: Using OpenShift - set the environment variable AZDATA_OPENSHIFT, to ensure the `oc` command is used instead of `kubectl`, and this will automatically work around other compatibility issues. - 3. AZDATA_NAMESPACE: Using multiple Big Data Clusters in the same Kubernetes cluster - set AZDATA_NAMESPACE to target the correct cluster. By default these notebooks will target the cluster whose Kubernetes namespace comes alphabetically first. - -## Number convention for notebooks in a chapter - -Some chapters are effectively self-contained applications. These chapters use the following numbering convention for the contained notebooks. - -The '100' notebook, i.e. NTB100, is usually the go-to notebook to run in a chapter. - -NTB000: Setup notebook -NTB001 - NTB499: The notebooks -NTB100 - NTB110: Notebooks that run other notebooks in the chapter, i.e. NTB100 is usually the notebook to run -NTB500 - NTB599: Tests. Notebooks to test the (001 - 499) notebooks -NTB600 - NTB699: Monitoring. Notebooks to monitor the (001 - 499) notebooks -NTB900 - NTB998: Troubleshooting. Notebooks to troubleshoot the (001 - 499) notebooks -NTB999: Cleanup notebook - -## Chapters - -1. [Troubleshooters](troubleshooters/readme.md) - notebooks hyper-linked from the `Big Data Cluster Dashboard` in `Azure Data Studio`. -2. [Log Analyzers](log-analyzers/readme.md) - notebooks linked from the troubleshooters, that get and analyze logs for known issues. -3. [Diagnose](diagnose/readme.md) - notebooks for diagnosing situations with a `Big Data Cluster`. -4. [Repair](repair/readme.md) - notebooks to perform repair actions for known issues in a `Big Data Cluster`. -5. [Monitor Big Data Cluster](monitor-bdc/readme.md) - notebooks for monitoring the `Big Data Cluster` using the `azdata` command line tool. -6. [Monitor Kubernetes](monitor-k8s/readme.md) - notebooks for monitoring the `Kubernetes` cluster hosting a `Big Data Cluster`. -7. [Logs](log-files/readme.md) - notebooks for displaying log files from a `Big Data Cluster`. -8. [Sample](sample/readme.md) - notebooks demonstrating `Big Data Cluster` features and functionality. -9. [Install](install/readme.md) - notebooks to install prerequisites for other notebooks. -10. [Certificate Management](cert-management/readme.md) - notebooks to manage certificates on `Big Data Cluster` endpoints. -11. [Common](common/readme.md) - notebooks commonly linked from other notebooks, such as `azdata login / logout`. diff --git a/Big-Data-Clusters/CU8/Public/content/repair/readme.md b/Big-Data-Clusters/CU8/Public/content/repair/readme.md deleted file mode 100644 index aa60c632..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/readme.md +++ /dev/null @@ -1,41 +0,0 @@ -# Repair - -- The notebooks in this chapter are for repairing known situations and states of a SQL Server Big Data Cluster.
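A minimal sketch of how a notebook can honor the two environment-variable conventions described in the readme above (illustrative only; the shipped notebooks implement the equivalent logic inside their common `run` helper and namespace-discovery cells):

```python
import os

# AZDATA_OPENSHIFT: prefer the OpenShift CLI over kubectl when set.
cli = "oc" if "AZDATA_OPENSHIFT" in os.environ else "kubectl"

# AZDATA_NAMESPACE: explicit target cluster; otherwise fall back to
# label-based discovery (namespaces carrying the MSSQL_CLUSTER label).
namespace = os.environ.get("AZDATA_NAMESPACE")
if namespace is None:
    print(f"No AZDATA_NAMESPACE set; discover it with: {cli} get namespace --selector=MSSQL_CLUSTER")
else:
    print(f"Targeting Big Data Cluster namespace '{namespace}' via '{cli}'")
```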
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG024 - Namenode is in safe mode ](tsg024-name-node-is-in-safe-mode.ipynb) - -- [TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr) ](tsg041-increase-fs-aio-max-nr.ipynb) - -- [TSG048 - Deployment stuck at "Waiting for controller pod to be up" ](tsg048-create-stuck-waiting-for-controller.ipynb) - -- [TSG038 - BDC create failures due to - doc is missing key ](tsg038-doc-is-missing-key-error.ipynb) - -- [TSG047 - ConfigException - Expected only one object with name ](tsg047-expected-only-one-object-with-name.ipynb) - -- [TSG050 - Cluster create hangs with "timeout expired waiting for volumes to attach or mount for pod" ](tsg050-timeout-expired-waiting-for-volumes.ipynb) - -- [TSG057 - Failed when starting controller service. System.TimeoutException ](tsg057-failed-when-starting-controller.ipynb) - -- [TSG067 - Failed to complete kube config setup ](tsg067-failed-to-complete-kube-config-setup.ipynb) - -- [TSG075 - FailedCreatePodSandBox due to NetworkPlugin cni failed to set up pod ](tsg075-networkplugin-cni-failed-to-setup-pod.ipynb) - -- [TSG110 - Azdata returns ApiError](tsg110-azdata-returns-apierror.ipynb) - -- [TSG028 - Restart node manager on all storage pool nodes ](tsg028-restart-nodemanager-in-storage-pool.ipynb) - -- [TSG045 - The maximum number of data disks allowed to be attached to a VM of this size (AKS) ](tsg045-max-number-data-disks-allowed.ipynb) - -- [TSG109 - Set upgrade timeouts ](tsg109-upgrade-stalled.ipynb) - -- [TSG053 - ADS Provided Books must be saved before use ](tsg053-save-book-first.ipynb) - -- [SOP016 - Get SID for Active Directory user/group ](sop016-get-sid-for-principal.ipynb) - -- [SOP017 - Add app-deploy AD group ](sop017-add-app-deploy-group.ipynb) - -- [TSG124 - 'No credentials were supplied' error from azdata login ](tsg124-no-credentials-were-supplied.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/repair/sop016-get-sid-for-principal.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/sop016-get-sid-for-principal.ipynb deleted file mode 100644 index 30420ae7..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/sop016-get-sid-for-principal.ipynb +++ /dev/null @@ -1,100 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP016 - Get SID for Active Directory user/group\n", - "================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Steps\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "$user_or_group_name = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install Active Directory module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Add-WindowsCapability -Online -Name Rsat.ActiveDirectory.DS-LDS.Tools~~~~0.0.1.0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the SID for user" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Get-ADUser -Identity $user_or_group_name -Properties * | select SID" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the SID for group" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Get-ADGroup -Identity $user_or_group_name -Properties * | select SID" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "powershell", - "display_name": "PowerShell" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/sop017-add-app-deploy-group.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/sop017-add-app-deploy-group.ipynb deleted file mode 100644 index ee758666..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/sop017-add-app-deploy-group.ipynb +++ /dev/null @@ -1,204 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SOP017 - Add app-deploy AD group\n", - "================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "If the Big Data Cluster was installed without an Active Directory group,\n", - "you can add one post-install using this notebook.\n", - "\n", - "### Steps\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "user_or_group_name = \"\"\n", - "realm = \"\" # Upper case\n", - "sid = \"\" # To find the SID of the user or the group being added, you can use Get-ADUser or Get-ADGroup PowerShell commands.\n", - "role = \"appReader\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \\[0\\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create helper function to run `sqlcmd` against the controller database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas\n", - "from io import StringIO\n", - "pandas.set_option('display.max_colwidth', -1)\n", - "name = 'controldb-0'\n", - "container = 'mssql-server'\n", - "\n", - "def run_sqlcmd(query):\n", - " command=f\"\"\"export SQLCMDPASSWORD=$(cat /var/run/secrets/credentials/mssql-sa-password/password); /opt/mssql-tools/bin/sqlcmd -b -S . -U sa -Q \"SET NOCOUNT ON; {query}\" -d controller -s\"^\" -W > /tmp/out.csv; sed -i 2d /tmp/out.csv; cat /tmp/out.csv\"\"\"\n", - " output=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - " print(output)\n", - "\n", - "print(\"Function defined\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Insert user or group into the controller database roles table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_sqlcmd(f\"INSERT INTO [controller].[auth].[roles] VALUES (N'{user_or_group_name}@{realm}', '{role}')\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Insert user or group into the controller database active\\_directory\\_principals tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_sqlcmd(f\"INSERT INTO [controller].[auth].[active_directory_principals] VALUES (N'{user_or_group_name}@{realm}', N'{sid}')\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg024-name-node-is-in-safe-mode.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg024-name-node-is-in-safe-mode.ipynb deleted file mode 100644 index 99a8d15b..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg024-name-node-is-in-safe-mode.ipynb +++ /dev/null @@ -1,616 +0,0 
@@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG024 - Namenode is in safe mode\n", - "=================================\n", - "\n", - "HDFS can get itself into Safe mode. For example if too many Pods are\n", - "re-cycled too quickly in the Storage Pool then Safe mode may be\n", - "automatically enabled.\n", - "\n", - "When starting a spark session, the user may see (for example, when\n", - "trying to start a PySpark or PySpark3 session in a notebook from Azure\n", - "Data Studio):\n", - "\n", - "> The code failed because of a fatal error: Error sending http request\n", - "> and maximum retry encountered..\n", - ">\n", - "> Some things to try: a) Make sure Spark has enough available resources\n", - "> for Jupyter to create a Spark context. b) Contact your Jupyter\n", - "> administrator to make sure the Spark magics library is configured\n", - "> correctly. c) Restart the kernel.\n", - "\n", - "Use this notebook to run a report to understand more about HDFS, and\n", - "optionally move the cluster out of Safe mode if it is safe to do so.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - "    NOTES:\n", - "\n", - "    1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg024-name-node-is-in-safe-mode.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \\[0\\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio."
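Before the full `hdfs dfsadmin -report` below, a lighter probe of just the safe-mode flag can be useful. A hedged sketch, assuming kube config is already set up, a Big Data Cluster namespace labeled `MSSQL_CLUSTER`, and a namenode pod labeled `role=namenode` (`hdfs dfsadmin -safemode get` is a standard HDFS admin command):

```python
from kubernetes import client, config
from kubernetes.stream import stream

config.load_kube_config()
api = client.CoreV1Api()

# Discover the BDC namespace and the namenode pod by label.
namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name
namenode_pod = api.list_namespaced_pod(namespace, label_selector='role=namenode').items[0].metadata.name

# Prints e.g. "Safe mode is ON" or "Safe mode is OFF".
print(stream(api.connect_get_namespaced_pod_exec, namenode_pod, namespace,
             command=['/bin/sh', '-c', 'hdfs dfsadmin -safemode get'],
             container='hadoop', stderr=True, stdout=True))
```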
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - "    except IndexError:\n", - "        from IPython.display import Markdown\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the namenode pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "namenode_pod = run(f'kubectl get pod --selector=role=namenode -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "\n", - "print ('Namenode pod name: ' + namenode_pod)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the `hdfs dfsadmin` report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "name=namenode_pod\n", - "container='hadoop'\n", - "\n", - "command='hdfs dfsadmin -report'\n", - "\n", - "string=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - "\n", - "print(string)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set the text that identifies this issue" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "precondition_text=\"Safe mode is ON\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PRECONDITION CHECK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if precondition_text not in string:\n", - "    raise Exception(\"PRECONDITION NON-MATCH: 'tsg024-name-node-is-in-safe-mode' is not a match for an active problem\")\n", - "\n", - "print(\"PRECONDITION MATCH: 'tsg024-name-node-is-in-safe-mode' is a match for an active problem in this cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Resolution\n", - "----------\n", - "\n", - "NOTE: It is only safe to take the namenode out of safe mode once it is\n", - "determined that there are no missing, corrupt or under replicated\n", - "blocks that should not be ignored.
Use `hdfs dfsadmin -report` and `hdfs fsck` to\n", - "understand more about missing, corrupt or under replicated blocks.\n", - "\n", - "### Move the namenode out of safe mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "command='hdfs dfsadmin -safemode leave'\n", - "\n", - "string=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - "\n", - "print(string)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate - Verify the namenode is no longer in safe mode\n", - "\n", - "Validate that the text \u2018Safe mode is ON\u2019 is no longer in the\n", - "`hdfs dfsadmin -report` output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "command='hdfs dfsadmin -report'\n", - "\n", - "string=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - "\n", - "if precondition_text in string:\n", - "    raise SystemExit ('FAILED - hdfs dfsadmin -report output still contains: ' + precondition_text)\n", - "\n", - "print ('SUCCESS - hdfs dfsadmin -report output no longer contains: ' + precondition_text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg028-restart-nodemanager-in-storage-pool.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg028-restart-nodemanager-in-storage-pool.ipynb deleted file mode 100644 index 9ababd1e..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg028-restart-nodemanager-in-storage-pool.ipynb +++ /dev/null @@ -1,165 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG028 - Restart node manager on all storage pool nodes\n", - "=======================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "container='hadoop'\n", - "command=f'supervisorctl restart nodemanager'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "-   set \\[0\\] to the correct value for the big data cluster.\n", - "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "    Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - "    except IndexError:\n", - "        from IPython.display import Markdown\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run command in containers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(namespace)\n", - "\n", - "for pod in pod_list.items:\n", - "    container_names = [container.name for container in pod.spec.containers]\n", - "    for container_name in container_names:\n", - "        if container_name == container:\n", - "            print (f\"Pod: {pod.metadata.name} / Container: {container}:\")\n", - "            try:\n", - "                output=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n", - "                print (output)\n", - "            except Exception:\n", - "                print (f\"Failed to run {command} in container: {container} for pod: {pod.metadata.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg038-doc-is-missing-key-error.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg038-doc-is-missing-key-error.ipynb deleted file mode 100644 index b1ea39b9..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg038-doc-is-missing-key-error.ipynb +++ /dev/null @@ -1,52 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG038 - BDC
create failures due to - doc is missing key\n", - "========================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "An attempt to create a Big Data Cluster fails, and in the controller log\n", - "the following ERROR can be found.\n", - "\n", - "> 2019-08-14 17:38:44.9872 \\| ERROR \\| Failed to create agent\n", - "> certificate for ScaleSet \u2018control\u2019 k8s.KubernetesException: Internal\n", - "> Server Error:\n", - "> {\u201ckind\u201d:\u201cStatus\u201d,\u201capiVersion\u201d:\u201cv1\u201d,\u201cmetadata\u201d:{},\u201cstatus\u201d:\u201cFailure\u201d,\u201cmessage\u201d:\u201cjsonpatch\n", - "> replace operation does not apply: doc is missing key:\n", - "> /data\u201d,\u201ccode\u201d:500} \u2014\\> Microsoft.Rest.HttpOperationException:\n", - "> Operation returned an invalid status code \u2018InternalServerError\u2019 at\n", - "> k8s.Kubernetes.d\\_\\_144.MoveNext()\n", - "> \u2014 End of stack trace from previous location where exception was thrown\n", - "> \u2014 at System.Runtime.ExceptionServices.ExceptionDispatchInfo.Throw() at\n", - "> System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification\n", - "\n", - "### Resolution\n", - "\n", - "This has been seen due to the version of the Kubernetes cluster not\n", - "being at or above the minimum required version. e.g.\u00a0this error will\n", - "happen if trying to create a Big Data Cluster in a v1.10 Kubernetes\n", - "cluster.\n", - "\n", - "Upgrade the Kubernetes cluster to the minimum supported version or\n", - "higher." - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg041-increase-fs-aio-max-nr.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg041-increase-fs-aio-max-nr.ipynb deleted file mode 100644 index f4eedb8c..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg041-increase-fs-aio-max-nr.ipynb +++ /dev/null @@ -1,47 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)\n", - "========================================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "The mssql-server containers continuously restart. The following is seen\n", - "in the Agent log for the mssql-server containers:\n", - "\n", - "> 2019/08/30 00:18:44.038983 \\[mssql:stderr\\] This program has\n", - "> encountered a fatal error and cannot continue running at Fri Aug 30\n", - "> 00:17:42 2019 2019/08/30 00:18:44.038998 \\[mssql:stderr\\] The\n", - "> following diagnostic information is available: 2019/08/30\n", - "> 00:18:44.039007 \\[mssql:stderr\\] Reason: Host Extension RTL\\_ASSERT\n", - "> (0x00000003) 2019/08/30 00:18:44.039020 \\[mssql:stderr\\] Status:\n", - "> Unknown (0x0000000b) 2019/08/30 00:18:44.039030 \\[mssql:stderr\\]\n", - "> Message: Unable to create a new asynchronous I/O context.
Please\n", - "> increase sysctl fs.aio-max-nr.\n", - "\n", - "### Resolution\n", - "\n", - "Follow the steps here to resolve this issue:\n", - "\n", - " Setting the sysctl parameter aio-max-nr value to 1048576\n", - "\n", - " https://sort.veritas.com/public/documents/HSO/2.0/linux/productguides/html/hfo_admin_rhel/ch04s03.htm" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg045-max-number-data-disks-allowed.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg045-max-number-data-disks-allowed.ipynb deleted file mode 100644 index f7a87b55..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg045-max-number-data-disks-allowed.ipynb +++ /dev/null @@ -1,634 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG045 - The maximum number of data disks allowed to be attached to a VM of this size (AKS)\n", - "===========================================================================================\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg045-max-number-data-disks-allowed.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
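Aside (editor's sketch, not part of the original notebook): the `retry_hints`, `error_hints` and `install_hint` dictionaries defined at the end of the common functions cell above are what drive the HINT and RETRY behaviour inside `run`. A minimal, self-contained sketch of that lookup follows; the sample stderr line is invented for illustration:

```python
# Minimal sketch of the hint lookup performed by `run` on each stderr line.
# The sample dictionary mirrors the 'kubectl' entry above; the stderr line
# itself is hypothetical.
sample_error_hints = {'kubectl': [['no such host',
                                   'TSG010 - Get configuration contexts',
                                   '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb']]}

line_decoded = "Unable to connect to the server: dial tcp: lookup bdc: no such host"
user_provided_exe_name = "kubectl"

if user_provided_exe_name in sample_error_hints:
    for text, title, link in sample_error_hints[user_provided_exe_name]:
        if line_decoded.find(text) != -1:
            print(f"HINT: Use [{title}]({link}) to resolve this issue.")
```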
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set the text to look for in pod events" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "name=\"master-0\"\n", - "kind=\"Pod\"\n", - "precondition_text=\"The maximum number of data disks allowed to be attached to a VM of this size\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get events for a Kubernetes resource\n", - "\n", - "Get the events for a Kubernetes namespaced resource:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "V1EventList=api.list_namespaced_event(namespace)\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " print(event.message)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PRECONDITION CHECK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "precondition=False\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " if event.message.find(precondition_text) != -1:\n", - " precondition=True\n", - "\n", - "if not precondition:\n", - " raise Exception(\"PRECONDITION NON-MATCH: 'tsg045-max-number-data-disks-allowed' is not a match for an active problem\")\n", - "\n", - "print(\"PRECONDITION MATCH: 'tsg045-max-number-data-disks-allowed' is a match for an active problem in this cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Resolution\n", - "----------\n", - "\n", - "Kubernetes may have become stuck trying to start this pod on a node\n", - "which doesn\u2019t allow enough Persistent Volumes to be attached to it.\n", - "\n", - "You can list the Persistent Volumes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run('kubectl get pv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To work around this issue, delete the master-0 pod; the Kubernetes\n", - "statefulset will automatically create a new pod, hopefully on another\n", - "node!"
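As an alternative to the kubectl call in the next cell, the stuck pod can also be deleted with the Python Kubernetes client instantiated earlier. A sketch, assuming the `api` and `namespace` variables from the cells above:

```python
# Sketch: delete the stuck pod via the Python client; the statefulset
# controller then recreates it (hopefully on another node). Assumes 'api'
# (CoreV1Api) and 'namespace' are set by the cells above.
from kubernetes.client.rest import ApiException

try:
    api.delete_namespaced_pod(name="master-0", namespace=namespace)
    print("Deleted master-0; the statefulset will recreate it.")
except ApiException as e:
    print(f"Delete failed: {e.status} {e.reason}")
```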
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod/master-0 -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now watch the events from the new master-0 pod, to verify the issue is\n", - "resolved.\n", - "\n", - "### Get events for a Kubernetes resource\n", - "\n", - "Get the events for a Kubernetes namespaced resource:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "V1EventList=api.list_namespaced_event(namespace)\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " print(event.message)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### View the pods\n", - "\n", - "Verify the master-0 pod is now running" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pods -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the master-0\n", - "\n", - "Verify the master pod looks healthy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl describe pod/master-0 -n {namespace}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg047-expected-only-one-object-with-name.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg047-expected-only-one-object-with-name.ipynb deleted file mode 100644 index 2485becd..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg047-expected-only-one-object-with-name.ipynb +++ /dev/null @@ -1,59 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG047 - ConfigException - Expected only one object with name\n", - "=============================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When running mssqlctl you get:\n", - "\n", - "> ERROR: Error processing command: \u201cConfigException\u201d Invalid kube-config\n", - "> file. Expected only one object with name username\\_bdc-aks-cluster in\n", - "> kube-config/users list\n", - "\n", - "There are duplicate entries in the .kube/config. This can be caused by\n", - "creating, deleting, and re-creating the same AKS cluster, and calling \u2018az aks\n", - "get-credentials\u2019 each time. The \u2018get-credentials\u2019 call duplicates the\n", - "entries in the .kube/config file.\n", - "\n", - "The command line tool \u2018kubectl\u2019 does not seem to have a problem with\n", - "duplicate entries, but the Kubernetes Python client does, and azdata\n", - "uses the kubernetes python client to create the MSSQL\\_CLUSTER:\n", - "\n", - " https://github.com/kubernetes-client/python\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "The python kubernetes client (which azdata uses) does not work if the\n", - "same details are in the kubeconfig twice. 
You\u2019ll need to remove the 1st\n", - "entry, and leave the 2nd entry.\n", - "\n", - "### Resolution\n", - "\n", - "You will have 2 of these in .kube/config\n", - "\n", - "- name: username\\_bdc-aks-cluster\n", - "\n", - "Delete the first one (the entire section, i.e. including the user and all its\n", - "attributes)." - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg048-create-stuck-waiting-for-controller.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg048-create-stuck-waiting-for-controller.ipynb deleted file mode 100644 index f5b0579e..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg048-create-stuck-waiting-for-controller.ipynb +++ /dev/null @@ -1,220 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG048 - Deployment stuck at \u201cWaiting for controller pod to be up\u201d\n", - "==================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Troubleshooting for the situation where running azdata bdc create\n", - "hangs at:\n", - "\n", - " Waiting for controller pod to be up\u2026\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
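Looking back at TSG047 above: before hand-editing .kube/config, a short sketch can show which entry names are duplicated. This assumes the `pyyaml` package is installed (it is not used elsewhere in these notebooks):

```python
# Sketch for TSG047 (assumption: pip install pyyaml). Counts duplicate entry
# names in each top-level kube-config list (users/clusters/contexts).
import os
from collections import Counter

import yaml

with open(os.path.expanduser("~/.kube/config")) as f:
    kube_config = yaml.safe_load(f)

for section in ("users", "clusters", "contexts"):
    names = [entry["name"] for entry in kube_config.get(section) or []]
    for entry_name, count in Counter(names).items():
        if count > 1:
            print(f"{section}: '{entry_name}' appears {count} times")
```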
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the controller pod that has the \u201ccouldn\u2019t parse image reference\u201d problem" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "label_selector = 'app=controller'\n", - "name=api.list_namespaced_pod(namespace, label_selector=label_selector).items[0].metadata.name\n", - "\n", - "print (\"Controller pod name: \" + name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set the text to look for in pod events" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kind=\"Pod\"\n", - "precondition_text=\"couldn't parse image reference\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get events for a Kubernetes resource\n", - "\n", - "Get the events for a Kubernetes namespaced resource:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "V1EventList=api.list_namespaced_event(namespace)\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " print(event.message)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PRECONDITION CHECK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "precondition=False\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " if event.message.find(precondition_text) != -1:\n", - " precondition=True\n", - "\n", - "if not precondition:\n", - " raise Exception(\"PRECONDITION NON-MATCH: 'tsg048-create-stuck-waiting-for-controller' is not a match for an active problem\")\n", - "\n", - "print(\"PRECONDITION MATCH: 'tsg048-create-stuck-waiting-for-controller' is a match for an active problem in this cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Resolution\n", - "----------\n", - "\n", - "To resolve this problem, fix the docker repository name and run cluster\n", - "create again."
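To see the image reference that could not be parsed (and therefore which docker repository name needs fixing), here is a sketch using the `api`, `name` and `namespace` variables defined in the cells above:

```python
# Sketch: print the image references on the controller pod, to spot the
# mis-typed repository. Assumes 'api', 'name' and 'namespace' from above.
pod = api.read_namespaced_pod(name=name, namespace=namespace)
for container in pod.spec.containers:
    print(f"{container.name}: {container.image}")
```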
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg050-timeout-expired-waiting-for-volumes.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg050-timeout-expired-waiting-for-volumes.ipynb deleted file mode 100644 index 74fa70a9..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg050-timeout-expired-waiting-for-volumes.ipynb +++ /dev/null @@ -1,654 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG050 - Cluster create hangs with \u201ctimeout expired waiting for volumes to attach or mount for pod\u201d\n", - "===================================================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "The controller gets stuck during the `bdc create` process.\n", - "\n", - "> Events: Type Reason Age From Message \u2014- \u2014\u2014 \u2014- \u2014- \u2014\u2014- Warning\n", - "> FailedScheduling 12m (x7 over 12m) default-scheduler pod has unbound\n", - "> immediate PersistentVolumeClaims (repeated 3 times) Normal Scheduled\n", - "> 12m default-scheduler Successfully assigned\n", - "> bdc/mssql-monitor-influxdb-0 to aks-nodepool1-32258814-0 Warning\n", - "> FailedMount 1m (x5 over 10m) kubelet, aks-nodepool1-32258814-0 Unable\n", - "> to mount volumes for pod\n", - "> \u201cmssql-monitor-influxdb-0\\_bdc(888fb098-4857-11e9-92d1-0e4531614717)\u201d:\n", - "> timeout expired waiting for volumes to attach or mount for pod\n", - "> \u201cbdc\u201d/\u201cmssql-controller-0\u201d. list of unmounted volumes=\\[storage\\].\n", - "> list of unattached volumes=\\[storage default-token-pj765\\]\n", - "\n", - "NOTE: This Warning does often appear during a normal deployment, but it\n", - "should clear up within a couple of minutes.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg050-timeout-expired-waiting-for-volumes.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio."
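Aside (editor's sketch, not part of the original notebook): the field layout that `apply_expert_rules` above unpacks from each rule, shown end to end. The field order comes from the code above; the sample rule and stderr line are invented for illustration:

```python
# Sketch of one 'expanded_rules' entry and the match performed by
# apply_expert_rules. Field order is taken from the code above; the rule
# contents are hypothetical.
import re

sample_rule = [
    0,                                  # rule[0]: priority (rules.sort() runs lowest first)
    "../repair/tsg056-kubectl-no-connection-could-be-made.ipynb",  # rule[1]: notebook to HINT
    "code",                             # rule[2]: cell_type
    "stream",                           # rule[3]: output_type
    "name",                             # rule[4]: output_type_name
    "stderr",                           # rule[5]: output_type_value
    "text",                             # rule[6]: details_name
    ".*No connection could be made.*",  # rule[7]: regular expression
]

line = "No connection could be made because the target machine actively refused it"
if re.match(sample_rule[7], line, re.DOTALL):
    print(f"HINT: Use [{sample_rule[1]}]({sample_rule[1]}) to resolve this issue.")
```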
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the controller pod" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "label_selector = 'app=controller'\n", - "name=api.list_namespaced_pod(namespace, label_selector=label_selector).items[0].metadata.name\n", - "\n", - "print (\"Controller pod name: \" + name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set the text to look for in pod events\n", - "\n", - "Set the text to look for in pod events that demonstrates this TSG is\n", - "applicable to the current cluster state." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kind=\"Pod\"\n", - "precondition_text=\"timeout expired waiting for volumes to attach or mount for pod\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get events for a Kubernetes resource\n", - "\n", - "Get the events for a Kubernetes namespaced resource:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "V1EventList=api.list_namespaced_event(namespace)\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " print(event.message)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PRECONDITION CHECK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "precondition=False\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " if event.message.find(precondition_text) != -1:\n", - " precondition=True\n", - "\n", - "if not precondition:\n", - " raise Exception(\"PRECONDITION NON-MATCH: 'tsg050-timeout-expired-waiting-for-volumes' is not a match for an active problem\")\n", - "\n", - "print(\"PRECONDITION MATCH: 'tsg050-timeout-expired-waiting-for-volumes' is a match for an active problem in this cluster\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Resolution\n", - "----------\n", - "\n", - "Delete the pod that is stuck trying to mount a PV (Persistent Volume);\n", - "the higher-level Kubernetes resource (statefulset, replicaset, etc.) will\n", - "re-create the Pod."
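Before deleting the pod, it can help to see which PersistentVolumeClaims exist in the namespace and what they are bound to. A sketch, assuming the `api` and `namespace` variables from the cells above:

```python
# Sketch: list the PVCs in the BDC namespace with their phase and bound
# volume, to see what the pod is waiting on. Assumes 'api' and 'namespace'.
pvcs = api.list_namespaced_persistent_volume_claim(namespace)
for pvc in pvcs.items:
    print(f"{pvc.metadata.name}: phase={pvc.status.phase}, volume={pvc.spec.volume_name}")
```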
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl delete pod/{name} -n {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the name of the new controller pod\n", - "\n", - "Get the name of the new controller pod, and view the events to ensure\n", - "the issue has cleared up" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "name=api.list_namespaced_pod(namespace, label_selector=label_selector).items[0].metadata.name\n", - "\n", - "print(\"New controller pod name: \" + name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get events for a Kubernetes resource\n", - "\n", - "Get the events for a Kubernetes namespaced resource:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "V1EventList=api.list_namespaced_event(namespace)\n", - "\n", - "for event in V1EventList.items:\n", - " if (event.involved_object.kind==kind and event.involved_object.name==name):\n", - " print(event.message)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate the new controller pod gets into a \u2018Running\u2019 state" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f'kubectl get pod/{name} -n {namespace}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg053-save-book-first.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg053-save-book-first.ipynb deleted file mode 100644 index 2ebf4374..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg053-save-book-first.ipynb +++ /dev/null @@ -1,40 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG053 - ADS Provided Books must be saved before use\n", - "====================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Azure Data Studio comes with some \u201cProvided Books\u201d. 
These books must be\n", - "saved first, otherwise some notebooks will not run correctly.\n", - "\n", - "NOTE: The usability in this area could be better, this is being tracked\n", - "under:\n", - "\n", - "- https://github.com/microsoft/azuredatastudio/issues/10500\n", - "\n", - "### Steps\n", - "\n", - "To save a provided book, click the \u201cSave Book\u201d icon (this icon is to the\n", - "right of the book title, at the top of the tree viewer in the \u201cProvided\n", - "Books\u201d section), and save the book to the local machine" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg057-failed-when-starting-controller.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg057-failed-when-starting-controller.ipynb deleted file mode 100644 index 003c2201..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg057-failed-when-starting-controller.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG057 - Failed when starting controller service. System.TimeoutException\n", - "=========================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "> 2019-09-03 23:30:02.0053 \\| ERROR \\| Failed when starting controller\n", - "> service. System.TimeoutException: Operation timed out after 10\n", - "> minutes. \u2014\\> System.Data.SqlClient.SqlException: A network-related or\n", - "> instance-specific error occurred while establishing a connection to\n", - "> SQL Server. The server was not found or was not accessible. Verify\n", - "> that the instance name is correct and that SQL Server is configured to\n", - "> allow remote connections. (provider: TCP Provider, error: 0 - The\n", - "> remote computer refused the network connection.) \u2014\\>\n", - "> System.ComponentModel.Win32Exception: The remote computer refused the\n", - "> network connection Unhandled Exception: System.TimeoutException:\n", - "> Operation timed out after 10 minutes. \u2014\\>\n", - "> System.Data.SqlClient.SqlException: A network-related or\n", - "> instance-specific error occurred while establishing a connection to\n", - "> SQL Server. The server was not found or was not accessible. Verify\n", - "> that the instance name is correct and that SQL Server is configured to\n", - "> allow remote connections. (provider: TCP Provider, error: 0 - The\n", - "> remote computer refused the network connection.) \u2014\\>\n", - "> System.ComponentModel.Win32Exception: The remote computer refused the\n", - "> network connection This program has encountered a fatal error and\n", - "> cannot continue running at Tue Sep 3 23:30:02 2019 Reason: OS Fatal\n", - "> Error (0x00000006)\n", - "\n", - "Also in the controller database errorlog, the following can be seen:\n", - "\n", - "> 2019-09-05 13:38:57.25 spid12s Server failed to listen on \u2018any\u2019 \n", - "> 1433. Error: 0x2742. To proceed, notify your system administrator.\n", - "> 2019-09-05 13:38:57.26 spid12s Error: 17182, Severity: 16, State: 1.\n", - "\n", - "### Resolution\n", - "\n", - "This has been seen when an overlay network has not been deployed in the\n", - "kubernetes cluster. 
Any container that requires some network access or\n", - "feature will fail.\n", - "\n", - "To resolve this issue, delete the BDC cluster (azdata bdc delete),\n", - "deploy an overlay network, and run azdata bdc create again." - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg067-failed-to-complete-kube-config-setup.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg067-failed-to-complete-kube-config-setup.ipynb deleted file mode 100644 index bc01702a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg067-failed-to-complete-kube-config-setup.ipynb +++ /dev/null @@ -1,62 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG067 - Failed to complete kube config setup\n", - "=============================================\n", - "\n", - "When trying to run `azdata`, it fails with:\n", - "\n", - "> Cluster deployment documentation can be viewed at:\n", - "> https://aka.ms/bdc-deploy\n", - ">\n", - "> ERROR: Failed to complete kube config setup. ERROR: Failed to complete\n", - "> kube config setup.\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "This is caused by an invalid .kube/config file.\n", - "\n", - "The command `az aks get-credentials` can cause a .kube config file to\n", - "get into this state if it is called twice on the same machine, for AKS\n", - "clusters that are named the same, but are different physical clusters,\n", - "and --overwrite-existing was not appended to the `az aks get-credentials`\n", - "command line.\n", - "\n", - "Try running a simple kubectl command to confirm if there is a\n", - "fundamental issue with Kubernetes connectivity on this machine. See\n", - "Related links below.\n", - "\n", - "### Resolution\n", - "\n", - "To resolve this issue, manually edit the \\~/.kube/config file. 
On\n", - "Windows this can be found in the %USERPROFILE%.kube folder.\n", - "\n", - "If this issue was called by running `az aks get-credentials` twice, then\n", - "it is likely that the first entry for each section in the `config` file\n", - "is the stale entry that needs to be removed.\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG021 - Get cluster info\n", - " (Kubernetes)](../monitor-k8s/tsg021-get-k8s-cluster-info.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg075-networkplugin-cni-failed-to-setup-pod.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg075-networkplugin-cni-failed-to-setup-pod.ipynb deleted file mode 100644 index 8f362435..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg075-networkplugin-cni-failed-to-setup-pod.ipynb +++ /dev/null @@ -1,506 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG075 - FailedCreatePodSandBox due to NetworkPlugin cni failed to set up pod\n", - "=============================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "> Error: Warning FailedCreatePodSandBox 58m kubelet,\n", - "> rasha-virtual-machine Failed create pod sandbox: rpc error: code =\n", - "> Unknown desc = failed to set up sandbox container\n", - "> \u201cb76dc0446642bf06ef91b331be55814795410d58807eeffddf1fe3b5c9c572c0\u201d\n", - "> network for pod \u201cmssql-controller-hfvxr\u201d: NetworkPlugin cni failed to\n", - "> set up pod \u201cmssql-controller-hfvxr\\_test\u201d network: open\n", - "> /run/flannel/subnet.env: no such file or directory Normal\n", - "> SandboxChanged 34m (x325 over 59m) kubelet, virtual-machine Pod\n", - "> sandbox changed, it will be killed and re-created. 
Warning\n", - "> FailedCreatePodSandBox 4m5s (x831 over 58m) kubelet, virtual-machine\n", - "> (combined from similar events): Failed create pod sandbox: rpc error:\n", - "> code = Unknown desc = failed to set up sandbox container\n", - "> \u201cbee7d4eb0a74a4937de687a31676887b0c324e88a528639180a10bdbc33ce008\u201d\n", - "> network for pod \u201cmssql-controller-hfvxr\u201d: NetworkPlugin cni failed to\n", - "> set up pod \u201cmssql-controller-hfvxr\\_test\u201d network: open\n", - "> /run/flannel/subnet.env: no such file or directory\n", - "\n", - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") 
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"tsg075-networkplugin-cni-failed-to-setup-pod.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Resolution\n", - "\n", - "This issue has been seen on single-node kubeadm installations when the\n", - "host machine has been rebooted.\n", - "\n", - "To resolve the issue, delete the kube-flannel and coredns pods.
The\n", - "higher level Kuberenetes objects will re-create these pods.\n", - "\n", - "The following code cells will do this for you:\n", - "\n", - "### Verify there are flannel and coredns pods in this kubernetes cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get pods -n kube-system\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete them, so they can be re-created by the higher level Kubernetes objects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pod_list = api.list_namespaced_pod(\"kube-system\")\n", - "\n", - "for pod in pod_list.items:\n", - " if pod.metadata.name.find(\"kube-flannel-ds\") != -1:\n", - " print(f\"Deleting pod: {pod.metadata.name}\")\n", - " run(f\"kubectl delete pod/{pod.metadata.name} -n kube-system\")\n", - "\n", - " if pod.metadata.name.find(\"coredns-\") != -1:\n", - " print(f\"Deleting pod: {pod.metadata.name}\")\n", - " run(f\"kubectl delete pod/{pod.metadata.name} -n kube-system\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Verify the flannel and coredns pods have been re-created" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"kubectl get pods -n kube-system\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg109-upgrade-stalled.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg109-upgrade-stalled.ipynb deleted file mode 100644 index a315d001..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg109-upgrade-stalled.ipynb +++ /dev/null @@ -1,242 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG109 - Set upgrade timeouts\n", - "=============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When viewing the upgrade configmap it may report \u201cController upgrade\n", - "stalled\u201d or \u201cControllerDb upgrade has timed out\u201d, e.g.:\n", - "\n", - "> data: controller-upgrade:\n", - "> \u2018{\u201cupgradeInitiatedTimestamp\u201d:\u201c2019-12-19T21:07:37.1608034+00:00\u201d,\u201clastTransitionTimestamp\u201d:\u201c2019-12-19T21:15:08.7304489+00:00\u201d,\u201ctargetVersion\u201d:\u201c\u201d,\u201ccurrentVersion\u201d:\u201c15.0.4003.10009\\_2\u201d,\u201ctargetRepository\u201d:\u201c\u201d,\u201ccurrentRepository\u201d:\u201c\u201d,\u201ccurrentState\u201d:\u201cNoUpgradeInProgress\u201d,\u201cpreviousState\u201d:\u201cRollingBackController\u201d,\u201cmessage\u201d:\u201cController\n", - "> upgrade\n", - "> stalled.\u201d,\u201ccontrollerUpgradeTimeoutInMinutes\u201d:5,\u201ccomponentUpgradeTimeoutInMinutes\u201d:30,\u201ctotalUpgradeTimeoutInMinutes\u201d:30,\u201cstableUptimeThresholdInMinutes\u201d:2}\u2019\n", - "\n", - "or\n", - "\n", - "> data: controller-upgrade:\n", - "> 
\u2018{\u201cupgradeInitiatedTimestamp\u201d:\u201c2019-12-19T22:12:44.9427392+00:00\u201d,\u201clastTransitionTimestamp\u201d:\u201c2019-12-19T22:25:13.9526729+00:00\u201d,\u201ctargetVersion\u201d:\u201c\u201d,\u201ccurrentVersion\u201d:\u201c\u201d,\u201ctargetRepository\u201d:\u201c\u201d,\u201ccurrentRepository\u201d:\u201c\u201d,\u201ccurrentState\u201d:\u201cNoUpgradeInProgress\u201d,\u201cpreviousState\u201d:\u201cRollingBackController\u201d,\u201cmessage\u201d:\u201cControllerDb\n", - "> upgrade has timed out. Rolling back to version\n", - "> .\u201d,\u201ccontrollerUpgradeTimeoutInMinutes\u201d:5,\u201ccomponentUpgradeTimeoutInMinutes\u201d:30,\u201ctotalUpgradeTimeoutInMinutes\u201d:30,\u201cstableUptimeThresholdInMinutes\u201d:2}\u2019\n", - "\n", - "This can happen if it takes too long to pull the image. By default\n", - "upgrade allows for \\~5 minutes. This setting can be increased by editing\n", - "the configmap, and bumping the field `controllerUpgradeTimeoutInMinutes`\n", - "to a higher value.\n", - "\n", - "Recommend:\n", - "\n", - "- Increase the `controllerUpgradeTimeoutInMinutes` field to 15 minutes,\n", - " depending on network speed.\n", - "- The `componentUpgradeTimeoutInMinutes` field may also need a bump,\n", - " because if the image pull for controller is taking a while, it\u2019s\n", - " likely that the downloads for Hadoop and mssql-server images might\n", - " take a while as well.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "Use these steps to troubleshoot the issue.\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "controller_timeout=20\n", - "controller_total_timeout=40\n", - "component_timeout=45" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "from IPython.display import Markdown\n", - "\n", - "try:\n", - " from kubernetes import client, config\n", - " from kubernetes.stream import stream\n", - "\n", - " if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - " config.load_incluster_config()\n", - " else:\n", - " try:\n", - " config.load_kube_config()\n", - " except:\n", - " display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - " raise\n", - " api = client.CoreV1Api()\n", - "\n", - " print('Kubernetes client instantiated')\n", - "except ImportError:\n", - " display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio (see the sketch below)."
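A minimal sketch of that up-front check (illustrative only; it mirrors the fallback convention the next code cell uses: `AZDATA_NAMESPACE` wins if set, otherwise the first namespace labelled `MSSQL_CLUSTER` is used):

```python
# Sketch: report which namespace the notebook will target.
# Assumes the same convention as the cell below: AZDATA_NAMESPACE wins if set,
# otherwise items[0] of the namespaces labelled MSSQL_CLUSTER is used.
import os

namespace = os.environ.get("AZDATA_NAMESPACE")

if namespace is not None:
    print(f"Using namespace from AZDATA_NAMESPACE: {namespace}")
else:
    print("AZDATA_NAMESPACE not set; the next cell will fall back to "
          "items[0] of the namespaces labelled MSSQL_CLUSTER.")
```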
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set upgrade timeouts\n", - "\n", - "Set the timeouts for upgrades. The timeout settings are as follows\n", - "\n", - "- controllerUpgradeTimeoutInMinutes: sets the max amount of time for\n", - " the controller or controllerdb to finish upgrading\n", - "- totalUpgradeTimeoutInMinutes: sets the max amount of time to wait\n", - " for both the controller and controllerdb to complete their upgrade\n", - "- componentUpgradeTimeoutInMinutes: sets the max amount of time\n", - " allowed for subsequent phases of the upgrade to complete" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "upgrade_config_map = api.read_namespaced_config_map(\"controller-upgrade-configmap\", namespace)\n", - "\n", - "upgrade_config = json.loads(upgrade_config_map.data[\"controller-upgrade\"])\n", - "upgrade_config[\"controllerUpgradeTimeoutInMinutes\"] = controller_timeout\n", - "upgrade_config[\"totalUpgradeTimeoutInMinutes\"] = controller_total_timeout\n", - "upgrade_config[\"componentUpgradeTimeoutInMinutes\"] = component_timeout\n", - "upgrade_config_map.data[\"controller-upgrade\"] = json.dumps(upgrade_config)\n", - "\n", - "api.patch_namespaced_config_map(\"controller-upgrade-configmap\", namespace, upgrade_config_map)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Related\n", - "-------\n", - "\n", - "- [TSG108 - View the controller upgrade config\n", - " map](../diagnose/tsg108-controller-failed-to-upgrade.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "expert": { - "rules": [ - [ - "TSG108", - "code", - "stream", - "name", - "stdout", - "text", - ".\\*upgrade stalled" - ], - [ - "TSG108", - "code", - "stream", - "name", - "stdout", - "text", - ".\\*upgrade has timed out" - ] - ] - }, - "symlink": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg110-azdata-returns-apierror.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg110-azdata-returns-apierror.ipynb deleted file mode 100644 index 
a9ea0806..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg110-azdata-returns-apierror.ipynb +++ /dev/null @@ -1,43 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG110 - Azdata returns ApiError\n", - "================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When running `azdata` commands, such as `azdata login`, they fail with\n", - "the following error:\n", - "\n", - "> ERROR: Error processing command: \u201cApiError\u201d Internal Server Error\n", - "\n", - "### Steps\n", - "\n", - "This has been seen to be caused by an issue with the `controller`. The\n", - "next step is to analyze the controller logs. Use the TSG(s)\n", - "listed below to do this.\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG036 - Controller\n", - " logs](../log-analyzers/tsg036-get-controller-logs.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/repair/tsg124-no-credentials-were-supplied.ipynb b/Big-Data-Clusters/CU8/Public/content/repair/tsg124-no-credentials-were-supplied.ipynb deleted file mode 100644 index 37403857..00000000 --- a/Big-Data-Clusters/CU8/Public/content/repair/tsg124-no-credentials-were-supplied.ipynb +++ /dev/null @@ -1,40 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG124 - \u2018No credentials were supplied\u2019 error from azdata login\n", - "===============================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "When running `azdata login`, it fails with:\n", - "\n", - " ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.\n", - "\n", - "### Steps\n", - "\n", - "- When using `azdata login` from Windows, ensure the machine is domain\n", - " joined, and the user that is logged in is part of the same domain as\n", - " the domain the Big Data Cluster is joined to, or the domain the user\n", - " is part of has a trust relationship with the domain the Big Data\n", - " Cluster is part of.\n", - "- When using `azdata login` from Linux, ensure the user has logged\n", - " into the domain, using a Kerberos client such as `kinit`." - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/readme.md b/Big-Data-Clusters/CU8/Public/content/sample/readme.md deleted file mode 100644 index 1a5de3fa..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/readme.md +++ /dev/null @@ -1,23 +0,0 @@ -# Sample notebooks - -- A set of example notebooks demonstrating SQL Server Big Data Cluster scenarios.
- -[Home](../readme.md) - -## Notebooks in this Chapter -- [SAM001a - Query Storage Pool from SQL Server Master Pool (1 of 3) - Load sample data](sam001a-load-sample-data-into-bdc.ipynb) - -- [SAM001b - Query Storage Pool from SQL Server Master Pool (2 of 3) - Convert data to parquet](sam001b-convert-csv-to-parquet.ipynb) - -- [SAM001c - Query Storage Pool from SQL Server Master Pool (3 of 3) - Query HDFS from SQL Server](sam001c-query-hdfs-in-sql-server.ipynb) - -- [SAM002 - Storage Pool (2 of 2) - Query HDFS](sam002-query-hdfs-in-sql-server.ipynb) - -- [SAM003 - Data Pool Example](sam003-data-pool.ipynb) - -- [SAM008 - Spark using azdata](sam008-spark-using-azdata.ipynb) - -- [SAM009 - HDFS using azdata](sam009-hdfs-using-azdata.ipynb) - -- [SAM010 - App using azdata](sam010-app-using-azdata.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam001a-load-sample-data-into-bdc.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam001a-load-sample-data-into-bdc.ipynb deleted file mode 100644 index 8bcb97b5..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam001a-load-sample-data-into-bdc.ipynb +++ /dev/null @@ -1,594 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM001a - Query Storage Pool from SQL Server Master Pool (1 of 3) - Load sample data\n", - "====================================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "In this 3-part tutorial, load data into the Storage Pool (HDFS) using\n", - "`azdata`, convert it into Parquet (using Spark) and then, in the 3rd part,\n", - "query the data using the Master Pool (SQL Server).\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1.
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary is None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary is None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which cause Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam001a-load-sample-data-into-bdc.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed.
Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a temporary directory to stage files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Create a temporary directory to hold configuration files\n", - "\n", - "import tempfile\n", - "\n", - "temp_dir = tempfile.mkdtemp()\n", - "\n", - "print(f\"Temporary directory created: {temp_dir}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Helper function to save configuration files to disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n", - "import os\n", - "import io\n", - "\n", - "def save_file(filename, contents):\n", - " with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n", - " text_file.write(contents)\n", - "\n", - " print(\"File saved: \" + os.path.join(temp_dir, filename))\n", - "\n", - "print(\"Function `save_file` defined successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Steps\n", - "\n", - "Upload this data into HDFS." 
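Once the upload cell below completes, listing the target directory is a quick way to confirm the file landed. A minimal sketch, assuming the `run` helper defined above, an authenticated `azdata` session, and that the `ls` subcommand accepts `--path` the way `rm` below does (the `/tmp/clickstream_data` path matches the destination used below):

```python
# Sketch: confirm the upload by listing the HDFS destination directory.
# Assumes `run` (defined above) and that `azdata login` has already succeeded.
run('azdata bdc hdfs ls --path /tmp/clickstream_data')
```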
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "items = [\n", - " [1, \"Eldon Base for stackable storage shelf platinum\", \"Muhammed MacIntyre\", 3, -213.25, 38.94, 35, \"Nunavut\", \"Storage & Organization\", 0.8],\n", - " [2, \"1.7 Cubic Foot Compact \"\"Cube\"\" Office Refrigerators\", \"Barry French\", 293, 457.81, 208.16, 68.02, \"Nunavut\", \"Appliances\", 0.58], \n", - " [3, \"Cardinal Slant-D Ring Binder Heavy Gauge Vinyl\", \"Barry French\", 293,46.71, 8.69, 2.99, \"Nunavut\", \"Binders and Binder Accessories\", 0.39],\n", - " [4, \"R380\", \"Clay Rozendal\", 483, 1198.97, 195.99, 3.99, \"Nunavut\", \"Telephones and Communication\", 0.58],\n", - " [5, \"Holmes HEPA Air Purifier\", \"Carlos Soltero\", 515, 30.94, 21.78, 5.94, \"Nunavut\", \"Appliances\", 0.5],\n", - " [6, \"G.E. Longer-Life Indoor Recessed Floodlight Bulbs\", \"Carlos Soltero\", 515, 4.43, 6.64, 4.95, \"Nunavut\", \"Office Furnishings\", 0.37],\n", - " [7, \"Angle-D Binders with Locking Rings Label Holders\", \"Carl Jackson\", 613, -54.04, 7.3, 7.72, \"Nunavut\", \"Binders and Binder Accessories\", 0.38],\n", - " [8, \"SAFCO Mobile Desk Side File Wire Frame\", \"Carl Jackson\", 613, 127.7, 42.76, 6.22, \"Nunavut\", \"Storage & Organization\", ],\n", - " [9, \"SAFCO Commercial Wire Shelving Black\", \"Monica Federle\", 643, -695.26, 138.14, 35, \"Nunavut\", \"Storage & Organization\", ],\n", - " [10, \"Xerox 198\", \"Dorothy Badders\", 678, -226.36, 4.98, 8.33, \"Nunavut\", \"Paper\", 0.38]\n", - "]\n", - "\n", - "src = os.path.join(temp_dir, \"items.csv\")\n", - "dest = \"/tmp/clickstream_data/datasampleCS.csv\"\n", - "\n", - "s = \"\"\n", - "for item in items:\n", - " s = s + str(item)[1:-1] + \"\\n\"\n", - "\n", - "save_file(src, s)\n", - "\n", - "run(f'azdata bdc hdfs rm --path {dest}')\n", - "\n", - "src = src.replace(\"\\\\\", \"\\\\\\\\\")\n", - "\n", - "run(f'azdata bdc hdfs rm --path hdfs:{dest}')\n", - "run(f'azdata bdc hdfs cp --from-path {src} --to-path hdfs:{dest}')\n", - "\n", - "print (f\"CSV uploaded to HDFS: {dest}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up temporary directory for staging configuration files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Delete the temporary directory used to hold configuration files\n", - "\n", - "import shutil\n", - "\n", - "shutil.rmtree(temp_dir)\n", - "\n", - "print(f'Temporary directory deleted: {temp_dir}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam001b-convert-csv-to-parquet.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam001b-convert-csv-to-parquet.ipynb deleted file mode 100644 index d1711f2c..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam001b-convert-csv-to-parquet.ipynb +++ /dev/null @@ -1,133 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM001b - Query Storage Pool from SQL Server Master Pool (2 of 3) - Convert data to parquet\n", - 
"===========================================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "In this 2nd part of a 3 part tutorial, use Spark to convert a .csv file\n", - "into a parquet file.\n", - "\n", - "### Convert CSV to Parquet using the PySpark kernel\n", - "\n", - "First open the .csv file and convert it to a data frame object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = spark.read.option(\"inferSchema\", \"true\").csv('/tmp/clickstream_data/datasampleCS.csv').toDF(\"NumberID\", \"Name\", \"Name2\", \"Price\", \"Discount\", \"Money\", \"Money2\", \"Company\", \"Type\", \"Space\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Verify the schema using the following command." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results.printSchema()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "View the first 20 lines of this data using the following command." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Turn the .csv file to a parquet file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sc._jsc.hadoopConfiguration().set(\"mapreduce.fileoutputcommitter.marksuccessfuljobs\", \"false\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results.write.mode(\"overwrite\").parquet('/tmp/clickstream_data_parquet')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Verify the parquet file using the following commands." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result_parquet = spark.read.parquet('/tmp/clickstream_data_parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result_parquet.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "pyspark3kernel", - "display_name": "PySpark3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam001c-query-hdfs-in-sql-server.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam001c-query-hdfs-in-sql-server.ipynb deleted file mode 100644 index af96a1e6..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam001c-query-hdfs-in-sql-server.ipynb +++ /dev/null @@ -1,709 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM001c - Query Storage Pool from SQL Server Master Pool (3 of 3) - Query HDFS from SQL Server\n", - "==============================================================================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "In this 3rd part of the Storage Pool tutorial, you\u2019ll learn how to:\n", - "\n", - "- **Create an external table pointing to HDFS data in a big data\n", - " cluster**\n", - "- **Join this data with high-value data in the master instance**\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary is None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary is None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which cause Jupyter to hang forever; to\n", - " # work around this, use no_output=True\n", - " #\n", - "\n", - " # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")
from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - 
"\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam001c-query-hdfs-in-sql-server.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. 
evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - "  Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - "    try:\n", - "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - "    except:\n", - "        from IPython.display import Markdown\n", - "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - "        raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place them in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables."
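The cell that follows shells out to kubectl via the `run` helper. The same secret can also be read without a subprocess; a minimal sketch using the official `kubernetes` Python package (an assumption: the package from SOP059 is installed, `namespace` is already set by the cell above, and the `controller-login-secret` name is the one these notebooks use):

    import base64, os
    from kubernetes import client, config

    config.load_kube_config()  # or config.load_incluster_config() when running inside the cluster
    secret = client.CoreV1Api().read_namespaced_secret("controller-login-secret", namespace)

    # Secret values arrive base64-encoded, matching the base64_decode=True flag used below
    os.environ["AZDATA_USERNAME"] = base64.b64decode(secret.data["username"]).decode("utf-8")
    os.environ["AZDATA_PASSWORD"] = base64.b64decode(secret.data["password"]).decode("utf-8")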
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = \"\"\"\n", - "IF DB_ID('storage_pool_test') IS NOT NULL\n", - "BEGIN\n", - "    DROP DATABASE [storage_pool_test]\n", - "END\n", - "\n", - "CREATE DATABASE [storage_pool_test]\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define the format of the .csv or Parquet file to read from HDFS\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "CREATE EXTERNAL FILE FORMAT csv_file\n", - "WITH (\n", - "    FORMAT_TYPE = DELIMITEDTEXT,\n", - "    FORMAT_OPTIONS(\n", - "        FIELD_TERMINATOR = ',',\n", - "        STRING_DELIMITER = '\"\"',\n", - "        USE_TYPE_DEFAULT = TRUE)\n", - ")\n", - "\"\"\"\n", - "\n", - "run(f\"\"\"azdata sql query --database storage_pool_test -q \"{sql}\" \"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "CREATE EXTERNAL FILE FORMAT PARQUET\n", - "    WITH (\n", - "    FORMAT_TYPE = PARQUET\n", - "    )\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external data source to the storage pool if it does not already exist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')\n", - "BEGIN\n", - "    CREATE EXTERNAL DATA SOURCE SqlStoragePool\n", - "    WITH (LOCATION = 'sqlhdfs://controller-svc/default')\n", - "END\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external table that can read the `/tmp/clickstream_data` from the storage pool\n", - "\n", - "The SQLStoragePool is accessible from the master instance of a big data\n", - "cluster.\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "CREATE EXTERNAL TABLE [clickstream_data_table_csv]\n", - "(\n", - "    \"NumberID\" BIGINT ,\n", - "    \"Name\" Varchar(120) ,\n", - "    \"Name2\" Varchar(120),\n", - "    \"Price\" Decimal ,\n", - "    \"Discount\" Decimal ,\n", - "    \"Money\" Decimal,\n", - "    \"Money2\" Decimal,\n", - "    \"Company\" Varchar(120),\n", - "    \"Type\" Varchar(120),\n", - "
\"Space\" Varchar(120)\n", - ")\n", - "WITH\n", - "(\n", - " DATA_SOURCE = SqlStoragePool,\n", - " LOCATION = '/tmp/clickstream_data',\n", - " FILE_FORMAT = csv_file\n", - ")\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "CREATE EXTERNAL TABLE [clickstream_data_table_parquet]\n", - "(\n", - " \"NumberID\" BIGINT,\n", - " \"Name\" Varchar(120) ,\n", - " \"Name2\" Varchar(120),\n", - " \"Price\" FLOAT ,\n", - " \"Discount\" FLOAT,\n", - " \"Money\" FLOAT,\n", - " \"Money2\" FLOAT,\n", - " \"Company\" Varchar(120),\n", - " \"Type\" Varchar(120),\n", - " \"Space\" FLOAT\n", - ")\n", - "WITH\n", - "(\n", - " DATA_SOURCE = SqlStoragePool,\n", - " LOCATION = '/tmp/clickstream_data_parquet',\n", - " FILE_FORMAT = PARQUET\n", - ")\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query the data\n", - "\n", - "Run the following query to join the HDFS data in the `clickstream_hdfs`\n", - "external table with the relational data in the local database you loaded\n", - "the data in.\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "select NumberID, Name, Name2, Company, Type from [clickstream_data_table_csv]\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = f\"\"\"\n", - "select * from [clickstream_data_table_parquet]\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database storage_pool_test -q \"{sql}\"')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up the database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sql = \"\"\"\n", - "IF DB_ID('storage_pool_test') IS NOT NULL\n", - "BEGIN\n", - " DROP DATABASE [storage_pool_test]\n", - "END\n", - "\"\"\"\n", - "\n", - "run(f'azdata sql query --database master -q \"{sql}\"')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam002-query-hdfs-in-sql-server.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam002-query-hdfs-in-sql-server.ipynb deleted file mode 100644 index 40cc05cd..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam002-query-hdfs-in-sql-server.ipynb +++ /dev/null @@ -1,722 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM002 - Storage Pool (2 of 2) - Query HDFS\n", - "===========================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "In this 2nd part of the Storage Pool tutorial, you\u2019ll learn how 
to:\n", - "\n", - "- **Create an external table pointing to HDFS data in a big data\n", - " cluster**\n", - "- **Join this data with high-value data in the master instance**\n", - "\n", - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam002-query-hdfs-in-sql-server.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", - "    else:\n", - "        if \"metadata\" in j and \\\n", - "           \"azdata\" in j[\"metadata\"] and \\\n", - "           \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - "           \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n", - "\n", - "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - "            return rules\n", - "\n", - "def apply_expert_rules(line):\n", - "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - "    global rules\n", - "\n", - "    for rule in rules:\n", - "        notebook = rule[1]\n", - "        cell_type = rule[2]\n", - "        output_type = rule[3] # i.e. stream or error\n", - "        output_type_name = rule[4] # i.e. ename or name \n", - "        output_type_value = rule[5] # i.e. SystemExit or stdout\n", - "        details_name = rule[6]  # i.e. evalue or text \n", - "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - "        if debug_logging:\n", - "            print(f\"EXPERT: If rule '{expression}' satisfied, run '{notebook}'.\")\n", - "\n", - "        if re.match(expression, line, re.DOTALL):\n", - "\n", - "            if debug_logging:\n", - "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - "            match_found = True\n", - "\n", - "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instantiate Kubernetes client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Instantiate the Python Kubernetes client into 'api' variable\n", - "\n", - "import os\n", - "\n", - "try:\n", - "    from kubernetes import client, config\n", - "    from kubernetes.stream import stream\n", - "\n", - "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", - "        config.load_incluster_config()\n", - "    else:\n", - "        try:\n", - "            config.load_kube_config()\n", - "        except:\n", - "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", - "            raise\n", - "    api = client.CoreV1Api()\n", - "\n", - "    print('Kubernetes client instantiated')\n", - "except ImportError:\n", - "    from IPython.display import Markdown\n", - "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", - "    raise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before
starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", - " except IndexError:\n", - " from IPython.display import Markdown\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print('The kubernetes namespace for your big data cluster is: ' + namespace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external table to HDFS\n", - "\n", - "The storage pool contains web clickstream data in a .csv file stored in\n", - "HDFS. Use the following steps to define an external table that can\n", - "access the data in that file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Markdown\n", - "\n", - "try:\n", - " %load_ext sql\n", - "except ModuleNotFoundError:\n", - " display(Markdown(f'HINT: Use [SOP062 - Install ipython-sql and pyodbc modules](../install/sop062-install-ipython-sql-module.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "import json\n", - "import base64\n", - "\n", - "controller_username = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True)\n", - "controller_username = base64.b64decode(controller_username).decode('utf-8')\n", - "\n", - "controller_password = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True)\n", - "controller_password = base64.b64decode(controller_password).decode('utf-8')\n", - "\n", - "master_endpoint_details = run('azdata bdc endpoint list --endpoint=\"sql-server-master\"', return_output=True)\n", - "json = json.loads(master_endpoint_details)\n", - "sql_master_tcp_and_port = json['endpoint']\n", - "\n", - "%sql mssql+pyodbc://{controller_username}:{controller_password}@{sql_master_tcp_and_port}/master?driver=SQL+Server+Native+Client+11.0&autocommit=True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "-- Create the new database if it does not exist already\n", - "IF NOT EXISTS (\n", - " SELECT [name]\n", - " FROM sys.databases\n", - " WHERE [name] = N'Testing'\n", - ")\n", - "CREATE DATABASE Testing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the following Transact-SQL command to change context to the database you created in the master instance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "USE Testing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define the format of the .csv or Parquet file to read 
from HDFS\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "CREATE EXTERNAL FILE FORMAT csv_file\n", - "WITH (\n", - "    FORMAT_TYPE = DELIMITEDTEXT,\n", - "    FORMAT_OPTIONS(\n", - "        FIELD_TERMINATOR = ',',\n", - "        STRING_DELIMITER = '\"',\n", - "        USE_TYPE_DEFAULT = TRUE)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "CREATE EXTERNAL FILE FORMAT PARQUET\n", - "    WITH (\n", - "    FORMAT_TYPE = PARQUET\n", - "    )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external data source to the storage pool if it does not already exist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "--DROP EXTERNAL DATA SOURCE SqlStoragePool\n", - "IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')\n", - "BEGIN\n", - "    CREATE EXTERNAL DATA SOURCE SqlStoragePool\n", - "    WITH (LOCATION = 'sqlhdfs://controller-svc/default')\n", - "END" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external table that can read the `/tmp/clickstream_data` from the storage pool\n", - "\n", - "The SQLStoragePool is accessible from the master instance of a big data\n", - "cluster.\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "CREATE EXTERNAL TABLE [clickstream_data_table_csv]\n", - "(\"NumberID\" BIGINT ,\n", - "\"Name\" Varchar(120) ,\n", - "\"Name2\" Varchar(120),\n", - "\"Price\" Decimal ,\n", - "\"Discount\" Decimal ,\n", - "\"Money\" Decimal,\n", - "\"Money2\" Decimal,\n", - "\"Type\" Varchar(120),\n", - " \"Space\" Varchar(120))\n", - "WITH\n", - "(\n", - "    DATA_SOURCE = SqlStoragePool,\n", - "    LOCATION = '/tmp/clickstream_data',\n", - "    FILE_FORMAT = csv_file\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "CREATE EXTERNAL TABLE [clickstream_data_table_parquet]\n", - "(\"NumberID\" BIGINT ,\n", - "\"Name\" Varchar(120) ,\n", - "\"Name2\" Varchar(120),\n", - "\"Price\" BIGINT ,\n", - "\"Discount\" FLOAT,\n", - "\"Money\" FLOAT,\n", - "\"Money2\" FLOAT,\n", - "\"Type\" Varchar(120),\n", - "\"Space\" Varchar(120))\n", - "WITH\n", - "(\n", - "    DATA_SOURCE = SqlStoragePool,\n", - "    LOCATION = '/tmp/clickstream_data_parquet',\n", - "    FILE_FORMAT = PARQUET\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query the data\n", - "\n", - "1.  Run the following query to join the HDFS data in the\n", - "    `clickstream_hdfs` external table with the relational data in the\n", - "    local database you loaded the data in.\n", - "\n", - "For CSV:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "select * from [clickstream_data_table_csv]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Parquet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "select * from [clickstream_data_table_parquet]" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam003-data-pool.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam003-data-pool.ipynb deleted file mode 100644 index e29896e2..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam003-data-pool.ipynb +++ /dev/null @@ -1,286 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM003 - Data Pool Example\n", - "==========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "In this tutorial, you learn how to:\n", - "\n", - "- **Create a data pool source.**\n", - "- **Create an external table in the data pool.**\n", - "- **Insert data in data pool tables.**\n", - "- **Load data from one data pool table to another.**\n", - "- **Join data in the data pool table with other data pool tables.**\n", - "- **Truncate tables.**\n", - "- **Cleanup.**\n", - "\n", - "### Create a database and an external data source" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "USE MASTER\n", - "GO\n", - "\n", - "DROP DATABASE IF EXISTS datapoolNoteBookTutorialDB\n", - "GO\n", - "\n", - "CREATE DATABASE datapoolNoteBookTutorialDB\n", - "GO\n", - "\n", - "USE datapoolNoteBookTutorialDB\n", - "GO\n", - "\n", - "CREATE EXTERNAL DATA SOURCE datapoolsrc\n", - "WITH\n", - "(\n", - "    LOCATION = 'sqldatapool://controller-svc/default'\n", - ")\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external table in the data pool with Round Robin distribution" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "CREATE EXTERNAL TABLE [CustomersRR]\n", - "(\n", - "    [CustomerId] [int] NOT NULL,\n", - "    [Name] [nvarchar](256) NOT NULL,\n", - "    [RegionId] [int] NOT NULL\n", - ") WITH\n", - "(\n", - "    DATA_SOURCE = datapoolsrc,\n", - "    DISTRIBUTION = ROUND_ROBIN\n", - ")\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an external table in the data pool with Replicated distribution" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "CREATE EXTERNAL TABLE [CustomersRep]\n", - "(\n", - "    [CustomerId] [int] NOT NULL,\n", - "    [Name] [nvarchar](256) NOT NULL,\n", - "    [RegionId] [int] NOT NULL\n", - ") WITH\n", - "(\n", - "    DATA_SOURCE = datapoolsrc,\n", - "    DISTRIBUTION = REPLICATED\n", - ")\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load data into tables" - ] - }, - { - "cell_type": "code", - "execution_count": null,
"metadata": {}, - "outputs": [], - "source": [ - "INSERT INTO CustomersRR VALUES (1, 'customer1', 1)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRR VALUES (2, 'customer2', 1)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRR VALUES (3, 'customer3', 2)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRR VALUES (4, 'customer4', 2)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRR VALUES (5, 'customer5', 3)\n", - "GO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "INSERT INTO CustomersRep VALUES (1, 'customerRep1', 1)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRep VALUES (2, 'customerRep2', 1)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRep VALUES (3, 'customerRep3', 2)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRep VALUES (4, 'customerRep4', 2)\n", - "GO\n", - "\n", - "INSERT INTO CustomersRep VALUES (5, 'customerRep5', 3)\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Loading data from one data pool table to another" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "INSERT INTO CustomersRep\n", - "SELECT * FROM CustomersRR\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Querying data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SELECT * FROM CustomersRR;\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Join between different data pool tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SELECT CustomersRR.CustomerId, CustomersRR.Name, CustomersRR.RegionId \n", - "FROM CustomersRR \n", - "INNER JOIN CustomersRep ON CustomersRR.CustomerId = CustomersRep.CustomerId\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Truncate Tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "TRUNCATE TABLE CustomersRep\n", - "GO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "TRUNCATE TABLE CustomersRR\n", - "GO" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Cleanup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DROP EXTERNAL TABLE CustomersRep\n", - "GO\n", - "\n", - "DROP EXTERNAL TABLE CustomersRR\n", - "GO\n", - "\n", - "DROP EXTERNAL DATA SOURCE datapoolsrc\n", - "GO\n", - "\n", - "USE master\n", - "GO\n", - "\n", - "DROP DATABASE datapoolNoteBookTutorialDB\n", - "GO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "sql", - "display_name": "SQL" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam008-spark-using-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam008-spark-using-azdata.ipynb deleted file mode 100644 index 42b29325..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam008-spark-using-azdata.ipynb +++ /dev/null @@ -1,657 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"SAM008 - Spark using azdata\n", - "===========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "spark_statement = \"2+2\"\n", - "max_tries_for_ready_state = 50" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
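, when decoded as utf-8,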
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam008-spark-using-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
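(load_rules then returns None, so expert rules are skipped).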
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a Spark Session" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import secrets\n", - "import json\n", - "\n", - "session_name = secrets.token_urlsafe(16).replace(\"-\", \"_\") # session name can't start with a '-' (when passed in with azdata)\n", - "\n", - "print(session_name)\n", - "\n", - "session_create = run(f'azdata bdc spark session create --name \"{session_name}\" --session-kind pyspark', return_output=True)\n", - "\n", - "print(session_create)\n", - "\n", - "session_create_json = json.loads(session_create)\n", - "\n", - "print(session_create_json)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait for Spark Session to finish starting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "session_id = session_create_json[\"id\"]\n", - "\n", - "state = \"starting\"\n", - "counter = 0\n", - "\n", - "while state == \"starting\":\n", - " session_state = run(f'azdata bdc spark session state --session-id {session_id}', return_output=True)\n", - " print(session_state)\n", - "\n", - " session_state_json = json.loads(session_state)\n", - " print (session_state_json)\n", - "\n", - " state = session_state_json[\"state\"]\n", - "\n", - " counter = counter + 1\n", - "\n", - " if counter == max_tries_for_ready_state:\n", - " raise SystemExit(f'Session has not moved out of starting state (after {max_tries_for_ready_state} attempts)')\n", - "\n", - "if state == \"dead\" or state == \"killed\":\n", - " display(Markdown(f'HINT: Use [TSG034 - Livy logs](../log-analyzers/tsg034-get-livy-logs.ipynb) to resolve this 
issue.'))\n", - " raise SystemExit(f\"Session moved from 'starting' to '{state}' state\")\n", - "\n", - "print (f\"Session successfully moved out of 'starting' state to '{state}'\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a Spark Statement" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "statement_create = run(f'azdata bdc spark statement create --code \"{spark_statement}\" --session-id {session_id}', return_output=True)\n", - "\n", - "statement_create_json = json.loads(statement_create)\n", - "print (statement_create_json)\n", - "\n", - "statement_id = statement_create_json[\"id\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait for Spark Statement to complete" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "statement_state = \"waiting\"\n", - "counter = 0\n", - "\n", - "while statement_state == \"waiting\":\n", - " statement_info = run(f'azdata bdc spark statement info --session-id {session_id} --statement-id {statement_id}', return_output=True)\n", - " print(statement_info)\n", - "\n", - " statement_info_json = json.loads(statement_info)\n", - " print (statement_info_json)\n", - "\n", - " statement_state = statement_info_json[\"state\"]\n", - "\n", - " counter = counter + 1\n", - "\n", - " if counter == 25:\n", - " raise SystemExit('Statement has not moved out of waiting state')\n", - "\n", - "\n", - "print(f'Statement completed successfully. Output: {statement_info_json[\"output\"][\"data\"][\"text/plain\"]}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Spark log for the session" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata bdc spark session log --session-id {session_id}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete the Spark session" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata bdc spark session delete --session-id {session_id}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true, - "expert": { - "expanded_rules": [ - [ - 5, - "../log-analyzers/tsg046-get-knox-logs.ipynb", - "code", - "stream", - "name", - "stdout", - "text", - ".\\*ERROR: 500", - 0 - ] - ] - } - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam009-hdfs-using-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam009-hdfs-using-azdata.ipynb deleted file mode 100644 index c6cf6b47..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam009-hdfs-using-azdata.ipynb +++ /dev/null @@ -1,624 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM009 - HDFS using azdata\n", - "==========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - 
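"# hdfs_folder: the HDFS parent directory under which this sample creates, and later removes, a scratch folder\n",
 - 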
"hdfs_folder = \"/tmp\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
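Resolving the full path up front lets Popen find these launcher scripts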
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
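, when decoded as utf-8,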
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam009-hdfs-using-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
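(load_rules then returns None, so expert rules are skipped).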
NOTE: Is there a way in Jupyter, to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # i.e. stream or error\n", - " output_type_name = rule[4] # i.e. ename or name \n", - " output_type_value = rule[5] # i.e. SystemExit or stdout\n", - " details_name = rule[6] # i.e. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n", - "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing 
namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \\[0\\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", - " Azure Data Studio." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. 
SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place in the required AZDATA\\_USERNAME and AZDATA\\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a 10MB file to copy to HDFS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "f = open('file-10MB',\"wb\")\n", - "f.seek((1024 * 1024 * 10) -1)\n", - "f.write(b\"\\0\")\n", - "f.close()\n", - "\n", - "os.stat(\"file-10MB\").st_size" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up any files from a previous incomplete run of this notebook" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata bdc hdfs rmr --path {hdfs_folder}/sam009-hdfs-using-azdata\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if os.path.isfile(\"file-10MB.copied\"):\n", - " os.remove(\"file-10MB.copied\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy file from local filesystem to remote HDFS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from time import gmtime, strftime\n", - "\n", - "time = strftime(\"%H-%M-%S\", gmtime())\n", - "\n", - "run(f\"azdata bdc hdfs cp --from-path file-10MB --to-path hdfs:{hdfs_folder}/sam009-hdfs-using-azdata/file-10MB-\" + time)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Copy file from remote HDFS to local filesystem" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata bdc hdfs cp --from-path hdfs:{hdfs_folder}/sam009-hdfs-using-azdata/file-10MB-\" + time + \" --to-path 
./file-10MB.copied\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check before and after file size\n", - "--------------------------------" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "before_file_size = os.path.getsize(\"file-10MB\")\n", - "after_file_size = os.path.getsize(\"file-10MB.copied\")\n", - "\n", - "if before_file_size != after_file_size:\n", - " raise SystemExit(f\"File sizes are different, before {before_file_size}, after {after_file_size}\")\n", - "else:\n", - " print(\"SUCCESS: Copy was successful, file sizes are the same\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up files in HDFS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata bdc hdfs rm --path {hdfs_folder}/sam009-hdfs-using-azdata/file-10MB-\" + time)\n", - "run(f\"azdata bdc hdfs rm --path {hdfs_folder}/sam009-hdfs-using-azdata\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clean up file on local filesystem" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.remove(\"file-10MB\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/sample/sam010-app-using-azdata.ipynb b/Big-Data-Clusters/CU8/Public/content/sample/sam010-app-using-azdata.ipynb deleted file mode 100644 index ec078431..00000000 --- a/Big-Data-Clusters/CU8/Public/content/sample/sam010-app-using-azdata.ipynb +++ /dev/null @@ -1,677 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "SAM010 - App using azdata\n", - "=========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "### Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "app_name = \"sam010-app-using-azdata\"\n", - "max_tries_for_ready_state = 150" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common functions\n", - "\n", - "Define helper functions used in this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", - "import sys\n", - "import os\n", - "import re\n", - "import json\n", - "import platform\n", - "import shlex\n", - "import shutil\n", - "import datetime\n", - "\n", - "from subprocess import Popen, PIPE\n", - "from IPython.display import Markdown\n", - "\n", - "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", - "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", - "install_hint = {} # The SOP to help install the executable if it cannot be found\n", - "\n", - "first_run = True\n", - "rules = None\n", - "debug_logging = False\n", - "\n", - "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n", - " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", - "\n", - " NOTES:\n", - "\n", - " 1. Commands that need this kind of ' quoting on Windows e.g.:\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", - "\n", - " Need to actually pass in as '\"':\n", - "\n", - " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", - "\n", - " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", - " \n", - " `iter(p.stdout.readline, b'')`\n", - "\n", - " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", - " \"\"\"\n", - " MAX_RETRIES = 5\n", - " output = \"\"\n", - " retry = False\n", - "\n", - " global first_run\n", - " global rules\n", - "\n", - " if first_run:\n", - " first_run = False\n", - " rules = load_rules()\n", - "\n", - " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", - " #\n", - " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", - " #\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", - " cmd = cmd.replace(\"\\n\", \" \")\n", - "\n", - " # shlex.split is required on bash and for Windows paths with spaces\n", - " #\n", - " cmd_actual = shlex.split(cmd)\n", - "\n", - " # Store this (i.e. kubectl, python etc.) 
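, the lowercased first token of the command,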
to support binary context aware error_hints and retries\n", - " #\n", - " user_provided_exe_name = cmd_actual[0].lower()\n", - "\n", - " # When running python, use the python in the ADS sandbox ({sys.executable})\n", - " #\n", - " if cmd.startswith(\"python \"):\n", - " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", - "\n", - " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", - " # with:\n", - " #\n", - " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", - " #\n", - " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", - " #\n", - " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", - " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", - "\n", - " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", - " #\n", - " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", - " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", - "\n", - " # To aid supportability, determine which binary file will actually be executed on the machine\n", - " #\n", - " which_binary = None\n", - "\n", - " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", - " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", - " # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n", - " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", - " # look for the 2nd installation of CURL in the path)\n", - " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", - " path = os.getenv('PATH')\n", - " for p in path.split(os.path.pathsep):\n", - " p = os.path.join(p, \"curl.exe\")\n", - " if os.path.exists(p) and os.access(p, os.X_OK):\n", - " if p.lower().find(\"system32\") == -1:\n", - " cmd_actual[0] = p\n", - " which_binary = p\n", - " break\n", - "\n", - " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", - " # seems to be required for .msi installs of azdata.cmd/az.cmd. 
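Resolving the full path up front lets Popen find these launcher scripts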
(otherwise Popen returns FileNotFound) \n", - " #\n", - " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", - " #\n", - " if which_binary == None:\n", - " which_binary = shutil.which(cmd_actual[0])\n", - "\n", - " # Display an install HINT, so the user can click on a SOP to install the missing binary\n", - " #\n", - " if which_binary == None:\n", - " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", - " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", - " else: \n", - " cmd_actual[0] = which_binary\n", - "\n", - " start_time = datetime.datetime.now().replace(microsecond=0)\n", - "\n", - " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", - " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", - " print(f\" cwd: {os.getcwd()}\")\n", - "\n", - " # Command-line tools such as CURL and AZDATA HDFS commands output\n", - " # scrolling progress bars, which causes Jupyter to hang forever, to\n", - " # workaround this, use no_output=True\n", - " #\n", - "\n", - " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", - " #\n", - " wait = True \n", - "\n", - " try:\n", - " if no_output:\n", - " p = Popen(cmd_actual)\n", - " else:\n", - " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", - " with p.stdout:\n", - " for line in iter(p.stdout.readline, b''):\n", - " line = line.decode()\n", - " if return_output:\n", - " output = output + line\n", - " else:\n", - " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", - " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", - " match = regex.match(line)\n", - " if match:\n", - " if match.group(1).find(\"HTML\") != -1:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", - " else:\n", - " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", - "\n", - " wait = False\n", - " break # otherwise infinite hang, have not worked out why yet.\n", - " else:\n", - " print(line, end='')\n", - " if rules is not None:\n", - " apply_expert_rules(line)\n", - "\n", - " if wait:\n", - " p.wait()\n", - " except FileNotFoundError as e:\n", - " if install_hint is not None:\n", - " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", - "\n", - " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", - "\n", - " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", - "\n", - " if not no_output:\n", - " for line in iter(p.stderr.readline, b''):\n", - " try:\n", - " line_decoded = line.decode()\n", - " except UnicodeDecodeError:\n", - " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", - " #\n", - " # \\xa0\n", - " #\n", - " # For example see this in the response from `az group create`:\n", - " #\n", - " # ERROR: Get Token request returned http error: 400 and server \n", - " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", - " # The refresh token has expired due to inactivity.\\xa0The token was \n", - " # issued on 2018-10-25T23:35:11.9832872Z\n", - " #\n", - " # which 
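, when decoded as utf-8,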
generates the exception:\n", - " #\n", - " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", - " #\n", - " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", - " print(line)\n", - " line_decoded = \"\"\n", - " pass\n", - " else:\n", - "\n", - " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", - " # print this empty \"ERR:\" as it confuses.\n", - " #\n", - " if line_decoded == \"\":\n", - " continue\n", - " \n", - " print(f\"STDERR: {line_decoded}\", end='')\n", - "\n", - " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", - " exit_code_workaround = 1\n", - "\n", - " # inject HINTs to next TSG/SOP based on output in stderr\n", - " #\n", - " if user_provided_exe_name in error_hints:\n", - " for error_hint in error_hints[user_provided_exe_name]:\n", - " if line_decoded.find(error_hint[0]) != -1:\n", - " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", - "\n", - " # apply expert rules (to run follow-on notebooks), based on output\n", - " #\n", - " if rules is not None:\n", - " apply_expert_rules(line_decoded)\n", - "\n", - " # Verify if a transient error, if so automatically retry (recursive)\n", - " #\n", - " if user_provided_exe_name in retry_hints:\n", - " for retry_hint in retry_hints[user_provided_exe_name]:\n", - " if line_decoded.find(retry_hint) != -1:\n", - " if retry_count < MAX_RETRIES:\n", - " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", - " retry_count = retry_count + 1\n", - " output = run(cmd, return_output=return_output, retry_count=retry_count)\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", - "\n", - " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", - " # don't wait here, if success known above\n", - " #\n", - " if wait: \n", - " if p.returncode != 0:\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", - " else:\n", - " if exit_code_workaround !=0 :\n", - " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", - "\n", - " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", - "\n", - " if return_output:\n", - " if base64_decode:\n", - " import base64\n", - "\n", - " return base64.b64decode(output).decode('utf-8')\n", - " else:\n", - " return output\n", - "\n", - "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam010-app-using-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourself. 
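(load_rules then returns None, so expert rules are skipped).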
- "def load_json(filename):\n", - " \"\"\"Load a json file from disk and return the contents\"\"\"\n", - "\n", - " with open(filename, encoding=\"utf8\") as json_file:\n", - " return json.load(json_file)\n", - "\n", - "def load_rules():\n", - " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", - "\n", - " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", - " #\n", - " try:\n", - " j = load_json(\"sam010-app-using-azdata.ipynb\")\n", - " except:\n", - " pass # If the user has renamed the book, we can't load ourselves. NOTE: Is there a way in Jupyter to know your own filename?\n", - " else:\n", - " if \"metadata\" in j and \\\n", - " \"azdata\" in j[\"metadata\"] and \\\n", - " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", - " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", - "\n", - " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", - "\n", - " rules.sort() # Sort rules so they run in priority order (the [0] element); lowest value first.\n", - "\n", - " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", - "\n", - " return rules\n", - "\n", - "def apply_expert_rules(line):\n", - " \"\"\"Determine if the stderr line passed in matches the regular expressions for any of the 'expert rules'; if so,\n", - " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", - "\n", - " global rules\n", - "\n", - " for rule in rules:\n", - " notebook = rule[1]\n", - " cell_type = rule[2]\n", - " output_type = rule[3] # e.g. stream or error\n", - " output_type_name = rule[4] # e.g. ename or name \n", - " output_type_value = rule[5] # e.g. SystemExit or stdout\n", - " details_name = rule[6] # e.g. evalue or text \n", - " expression = rule[7].replace(\"\\\\*\", \"*\") # The rule source escaped '*' as '\\*'; undo that here.\n", - "\n", - " if debug_logging:\n", - " print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n", - "\n", - " if re.match(expression, line, re.DOTALL):\n", - "\n", - " if debug_logging:\n", - " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n", - "\n", - " match_found = True\n", - "\n", - " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", - "\n", - "\n", - "\n", - "\n", - "print('Common functions defined successfully.')\n", - "\n",
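The expert-rules machinery above reduces to: match each output line against a table of regular expressions and, on a match, surface a hint to a follow-on notebook. A simplified sketch of that idea; the two-element rule shape here is illustrative, not the eight-element form stored in the notebook metadata (the two example pairs are taken from the error hints defined below):

```python
# Simplified expert-rules matcher: map a regex over a line of tool output to a
# follow-on notebook hint.
import re

example_rules = [
    (r"Reason: Unauthorized", "../common/sop028-azdata-login.ipynb"),
    (r"ERROR: 500", "../log-analyzers/tsg046-get-knox-logs.ipynb"),
]

def hint_for(line):
    for expression, notebook in example_rules:
        if re.search(expression, line, re.DOTALL):
            return f"HINT: Use [{notebook}]({notebook}) to resolve this issue."
    return None

print(hint_for("Reason: Unauthorized"))
```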
- "# Hints for binary (transient fault) retry, (known) error and install guide\n", - "#\n", - "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'], 'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)']}\n", - "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']], 'azdata': [['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Error processing command: \"ApiError', 'TSG110 - Azdata returns ApiError', '../repair/tsg110-azdata-returns-apierror.ipynb'], ['Error processing command: \"ControllerError', 'TSG036 - Controller logs', '../log-analyzers/tsg036-get-controller-logs.ipynb'], ['ERROR: 500', 'TSG046 - Knox gateway logs', '../log-analyzers/tsg046-get-knox-logs.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], [\"Can't open lib 'ODBC Driver 17 for SQL Server\", 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], [\"[Errno 2] No such file or directory: '..\\\\\\\\\", 'TSG053 - ADS Provided Books must be saved before use', '../repair/tsg053-save-book-first.ipynb'], [\"NameError: name 'azdata_login_secret_name' is not defined\", 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', \"TSG124 - 'No credentials were supplied' error from azdata login\", '../repair/tsg124-no-credentials-were-supplied.ipynb']]}\n", - "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb'], 'azdata': ['SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb']}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the Kubernetes namespace for the big data cluster\n", - "\n", - "Get the namespace of the Big Data Cluster using the kubectl command line\n", - "interface.\n", - "\n", - "**NOTE:**\n", - "\n", - "If there is more than one Big Data Cluster in the target Kubernetes\n", - "cluster, then either:\n", - "\n", - "- set \[0\] to the correct value for the big data cluster.\n", - "- set the environment variable AZDATA\_NAMESPACE before starting\n", - " Azure Data Studio." - ] - },
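The cell that follows shells out to `kubectl` through the `run` helper. The same label-selector lookup can be expressed with the kubernetes Python module; a sketch only, assuming the module is installed and the kubeconfig points at the right cluster:

```python
# Label-selector namespace lookup via the kubernetes Python module.
from kubernetes import client, config

config.load_kube_config()  # use the current kubectl context
v1 = client.CoreV1Api()
items = v1.list_namespace(label_selector="MSSQL_CLUSTER").items
print(items[0].metadata.name if items else "No namespace labelled MSSQL_CLUSTER")
```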
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", - "\n", - "if \"AZDATA_NAMESPACE\" in os.environ:\n", - " namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", - "else:\n", - " try:\n", - " namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n", - " except:\n", - " from IPython.display import Markdown\n", - " print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n", - " display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", - " display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", - " raise\n", - "\n", - "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the controller username and password\n", - "\n", - "Get the controller username and password from the Kubernetes Secret\n", - "Store and place them in the required AZDATA\_USERNAME and AZDATA\_PASSWORD\n", - "environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide_input" - ] - }, - "outputs": [], - "source": [ - "# Place controller secret in AZDATA_USERNAME/AZDATA_PASSWORD environment variables\n", - "\n", - "import os, base64\n", - "\n", - "os.environ[\"AZDATA_USERNAME\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True, base64_decode=True)\n", - "os.environ[\"AZDATA_PASSWORD\"] = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.password}}', return_output=True, base64_decode=True)\n", - "\n", - "print(f\"Controller username '{os.environ['AZDATA_USERNAME']}' and password stored in environment variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialize the app specification (yaml)\n", - "\n", - "Clean up from a previous run of this notebook.\n", - "\n", - "NOTE: If the result is a `dict`, then the app exists; otherwise it\u2019s a\n", - "string warning us that it doesn\u2019t exist!" - ] - },
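For reference, the secret values the credentials cell above reads are stored base64-encoded in Kubernetes, which is why `run` is called with `base64_decode=True`. Done by hand, the decode looks like the sketch below, which reuses `run` and `namespace` from the cells above:

```python
# Manual equivalent of base64_decode=True in the run() helper: kubectl returns
# secret data base64-encoded, so decode it before use.
import base64

encoded = run(f'kubectl get secret/controller-login-secret -n {namespace} -o jsonpath={{.data.username}}', return_output=True)
print(base64.b64decode(encoded).decode('utf-8'))
```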
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "describe = run(f\"azdata app describe --name {app_name} --version v1 -o json\", return_output=True)\n", - "describe = json.loads(describe)\n", - "\n", - "if isinstance(describe[\"result\"], dict):\n", - " run(f\"azdata app delete --name {app_name} --version v1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "\n", - "if os.path.isdir(app_name):\n", - " shutil.rmtree(app_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Initialize the app based on the `python` template" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata app init --name {app_name} --version v1 --template python --destination .\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the app" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata app create --spec {app_name}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait for the App to move to the Ready state" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "state = \"WaitingForCreate\"\n", - "counter = 0\n", - "\n", - "while state == \"WaitingForCreate\" or state == \"WaitingForCredentials\" or state == \"Creating\":\n", - " app_state = run(f'azdata app describe --name {app_name} --version v1 --spec {app_name}', return_output=True)\n", - " print(app_state)\n", - "\n", - " app_state_json = json.loads(app_state)\n", - "\n", - " try:\n", - " state = app_state_json[\"state\"]\n", - " except TypeError as e: # Sometimes, we see \"TypeError: string indices must be integers\"\n", - " state = str(e)\n", - " \n", - " print(f\"State: {state}\")\n", - " print('')\n", - "\n", - " counter = counter + 1\n", - "\n", - " if counter == max_tries_for_ready_state:\n", - " print(app_state)\n", - " raise SystemExit(f'App has not moved to Ready state in {counter} attempts')\n", - "\n", - "print(f\"App state: {app_state}\")\n", - "\n", - "if state == \"Error\":\n", - " raise SystemExit('App is in Error state')\n", - "\n", - "print(\"App successfully moved from WaitingForCreate through to Ready state\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List the apps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(\"azdata app list\")\n", - "run(f\"azdata app list --name {app_name}\")\n", - "run(f\"azdata app list --name {app_name} --version v1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the app" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata app describe --name {app_name} --version v1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the app" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata app run --name {app_name} --version v1 --inputs msg=HelloWorld\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete the app" - ] - },
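The wait loop above follows a general poll-until-terminal-state pattern. A compact sketch of the same idea, with a hypothetical `wait_for_state` helper built on the notebook's `run`; `delay_seconds` is an assumption, as the cell above polls without sleeping:

```python
# Generic poll-until-terminal-state loop: re-run `azdata app describe` until
# the app leaves the transitional states or the retry budget runs out.
import json
import time

TRANSITIONAL = {"WaitingForCreate", "WaitingForCredentials", "Creating"}

def wait_for_state(app_name, max_tries, delay_seconds=10):
    for _ in range(max_tries):
        described = json.loads(run(f"azdata app describe --name {app_name} --version v1", return_output=True))
        state = described.get("state", "Unknown")
        if state not in TRANSITIONAL:
            return state
        time.sleep(delay_seconds)
    raise SystemExit(f"App has not moved to Ready state in {max_tries} attempts")
```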
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run(f\"azdata app delete --name {app_name} --version v1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "\n", - "if os.path.isdir(app_name):\n", - " shutil.rmtree(app_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Notebook execution complete.')" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": true - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/readme.md b/Big-Data-Clusters/CU8/Public/content/troubleshooters/readme.md deleted file mode 100644 index 0b06289a..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/readme.md +++ /dev/null @@ -1,21 +0,0 @@ -# SQL Server Big Data Cluster Troubleshooter notebooks - -- This chapter contains the top-level troubleshooter notebooks linked from the Azure Data Studio `Big Data Cluster Dashboard` - -[Home](../readme.md) - -## Notebooks in this Chapter -- [TSG100 - The Big Data Cluster troubleshooter ](tsg100-troubleshoot-bdc.ipynb) - -- [TSG101 - SQL Server troubleshooter ](tsg101-troubleshoot-sql-server.ipynb) - -- [TSG102 - HDFS troubleshooter ](tsg102-troubleshoot-hdfs.ipynb) - -- [TSG103 - Spark troubleshooter ](tsg103-troubleshoot-spark.ipynb) - -- [TSG104 - Control troubleshooter ](tsg104-troubleshoot-control.ipynb) - -- [TSG105 - Gateway troubleshooter ](tsg105-troubleshoot-gateway.ipynb) - -- [TSG106 - App troubleshooter ](tsg106-troubleshoot-app.ipynb) - diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg100-troubleshoot-bdc.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg100-troubleshoot-bdc.ipynb deleted file mode 100644 index d75c68df..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg100-troubleshoot-bdc.ipynb +++ /dev/null @@ -1,104 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG100 - The Big Data Cluster troubleshooter\n", - "============================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot Big Data Cluster (BDC) issues that\n", - "are not covered by the more specific troubleshooters in this chapter.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Get the versions of `azdata`, the BDC and Kubernetes cluster\n", - "\n", - "Often the first question asked is \u201cwhat version are you using\u201d. There\n", - "are versions of several things that are useful to know:\n", - "\n", - "- [SOP007 - Version information (azdata, bdc,\n", - " kubernetes)](../common/sop007-get-key-version-information.ipynb)\n", - "\n", - "### Verify `azdata login` works\n", - "\n", - "The most fundemental operation that needs to work is `login`. 
- "\n", - "### Verify `azdata login` works\n", - "\n", - "The most fundamental operation that needs to work is `login`. All the\n", - "other troubleshooters in this chapter depend on `azdata login` working.\n", - "Run this SOP to verify `azdata login` works; it will also analyze\n", - "any error output and suggest follow-on TSGs as appropriate to help\n", - "resolve any issues.\n", - "\n", - "- [SOP028 - azdata login](../common/sop028-azdata-login.ipynb)\n", - "\n", - "### Verify the cluster health monitor is reporting \u2018Healthy\u2019\n", - "\n", - "- [TSG078 - Is cluster\n", - " healthy](../diagnose/tsg078-is-cluster-healthy.ipynb)\n", - "\n", - "### Verify that all the pods are \u2018Running\u2019\n", - "\n", - "Verify the pods for the `kube-system` namespace and the big data cluster\n", - "namespace are all in the \u201cRunning\u201d state, and all the Kubernetes nodes\n", - "are \u201cReady\u201d.\n", - "\n", - "- [TSG006 - Get system pod\n", - " status](../monitor-k8s/tsg006-view-system-pod-status.ipynb)\n", - "- [TSG007 - Get BDC pod\n", - " status](../monitor-k8s/tsg007-view-bdc-pod-status.ipynb)\n", - "- [TSG009 - Get nodes\n", - " (Kubernetes)](../monitor-k8s/tsg009-get-nodes.ipynb)\n", - "\n", - "### Verify there are no crash dumps in the cluster\n", - "\n", - "The Big Data Cluster should run without any process crashing. Run this\n", - "TSG to analyze the entire cluster to verify that no crash dumps have\n", - "been created.\n", - "\n", - "- [TSG029 - Find dumps in the\n", - " cluster](../diagnose/tsg029-find-dumps-in-the-cluster.ipynb)\n", - "\n", - "### Next steps\n", - "\n", - "This troubleshooter has helped verify the cluster itself is responding\n", - "to logins. Use the troubleshooters linked below to drill down into\n", - "specific functionality in the cluster that may not be working correctly.\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG101 - SQL Server\n", - " troubleshooter](../troubleshooters/tsg101-troubleshoot-sql-server.ipynb)\n", - "\n", - "- [TSG102 - HDFS\n", - " troubleshooter](../troubleshooters/tsg102-troubleshoot-hdfs.ipynb)\n", - "\n", - "- [TSG103 - Spark\n", - " troubleshooter](../troubleshooters/tsg103-troubleshoot-spark.ipynb)\n", - "\n", - "- [TSG104 - Control\n", - " troubleshooter](../troubleshooters/tsg104-troubleshoot-control.ipynb)\n", - "\n", - "- [TSG105 - Gateway\n", - " troubleshooter](../troubleshooters/tsg105-troubleshoot-gateway.ipynb)\n", - "\n", - "- [TSG106 - App\n", - " troubleshooter](../troubleshooters/tsg106-troubleshoot-app.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg101-troubleshoot-sql-server.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg101-troubleshoot-sql-server.ipynb deleted file mode 100644 index 150c5ec4..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg101-troubleshoot-sql-server.ipynb +++ /dev/null @@ -1,58 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG101 - SQL Server troubleshooter\n", - "==================================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot SQL Server issues in the Big Data\n", - "Cluster (BDC).\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Run through the SQL Query monitor notebook\n", - "\n", - "Run through this TSG to incrementally verify each SQL Server process\n", - "running in the Big Data 
Cluster. If the TSG hits any problems, it will\n", - "HINT the next TSG to run to resolve the issue.\n", - "\n", - "- [TSG070 - Query SQL master\n", - " pool](../monitor-bdc/tsg070-use-azdata-sql-query.ipynb)\n", - "\n", - "### Run the SQL Server log analyzers\n", - "\n", - "The following TSGs will connect to the cluster, get the SQL Server\n", - "logs, and analyze them for known issues, HINTing the next TSG to run to\n", - "resolve them.\n", - "\n", - "- [TSG030 - SQL Server errorlog\n", - " files](../log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb)\n", - "- [TSG031 - SQL Server PolyBase\n", - " logs](../log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg102-troubleshoot-hdfs.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg102-troubleshoot-hdfs.ipynb deleted file mode 100644 index 9417c2f4..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg102-troubleshoot-hdfs.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG102 - HDFS troubleshooter\n", - "============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot HDFS issues.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Run through the HDFS sample notebook\n", - "\n", - "Run through this sample notebook to incrementally verify each major\n", - "component of HDFS in the Big Data Cluster. 
If the sample hits any\n", - "problems, it will HINT the next TSG to run to resolve the issue.\n", - "\n", - "- [SAM009 - HDFS using\n", - " azdata](../sample/sam009-hdfs-using-azdata.ipynb)\n", - "\n", - "### View and Analyze the HDFS-related logs\n", - "\n", - "- [TSG088 - Hadoop datanode\n", - " logs](../log-analyzers/tsg088-get-datanode-logs.ipynb)\n", - "- [TSG090 - Yarn nodemanager\n", - " logs](../log-analyzers/tsg090-get-nodemanager-logs.ipynb)\n", - "- [TSG095 - Hadoop namenode\n", - " logs](../log-analyzers/tsg095-get-namenode-logs.ipynb)\n", - "\n", - "### Verify the Gateway log\n", - "\n", - "The Knox gateway will strip error information from responses going to\n", - "the client and place it in the gateway logs.\n", - "\n", - "- [TSG046 - Knox gateway\n", - " logs](../log-analyzers/tsg046-get-knox-logs.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg103-troubleshoot-spark.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg103-troubleshoot-spark.ipynb deleted file mode 100644 index 1eed3ee8..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg103-troubleshoot-spark.ipynb +++ /dev/null @@ -1,66 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG103 - Spark troubleshooter\n", - "=============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot Spark issues.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Run through the Spark sample notebook\n", - "\n", - "Run through this sample notebook to incrementally verify each major\n", - "component of Spark in the Big Data Cluster. 
If the sample hits any\n", - "problems, it will HINT the next TSG to run to resolve the issue.\n", - "\n", - "- [SAM008 - Spark using\n", - " azdata](../sample/sam008-spark-using-azdata.ipynb)\n", - "\n", - "### View and Analyze the Spark-related log files\n", - "\n", - "- [TSG035 - Spark History\n", - " logs](../log-analyzers/tsg035-get-sparkhistory-logs.ipynb)\n", - "\n", - "### Verify the Livy logs\n", - "\n", - "In a Big Data Cluster, Spark jobs are submitted using Livy; problems\n", - "with Spark will often show up in these logs.\n", - "\n", - "- [TSG034 - Livy logs](../log-analyzers/tsg034-get-livy-logs.ipynb)\n", - "\n", - "### Verify the Gateway log\n", - "\n", - "The Knox gateway will strip error information from responses going to\n", - "the client and place it in the gateway logs.\n", - "\n", - "- [TSG046 - Knox gateway\n", - " logs](../log-analyzers/tsg046-get-knox-logs.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg104-troubleshoot-control.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg104-troubleshoot-control.ipynb deleted file mode 100644 index 0f894dd2..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg104-troubleshoot-control.ipynb +++ /dev/null @@ -1,53 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG104 - Control troubleshooter\n", - "===============================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot `controller` issues.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### View and analyze the `controller` log files\n", - "\n", - "The following TSG will get the controller logs from the cluster and\n", - "analyze each entry for known issues, HINTing further TSGs to assist.\n", - "\n", - "- [TSG036 - Controller\n", - " logs](../log-analyzers/tsg036-get-controller-logs.ipynb)\n", - "\n", - "The controller health monitor may also report issues from dependent\n", - "services; the logs for these can be viewed here:\n", - "\n", - "- [TSG076 - Elastic Search\n", - " logs](../log-analyzers/tsg076-get-elastic-search-logs.ipynb)\n", - "- [TSG077 - Kibana\n", - " logs](../log-analyzers/tsg077-get-kibana-logs.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg105-troubleshoot-gateway.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg105-troubleshoot-gateway.ipynb deleted file mode 100644 index 2d00dbef..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg105-troubleshoot-gateway.ipynb +++ /dev/null @@ -1,42 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG105 - Gateway troubleshooter\n", - "===============================\n", - "\n", - "Description\n", -
"-----------\n", - "\n", - "Follow these steps to troubleshoot Knox Gateway issues.\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### View and Analyze the gateway log\n", - "\n", - "- [TSG046 - Knox gateway\n", - " logs](../log-analyzers/tsg046-get-knox-logs.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file diff --git a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg106-troubleshoot-app.ipynb b/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg106-troubleshoot-app.ipynb deleted file mode 100644 index 7cfbfd94..00000000 --- a/Big-Data-Clusters/CU8/Public/content/troubleshooters/tsg106-troubleshoot-app.ipynb +++ /dev/null @@ -1,46 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "TSG106 - App troubleshooter\n", - "===========================\n", - "\n", - "Description\n", - "-----------\n", - "\n", - "Follow these steps to troubleshoot App-Deploy issues\n", - "\n", - "Steps\n", - "-----\n", - "\n", - "### Run through the App-Deploy sample notebook\n", - "\n", - "Run through this sample nootebok to incrementally verify each major\n", - "component of the App-Deploy infrastructure in the Big Data Cluster. If\n", - "the sample hits problems, it will HINT the next TSG to run to resolve\n", - "the issue.\n", - "\n", - "- [SAM010 - App using azdata](../sample/sam010-app-using-azdata.ipynb)\n", - "\n", - "Related\n", - "-------\n", - "\n", - "- [TSG100 - The Big Data Cluster\n", - " troubleshooter](../troubleshooters/tsg100-troubleshoot-bdc.ipynb)" - ] - } - ], - "nbformat": 4, - "nbformat_minor": 5, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "azdata": { - "side_effects": false - } - } -} \ No newline at end of file