feat: Improved logging for prepare-environment (#780)
niallthomson authored Dec 23, 2023
1 parent bc10611 commit e4d07ba
Showing 25 changed files with 130 additions and 101 deletions.
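
At a high level, the change routes verbose script output into a log file and keeps the terminal for short status messages: reset-environment saves the terminal's stdout on file descriptor 7, redirects regular output to a log under /eks-workshop/logs, and exports a logmessage helper that writes to both, while the smaller helper scripts now send their error messages to stderr with >&2. A minimal sketch of the logging pattern, simplified from the diff below (illustrative paths only; the real script also tees stderr back to the terminal and skips the redirection in DEV_MODE):

log_file=/tmp/action-$(date +%s).log   # illustrative path; the script uses /eks-workshop/logs

exec 7>&1                              # keep a copy of the terminal's stdout on FD 7

logmessage() {
  echo "$@" >&7                        # print the message to the terminal
  echo "$@" >&1                        # and to wherever stdout currently points
}
export -f logmessage                   # so child scripts such as cleanup hooks can use it too

exec >"$log_file" 2>&1                 # from here on, ordinary output goes to the log only

logmessage "Resetting the environment..."   # visible on screen and recorded in the log
echo "some noisy tool output"               # captured in $log_file, not shown on screen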
2 changes: 1 addition & 1 deletion lab/bin/delete-all-and-wait-if-crd-exists
@@ -5,7 +5,7 @@ set -e
crd=$1

if [ -z "$crd" ]; then
echo "Error: You must provide a CRD"
>&2 echo "Error: You must provide a CRD"
exit 1
fi

2 changes: 1 addition & 1 deletion lab/bin/delete-all-if-crd-exists
@@ -5,7 +5,7 @@ set -e
crd=$1

if [ -z "$crd" ]; then
echo "Error: You must provide a CRD"
>&2 echo "Error: You must provide a CRD"
exit 1
fi

4 changes: 2 additions & 2 deletions lab/bin/delete-nodegroup
@@ -4,14 +4,14 @@ nodegroup=$1
is_eksctl=$2

if [ -z "$nodegroup" ]; then
echo "You must provide a node group name"
>&2 echo "You must provide a node group name"
exit 1
fi

check=$(aws eks list-nodegroups --cluster-name $EKS_CLUSTER_NAME --query "nodegroups[? @ == '$nodegroup']" --output text)

if [ ! -z "$check" ]; then
echo "Deleting node group $nodegroup..."
logmessage "Deleting node group $nodegroup..."

if [ ! -z "$is_eksctl" ]; then
eksctl delete nodegroup --cluster $EKS_CLUSTER_NAME --name $nodegroup > /dev/null
79 changes: 54 additions & 25 deletions lab/bin/reset-environment
@@ -1,7 +1,26 @@
#!/bin/bash

mkdir -p /eks-workshop/logs
log_file=/eks-workshop/logs/action-$(date +%s).log

exec 7>&1

logmessage() {
echo "$@" >&7
echo "$@" >&1
}
export -f logmessage

if [ -z "${DEV_MODE}" ]; then
# Redirection for logging
exec >$log_file 2> >(tee >(cat >&7))
else
# Log the commands in dev mode
set -o xtrace
fi

if [ -z "$EKS_CLUSTER_NAME" ]; then
echo "Error: The EKS_CLUSTER_NAME environment variable must be set. Please run 'use-cluster <your cluster name>'"
logmessage "Error: The EKS_CLUSTER_NAME environment variable must be set. Please run 'use-cluster <your cluster name>'"
exit 1
fi

@@ -12,6 +31,15 @@ manifests_path="/eks-workshop/manifests"
base_path="$manifests_path/base-application"

set -Eeuo pipefail
trap 'catch $? $LINENO' EXIT

catch() {
if [ "$1" != "0" ]; then
logmessage "An error occurred, please contact your workshop proctor or raise an issue at https://github.com/aws-samples/eks-workshop-v2/issues"
logmessage "The full log can be found here: $log_file"
fi
exec 3<&-
}

mkdir -p /eks-workshop

@@ -24,12 +52,12 @@ REPOSITORY_REF=${REPOSITORY_REF:-""}
if [ ! -z "${REPOSITORY_REF}" ]; then
rm -rf $repository_path

echo "Refreshing copy of workshop repository from GitHub..."
logmessage "Refreshing copy of workshop repository from GitHub..."

git clone --quiet https://github.com/$REPOSITORY_OWNER/$REPOSITORY_NAME.git $repository_path > /dev/null
(cd $repository_path && git checkout --quiet "${REPOSITORY_REF}" > /dev/null)
git clone --quiet https://github.com/$REPOSITORY_OWNER/$REPOSITORY_NAME.git $repository_path
(cd $repository_path && git checkout --quiet "${REPOSITORY_REF}")

echo ""
logmessage ""

cp -R $repository_path/manifests $manifests_path
elif [ -d "/manifests" ]; then
@@ -44,15 +72,16 @@ if [ ! -z "$module" ]; then
fi
fi

echo "Resetting the environment, please wait"
logmessage "Resetting the environment..."
logmessage "Tip: Read the rest of the lab introduction while you wait!"

if [ -f "/eks-workshop/hooks/cleanup.sh" ]; then
bash /eks-workshop/hooks/cleanup.sh
fi

kubectl delete pod load-generator --ignore-not-found > /dev/null
kubectl delete pod load-generator --ignore-not-found

kubectl delete namespace other --ignore-not-found > /dev/null
kubectl delete namespace other --ignore-not-found

kubectl apply -k $base_path --prune --all \
--prune-allowlist=autoscaling/v1/HorizontalPodAutoscaler \
@@ -64,14 +93,14 @@ kubectl apply -k $base_path --prune --all \
--prune-allowlist=core/v1/Secret \
--prune-allowlist=core/v1/PersistentVolumeClaim \
--prune-allowlist=scheduling.k8s.io/v1/PriorityClass \
--prune-allowlist=networking.k8s.io/v1/Ingress > /dev/null
--prune-allowlist=networking.k8s.io/v1/Ingress

echo "Waiting for application to become ready..."
logmessage "Waiting for application to become ready..."

sleep 10

kubectl wait --for=condition=available --timeout=240s deployments -l app.kubernetes.io/created-by=eks-workshop -A > /dev/null
kubectl wait --for=condition=Ready --timeout=240s pods -l app.kubernetes.io/created-by=eks-workshop -A > /dev/null
kubectl wait --for=condition=available --timeout=240s deployments -l app.kubernetes.io/created-by=eks-workshop -A
kubectl wait --for=condition=Ready --timeout=240s pods -l app.kubernetes.io/created-by=eks-workshop -A

# Addons
mkdir -p /eks-workshop/terraform
@@ -81,12 +110,12 @@ export TF_VAR_eks_cluster_id="$EKS_CLUSTER_NAME"

RESOURCES_PRECREATED=${RESOURCES_PRECREATED:-""}

echo "Cleaning up previous lab infrastructure..."
logmessage "Cleaning up previous lab infrastructure..."

tf_dir=$(realpath --relative-to="$PWD" '/eks-workshop/terraform')

terraform -chdir="$tf_dir" init -upgrade > /tmp/terraform-destroy-init.log
terraform -chdir="$tf_dir" destroy --auto-approve > /tmp/terraform-destroy.log
terraform -chdir="$tf_dir" init -upgrade
terraform -chdir="$tf_dir" destroy --auto-approve

rm -rf /eks-workshop/terraform/addon*.tf

@@ -101,20 +130,20 @@ if [ ! -z "$module" ]; then
fi

if [ -f "$module_path/.workshop/terraform/addon.tf" ]; then
echo "Creating infrastructure for next lab..."
logmessage "Creating infrastructure for next lab..."

cp -R $module_path/.workshop/terraform/* /eks-workshop/terraform

if [ "$RESOURCES_PRECREATED" = "true" ]; then
rm -f /eks-workshop/terraform/addon_infrastructure.tf
fi

terraform -chdir="$tf_dir" init -upgrade > /tmp/terraform-apply-init.log
terraform -chdir="$tf_dir" apply -refresh=false --auto-approve > /tmp/terraform-apply.log
terraform -chdir="$tf_dir" init -upgrade
terraform -chdir="$tf_dir" apply -refresh=false --auto-approve
fi

if [ -d "$module_path/.workshop/manifests" ]; then
kubectl apply -k "$module_path/.workshop/manifests" > /dev/null
kubectl apply -k "$module_path/.workshop/manifests"
fi
fi

@@ -126,10 +155,10 @@ expected_size_config="$EKS_DEFAULT_MNG_MIN $EKS_DEFAULT_MNG_MAX $EKS_DEFAULT_MNG_DESIRED"
mng_size_config=$(aws eks describe-nodegroup --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $EKS_DEFAULT_MNG_NAME | jq -r '.nodegroup.scalingConfig | "\(.minSize) \(.maxSize) \(.desiredSize)"')

if [[ "$mng_size_config" != "$expected_size_config" ]]; then
echo "Setting EKS Node Group back to initial sizing..."
logmessage "Setting EKS Node Group back to initial sizing..."

aws eks update-nodegroup-config --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $EKS_DEFAULT_MNG_NAME \
--scaling-config desiredSize=$EKS_DEFAULT_MNG_DESIRED,minSize=$EKS_DEFAULT_MNG_MIN,maxSize=$EKS_DEFAULT_MNG_MAX > /dev/null
--scaling-config desiredSize=$EKS_DEFAULT_MNG_DESIRED,minSize=$EKS_DEFAULT_MNG_MIN,maxSize=$EKS_DEFAULT_MNG_MAX
aws eks wait nodegroup-active --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $EKS_DEFAULT_MNG_NAME

sleep 10
@@ -138,7 +167,7 @@ fi
asg_size_config=$(aws autoscaling describe-auto-scaling-groups --filters "Name=tag:eks:nodegroup-name,Values=$EKS_DEFAULT_MNG_NAME" "Name=tag:eks:cluster-name,Values=$EKS_CLUSTER_NAME" | jq -r '.AutoScalingGroups[0] | "\(.MinSize) \(.MaxSize) \(.DesiredCapacity)"')

if [[ "$asg_size_config" != "$expected_size_config" ]]; then
echo "Setting ASG back to initial sizing..."
logmessage "Setting ASG back to initial sizing..."

export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --filters "Name=tag:eks:nodegroup-name,Values=$EKS_DEFAULT_MNG_NAME" "Name=tag:eks:cluster-name,Values=$EKS_CLUSTER_NAME" --query "AutoScalingGroups[0].AutoScalingGroupName" --output text)
aws autoscaling update-auto-scaling-group \
Expand All @@ -161,9 +190,9 @@ if [ $EXIT_CODE -ne 0 ]; then
fi

# Recycle workload pods in case stateful pods got restarted
kubectl delete pod -l app.kubernetes.io/created-by=eks-workshop -l app.kubernetes.io/component=service -A > /dev/null
kubectl delete pod -l app.kubernetes.io/created-by=eks-workshop -l app.kubernetes.io/component=service -A

kubectl wait --for=condition=Ready --timeout=240s pods -l app.kubernetes.io/created-by=eks-workshop -A > /dev/null
kubectl wait --for=condition=Ready --timeout=240s pods -l app.kubernetes.io/created-by=eks-workshop -A

# Finished
echo 'Environment is ready'
logmessage 'Environment is ready'
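
The script also installs an EXIT trap so that any failure tells the user where the full log lives rather than failing silently. A rough sketch of that error-reporting pattern, building on the logmessage sketch above (the real catch() also receives $LINENO and closes an extra file descriptor):

set -Eeuo pipefail

catch() {
  if [ "$1" != "0" ]; then
    logmessage "An error occurred, the full log can be found here: $log_file"
  fi
}
trap 'catch $?' EXIT       # fires on every exit; $? becomes catch's first argument

false                      # any failing command now ends the script and triggers catch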
6 changes: 3 additions & 3 deletions lab/bin/uninstall-helm-chart
@@ -6,19 +6,19 @@ release=$1
namespace=$2

if [ -z "$release" ]; then
echo "You must provide a release name"
>&2 echo "You must provide a release name"
exit 1
fi

if [ -z "$namespace" ]; then
echo "You must provide a namespace"
>&2 echo "You must provide a namespace"
exit 1
fi

check=$(helm ls --filter "$release" -n "$namespace" --no-headers)

if [ ! -z "$check" ]; then
echo "Uninstalling helm chart $release..."
logmessage "Uninstalling helm chart $release..."

helm uninstall $release -n $namespace --wait > /dev/null
fi
10 changes: 5 additions & 5 deletions manifests/modules/aiml/inferentia/.workshop/cleanup.sh
@@ -2,16 +2,16 @@

set -e

echo "Deleting AIML resources..."
logmessage "Deleting AIML resources..."

kubectl delete namespace aiml --ignore-not-found > /dev/null
kubectl delete namespace aiml --ignore-not-found

echo "Deleting Karpenter NodePool and EC2NodeClass..."
logmessage "Deleting Karpenter NodePool and EC2NodeClass..."

delete-all-if-crd-exists nodepools.karpenter.sh
delete-all-if-crd-exists ec2nodeclasses.karpenter.k8s.aws

echo "Waiting for Karpenter nodes to be removed..."
logmessage "Waiting for Karpenter nodes to be removed..."

EXIT_CODE=0

@@ -21,5 +21,5 @@ timeout --foreground -s TERM 30 bash -c \
done' || EXIT_CODE=$?

if [ $EXIT_CODE -ne 0 ]; then
echo "Warning: Karpenter nodes did not clean up"
logmessage "Warning: Karpenter nodes did not clean up"
fi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

echo "Deleting resources created by ACK..."
logmessage "Deleting resources created by ACK..."

eksctl delete iamserviceaccount --name carts-ack --namespace carts --cluster $EKS_CLUSTER_NAME -v 0 > /dev/null
kubectl delete table items -n carts --ignore-not-found=true > /dev/null
eksctl delete iamserviceaccount --name carts-ack --namespace carts --cluster $EKS_CLUSTER_NAME -v 0
kubectl delete table items -n carts --ignore-not-found=true
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/bin/bash

echo "Deleting resources created by Crossplane..."
logmessage "Deleting resources created by Crossplane..."

delete-all-and-wait-if-crd-exists dynamodbtables.awsblueprints.io

kubectl delete tables.dynamodb.aws.upbound.io --all --ignore-not-found=true > /dev/null
kubectl delete tables.dynamodb.aws.upbound.io --all --ignore-not-found=true

kubectl wait --for=delete tables.dynamodb.aws.upbound.io --all --timeout=600s > /dev/null
kubectl wait --for=delete tables.dynamodb.aws.upbound.io --all --timeout=600s

kubectl delete -k /eks-workshop/manifests/modules/automation/controlplanes/crossplane/compositions/composition --ignore-not-found=true > /dev/null
kubectl delete -k /eks-workshop/manifests/modules/automation/controlplanes/crossplane/compositions/composition --ignore-not-found=true

kubectl wait --for=delete composition table.dynamodb.awsblueprints.io --timeout=600s > /dev/null
kubectl wait --for=delete composition table.dynamodb.awsblueprints.io --timeout=600s

eksctl delete iamserviceaccount --name carts-crossplane --namespace carts --cluster $EKS_CLUSTER_NAME -v 0 > /dev/null
eksctl delete iamserviceaccount --name carts-crossplane --namespace carts --cluster $EKS_CLUSTER_NAME -v 0
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@

set -e

echo "Deleting ArgoCD applications..."
logmessage "Deleting ArgoCD applications..."

delete-all-and-wait-if-crd-exists applications.argoproj.io

6 changes: 3 additions & 3 deletions manifests/modules/automation/gitops/flux/.workshop/cleanup.sh
@@ -2,10 +2,10 @@

set -e

echo "Uninstalling flux"
logmessage "Uninstalling flux"

flux uninstall --silent > /dev/null
flux uninstall --silent

kubectl delete namespace ui > /dev/null
kubectl delete namespace ui

rm -rf ~/environment/flux
Original file line number Diff line number Diff line change
@@ -2,12 +2,12 @@

set -e

echo "Deleting Karpenter NodePool and EC2NodeClass..."
logmessage "Deleting Karpenter NodePool and EC2NodeClass..."

delete-all-if-crd-exists nodepools.karpenter.sh
delete-all-if-crd-exists ec2nodeclasses.karpenter.k8s.aws

echo "Waiting for Karpenter nodes to be removed..."
logmessage "Waiting for Karpenter nodes to be removed..."

EXIT_CODE=0

@@ -17,5 +17,5 @@ timeout --foreground -s TERM 30 bash -c \
done' || EXIT_CODE=$?

if [ $EXIT_CODE -ne 0 ]; then
echo "Warning: Karpenter nodes did not clean up"
logmessage "Warning: Karpenter nodes did not clean up"
fi
Original file line number Diff line number Diff line change
@@ -2,6 +2,6 @@

set -e

echo "Resetting CoreDNS replicas..."
logmessage "Resetting CoreDNS replicas..."

kubectl -n kube-system scale deployment/coredns --replicas=2 > /dev/null
kubectl -n kube-system scale deployment/coredns --replicas=2
4 changes: 2 additions & 2 deletions manifests/modules/fundamentals/fargate/.workshop/cleanup.sh
@@ -1,7 +1,7 @@
check=$(aws eks list-fargate-profiles --cluster-name $EKS_CLUSTER_NAME --query "fargateProfileNames[? @ == 'checkout-profile']" --output text)

if [ ! -z "$check" ]; then
echo "Deleting Fargate profile..."
logmessage "Deleting Fargate profile..."

aws eks delete-fargate-profile --region $AWS_REGION --cluster-name $EKS_CLUSTER_NAME --fargate-profile-name checkout-profile > /dev/null
aws eks delete-fargate-profile --region $AWS_REGION --cluster-name $EKS_CLUSTER_NAME --fargate-profile-name checkout-profile
fi
Original file line number Diff line number Diff line change
@@ -4,12 +4,12 @@ set -e

check=$(aws eks list-addons --cluster-name $EKS_CLUSTER_NAME --query "addons[? @ == 'aws-ebs-csi-driver']" --output text)

kubectl delete namespace catalog --wait --ignore-not-found > /dev/null
kubectl delete namespace catalog --wait --ignore-not-found

if [ ! -z "$check" ]; then
echo "Deleting EBS CSI driver addon..."
logmessage "Deleting EBS CSI driver addon..."

aws eks delete-addon --cluster-name $EKS_CLUSTER_NAME --addon-name aws-ebs-csi-driver > /dev/null
aws eks delete-addon --cluster-name $EKS_CLUSTER_NAME --addon-name aws-ebs-csi-driver

aws eks wait addon-deleted --cluster-name $EKS_CLUSTER_NAME --addon-name aws-ebs-csi-driver > /dev/null
aws eks wait addon-deleted --cluster-name $EKS_CLUSTER_NAME --addon-name aws-ebs-csi-driver
fi
Original file line number Diff line number Diff line change
@@ -2,6 +2,6 @@

set -e

echo "Deleting EFS storage class..."
logmessage "Deleting EFS storage class..."

kubectl delete storageclass efs-sc --ignore-not-found > /dev/null
kubectl delete storageclass efs-sc --ignore-not-found
