diff --git a/.gitignore b/.gitignore index f5e6a958..4755593d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ resources/ node_modules/ **/cdk.out/* *~ +.idea/* diff --git a/config.toml b/config.toml index 6cb13715..9062aa24 100644 --- a/config.toml +++ b/config.toml @@ -18,6 +18,7 @@ disableAssetsBusting = false disableLanguageSwitchingButton = false disableShortcutsTitle = false disableInlineCopyToClipBoard = true +disableLandingPageButton = true [outputs] home = [ "HTML", "AMP", "RSS", "JSON"] diff --git a/content/authors.md b/content/authors.md deleted file mode 100644 index 6057c0f6..00000000 --- a/content/authors.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Credits -disableToc: true ---- - -
-

Project Contributors

-
- -{{< contributors "https://api.github.com/repos/aws-samples/ecs-deep-learning-workshop/contributors?per_page=1000" "true" "hyandell" >}} -{{< contributors "https://api.github.com/repos/awslabs/ec2-spot-workshops/contributors?per_page=1000" "true" "schmutze" >}} - - diff --git a/content/ecs-spot-capacity-providers/Introduction/_index.md b/content/ecs-spot-capacity-providers/Introduction/_index.md new file mode 100644 index 00000000..4b8d98bb --- /dev/null +++ b/content/ecs-spot-capacity-providers/Introduction/_index.md @@ -0,0 +1,12 @@ ++++ +title = "Introduction" +weight = 20 ++++ + + +If you are already familiar with the concepts below or already have experience with operating ECS clusters, you can skip the introduction and proceed to [**Setup the workspace environment on AWS**](/ecs-spot-capacity-providers/workshopsetup.html) section to start the workshop. + +Otherwise, you can read through to get an initial understanding of the services, technologies and features used in this workshop. + + +{{% children %}} diff --git a/content/ecs-spot-capacity-providers/Introduction/about_containers.md b/content/ecs-spot-capacity-providers/Introduction/about_containers.md new file mode 100644 index 00000000..9c134eb1 --- /dev/null +++ b/content/ecs-spot-capacity-providers/Introduction/about_containers.md @@ -0,0 +1,27 @@ ++++ +title = "Introduction to Containers" +weight = 10 ++++ + +![Container Ship](/images/ecs-spot-capacity-providers/containership.jpg) + +What is a Container? +--- + +* Containers provide a standard way to package your application’s code, configurations, and dependencies into a single object. +* Containers share an operating system installed on the server and run as a resource-isolated processes, ensuring quick, reliable, and consistent deployments, regardless of environment. +* Whether you deploy locally on your laptop or to production, the experience will remain the same (except secrets and other environmental values, of course). + +Why Containers? 
+--- +Containers allow developers to iterate at high velocity and offer the speed to scale to meet the demands of the application. It is first important to understand what a container is, and how it enables teams to move faster. + +Benefits of Containers +--- + +Containers are a powerful way for developers to package and deploy their applications. They are lightweight and provide a consistent, portable software environment for applications to easily run and scale anywhere. Building and deploying microservices, running batch jobs, running machine learning applications, and moving existing applications into the cloud are just some popular use cases for containers. + +Amazon EC2 Spot Instances +--- + +[Amazon EC2 Spot Instances](https://aws.amazon.com/ec2/spot/) offer spare compute capacity available in the AWS Cloud at steep discounts compared to On-Demand prices. EC2 can interrupt Spot Instances with two minutes of notification when EC2 needs the capacity back. You can use Spot Instances for various fault-tolerant and flexible applications. Some examples are analytics, containerized workloads, high-performance computing (HPC), stateless web servers, rendering, CI/CD, and other test and development workloads. diff --git a/content/ecs-spot-capacity-providers/Introduction/intro_to_ecs.md b/content/ecs-spot-capacity-providers/Introduction/intro_to_ecs.md new file mode 100644 index 00000000..d03eb2a2 --- /dev/null +++ b/content/ecs-spot-capacity-providers/Introduction/intro_to_ecs.md @@ -0,0 +1,92 @@ ++++ +title = "Introduction to ECS" +weight = 20 ++++ + +![Amazon ECS](/images/ecs-spot-capacity-providers/ecs.png) + +- [Amazon Elastic Container Service (Amazon ECS)](https://aws.amazon.com/ecs/) is a highly scalable, high-performance container orchestration service that supports Docker containers and allows you to easily run and scale containerized applications on AWS. 
+ +- Amazon ECS eliminates the need for you to install and operate your own container orchestration software, manage and scale a cluster of virtual machines, or schedule containers on those virtual machines. + +- ECS is also deeply integrated into the rest of the AWS ecosystem. + +![ECS integration](/images/ecs-spot-capacity-providers/integration.svg) + +## Amazon ECS Clusters + +An Amazon ECS cluster is a logical grouping of tasks or services, which we'll cover in more detail in the following pages. + +- If you are running tasks or services that use the EC2 launch type, a cluster is also a grouping of container instances. +- If you are using capacity providers, a cluster is also a logical grouping of capacity providers. +- A cluster can be a combination of Fargate and EC2 launch types. + +When you first use Amazon ECS, a default cluster is created for you, but you can create multiple clusters in an account to keep your resources separate. + +For more information on ECS Clusters, see [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/clusters.html). + +## Task Definitions + + +To prepare your application to run on Amazon ECS, you create a task definition. The task definition is a text file, in JSON format, that describes one or more containers, up to a maximum of ten, that form your application. + +We can think of it as a blueprint for your application. Task definitions specify various parameters for your application. Examples of task definition parameters are which containers to use, which launch type to use, which ports to open for your application, and what data volumes to use with the containers in the task. The specific parameters available for the task definition depend on which launch type you are using. For more information about creating task definitions, see [Amazon ECS Task Definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definitions.html). 
+ +The following is an example of a task definition containing a single container that runs an NGINX web server using the Fargate launch type. For a more extended example showing the use of multiple containers in a task definition, see [Example Task Definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/example_task_definitions.html). + +``` +{ + "family": "webserver", + "containerDefinitions": [ + { + "name": "web", + "image": "nginx", + "memory": "100", + "cpu": "99" + } + ], + "requiresCompatibilities": [ + "FARGATE" + ], + "networkMode": "awsvpc", + "memory": "512", + "cpu": "256" +} +``` + +## Fargate + +[AWS Fargate](https://aws.amazon.com/fargate/) is a technology for Amazon ECS that allows you to run containers without having to manage servers or clusters. With AWS Fargate, you no longer have to provision, configure, and scale clusters of virtual machines to run containers. This removes the need to choose server types, decide when to scale your clusters, or optimize cluster packing. AWS Fargate removes the need for you to interact with or think about servers or clusters. Fargate lets you focus on designing and building your applications instead of managing the infrastructure that runs them. + +## Tasks and Scheduling + +A task is the instantiation of a task definition within a cluster. After you have created a task definition for your application within Amazon ECS, you can specify the number of tasks that will run on your cluster. Each task that uses the Fargate launch type has its own isolation boundary and does not share the underlying kernel, CPU resources, memory resources, or elastic network interface with another task. + +The Amazon ECS task scheduler places tasks within your cluster. There are several scheduling options available. For example, you can define a service that runs and maintains a specified number of tasks simultaneously. 
You might also want to run a single task on a schedule or invoke it through APIs or as part of a serverless workflow. For more information about the different scheduling options available, see [Scheduling Amazon ECS Tasks](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/scheduling_tasks.html). + +## Services + +Amazon ECS allows you to run and maintain a specified number of instances of a task definition simultaneously in an Amazon ECS cluster. This is called a service. If any of your tasks should fail or stop for any reason, the Amazon ECS service scheduler launches another instance of your task definition to replace it and maintain the desired count of tasks in the service depending on the scheduling strategy used. + +Besides maintaining the desired count of tasks in your service, you can optionally run your service behind a load balancer. The load balancer distributes traffic across the tasks associated with the service. + +There are two service scheduler strategies available: + +- REPLICA: + + - The replica scheduling strategy places and maintains the desired number of tasks across your cluster. By default, the service scheduler spreads tasks across Availability Zones. You can use task placement strategies and constraints to customize task placement decisions. For more information, see [Replica](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html#service_scheduler_replica). + +- DAEMON: + + - The daemon scheduling strategy deploys exactly one task on each active container instance that meets all the task placement constraints that you specify in your cluster. The service scheduler evaluates the task placement constraints for running tasks and will stop tasks that do not meet the placement constraints. When using this strategy, there is no need to specify a desired number of tasks, a task placement strategy, or use Service Auto Scaling policies. 
For more information, see [Daemon](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html#service_scheduler_daemon). + + +## Service Discovery + +Because containers are immutable by nature, they can churn regularly and be replaced with newer versions of the service. This means that there is a need to register the new and deregister the old/unhealthy services. To do this on your own is challenging, hence the need for service discovery. + +AWS Cloud Map is a cloud resource discovery service. With Cloud Map, you can define custom names for your application resources, and it maintains the updated location of these dynamically changing resources. This increases your application availability because your web service always discovers the most up-to-date locations of its resources. + +Cloud Map natively integrates with ECS, and as we build services in the workshop, we will see this firsthand. For more information on service discovery with ECS, please see [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/service-discovery.html). + +![Service Discovery](/images/ecs-spot-capacity-providers/cloudmapproduct.png) \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/Introduction/scaling_ecs_workloads.md b/content/ecs-spot-capacity-providers/Introduction/scaling_ecs_workloads.md new file mode 100644 index 00000000..d3baf184 --- /dev/null +++ b/content/ecs-spot-capacity-providers/Introduction/scaling_ecs_workloads.md @@ -0,0 +1,54 @@ ++++ +title = "Scaling ECS Workloads" +weight = 30 ++++ + +There are different approaches for scaling a system. Traditionally, systems have used what we call an **Infrastructure First** approach, where the system focuses on infrastructure metrics such as CPU or Memory usage, and scales up the cluster infrastructure. In this case the application scales up following the metrics derived from the infrastructure. 
+ +While you can still use that approach on ECS, ECS follows an **Application First** scaling approach, where the scaling is based on the number of desired tasks. ECS has two types of scaling activities: + +* **ECS Service / Application Scaling**: This refers to the ability to increase or decrease the desired count of tasks in your Amazon ECS service based on dynamic traffic and load patterns in the workload. Amazon ECS publishes CloudWatch metrics with your service’s average CPU and memory usage. You can use these and other CloudWatch metrics to scale out your service (add more tasks) to deal with high demand at peak times, and to scale in your service (run fewer tasks) to reduce costs during periods of low utilization. + +* **ECS Container Instances Scaling**: This refers to the ability to increase or decrease the desired count of EC2 instances in your Amazon ECS cluster based on ECS Service / Application scaling. For this kind of scaling, the typical practice is to depend on Auto Scaling group level scaling policies. + + +To scale the infrastructure using the **Application First** approach on ECS, we will use Amazon ECS cluster **Capacity Providers** to determine the infrastructure in use for our tasks and we will use Amazon ECS **Cluster Auto Scaling** (CAS) to manage the scale of the cluster according to the application needs. + +A capacity provider configuration includes: + +* An **Auto Scaling Group** to associate with the capacity provider. The Auto Scaling group must already exist. +* An attribute to enable/disable **Managed scaling**; if enabled, Amazon ECS manages the scale-in and scale-out actions of the Auto Scaling group through the use of an AWS Auto Scaling scaling plan, also referred to as **Cluster Auto Scaling** (CAS). This also means you can scale up your ECS cluster from zero capacity in the Auto Scaling group. +* An attribute to define the **Target capacity %(percentage)** - a number between 1 and 100. 
When **managed scaling** is enabled, this value is used as the target value against the metric used by the Amazon ECS-managed target tracking scaling policy. +* An attribute to define **Managed termination protection**, which prevents EC2 instances that contain ECS tasks and that are in an Auto Scaling group from being terminated during scale-in actions. + + +Each ECS cluster can have one or more capacity providers and an optional default capacity provider strategy. For an ECS Cluster there is a **Default capacity provider strategy** that can be set for newly created tasks or services on the cluster that are created without an explicit strategy. Otherwise, for those services or tasks where the default capacity provider strategy does not meet your needs you can define a **capacity provider strategy** that is specific for that service or task. + +{{% notice info %}} +You can read more about **Capacity Provider Strategies** [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/cluster-capacity-providers.html) +{{% /notice %}} + +# ECS Cluster Auto scaling + +When enabling **managed scaling** Amazon ECS manages the scale-in and scale-out actions of the Auto Scaling group. This is what we call ECS **Cluster Auto Scaling (CAS)**. CAS is a new capability for ECS to manage the scaling of EC2 Auto Scaling groups (ASG). CAS relies on ECS capacity providers. + +Amazon ECS creates an AWS Auto Scaling scaling plan with a target tracking scaling policy based on the target capacity value you specify. Amazon ECS then associates this scaling plan with your Auto Scaling group. For each of the capacity providers with managed scaling enabled, an Amazon ECS managed CloudWatch metric with the prefix `AWS/ECS/ManagedScaling` is created along with two CloudWatch alarms. The CloudWatch metrics and alarms are used to monitor the container instance capacity in your Auto Scaling groups and will trigger the Auto Scaling group to scale in and scale out as needed. 
+ +The scaling policy uses a new CloudWatch metric called **CapacityProviderReservation** that ECS publishes for every ASG capacity provider that has managed scaling enabled. The new CloudWatch metric CapacityProviderReservation is defined as follows. + +```ruby +CapacityProviderReservation = ( M / N ) x 100 +``` + +Where: + +* **N** represents the current number of instances in the Auto Scaling group (ASG) that are **already running** +* **M** represents the number of instances running in an ASG necessary to meet the needs of the tasks assigned to that ASG, including tasks already running and tasks the customer is trying to run that don’t fit on the existing instances. + +Given this assumption, if N = M, scaling out is not required, and scaling in isn’t possible. If N < M, scale out is required because you don’t have enough instances. If N > M, scale in is possible (but not necessarily required) because you have more instances than you need to run all of your ECS tasks. The CapacityProviderReservation metric is a relative proportion of the Target capacity value and dictates how much scale-out / scale-in should happen. CAS always tries to ensure **CapacityProviderReservation** is equal to the specified Target capacity value, either by increasing or decreasing the number of instances in the ASG. + +The scale-out activity is triggered if **`CapacityProviderReservation` > `Target capacity`** for 1 data point with 1 minute duration. That means it takes 1 minute to scale out the capacity in the ASG. The scale-in activity is triggered if CapacityProviderReservation < Target capacity for 15 data points with 1 minute duration. We will see all of this demonstrated in this workshop. 
+ +{{% notice info %}} +You can read more about **ECS Cluster Auto Scaling (CAS)** and how it works under different scenarios and conditions **[in this blog post](https://aws.amazon.com/blogs/containers/deep-dive-on-amazon-ecs-cluster-auto-scaling/)** +{{% /notice %}} \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/WorkshopSetup/_index.md b/content/ecs-spot-capacity-providers/WorkshopSetup/_index.md new file mode 100644 index 00000000..50134361 --- /dev/null +++ b/content/ecs-spot-capacity-providers/WorkshopSetup/_index.md @@ -0,0 +1,8 @@ +--- +title: "Setup the Workspace environment" +weight: 40 +--- + + +{{% children %}} + diff --git a/content/ecs-spot-capacity-providers/WorkshopSetup/cli_setup.md b/content/ecs-spot-capacity-providers/WorkshopSetup/cli_setup.md new file mode 100644 index 00000000..c4c7db2e --- /dev/null +++ b/content/ecs-spot-capacity-providers/WorkshopSetup/cli_setup.md @@ -0,0 +1,96 @@ +--- +title: "Setup AWS CLI and clone the workshop repo" +weight: 40 +--- + +{{% notice tip %}} +For this workshop, please ignore warnings about the version of pip being used. +{{% /notice %}} + +1. Run the following command to view the current version of aws-cli: +``` +aws --version +``` + +1. Update to the latest version: +``` +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +sudo ./aws/install +. ~/.bash_profile +``` + +1. Confirm you have a newer version: +``` +aws --version +``` + +Install dependencies for use in the workshop by running: + +``` +sudo yum -y install jq gettext +``` + +### Clone the GitHub repo + +In order to execute the steps in the workshop, you'll need to clone the workshop GitHub repo. + +In the Cloud9 IDE terminal, run the following command: + +``` +git clone https://github.com/awslabs/ec2-spot-workshops.git +``` +Change into the workshop directory: + +``` +cd ec2-spot-workshops/workshops/ecs-spot-capacity-providers +``` + +Feel free to browse around. 
You can also browse the directory structure in the **Environment** tab on the left and even edit files directly there by double clicking on them. + +We should configure our AWS CLI with our current region as default: + +``` +export ACCOUNT_ID=$(aws sts get-caller-identity --output text --query Account) +export AWS_REGION=$(curl -s 169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region') +echo "export ACCOUNT_ID=${ACCOUNT_ID}" >> ~/.bash_profile +echo "export AWS_REGION=${AWS_REGION}" >> ~/.bash_profile +aws configure set default.region ${AWS_REGION} +aws configure get default.region + +``` + +Use the commands below to set the CloudFormation stack name to an environment variable. + +* If you created the stack manually: + +``` +export STACK_NAME=EcsSpotWorkshop +``` + +* If the stack was created automatically within Event Engine: + +``` +export STACK_NAME=$(aws cloudformation list-stacks | jq -r '.StackSummaries[] | select(.StackName|test("mod.")) | .StackName') +echo "STACK_NAME=$STACK_NAME" +``` + +The output should look something like below. + +``` +STACK_NAME=mod-9feefdd1672c4eac +``` + + +Run the command below to load CloudFormation outputs as environment variables. + +``` +for output in $(aws cloudformation describe-stacks --stack-name ${STACK_NAME} --query 'Stacks[].Outputs[].OutputKey' --output text) +do + export $output=$(aws cloudformation describe-stacks --stack-name ${STACK_NAME} --query 'Stacks[].Outputs[?OutputKey==`'$output'`].OutputValue' --output text) + eval "echo $output : \"\$$output\"" +done +``` + +***Congratulations***, your Cloud9 workspace setup is complete, and you can continue with this workshop. 
+ diff --git a/content/ecs-spot-capacity-providers/WorkshopSetup/launch_cloudformation.md b/content/ecs-spot-capacity-providers/WorkshopSetup/launch_cloudformation.md new file mode 100644 index 00000000..03138e5a --- /dev/null +++ b/content/ecs-spot-capacity-providers/WorkshopSetup/launch_cloudformation.md @@ -0,0 +1,50 @@ +--- +title: "Deploy CloudFormation Stack" +weight: 10 +--- + +To save time on the initial setup, a CloudFormation template will be used to create the resources needed for the workshop. + +1. You can view and download the CloudFormation template from GitHub [here](https://raw.githubusercontent.com/awslabs/ec2-spot-workshops/master/workshops/ecs-spot-capacity-providers/ecs-spot-workshop-cfn.yaml). +2. Take a moment to review the CloudFormation template so you understand the resources it will be creating. +3. Browse to the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation). Make sure you are in the AWS region designated by the facilitators of the workshop. +4. Click **Create stack**. +5. Under the *Specify template* section, select **Upload a template file**. Click **Choose file** and select the template you downloaded in step 1. +6. Click **Next**. +7. In the *Specify stack details* section, enter **EcsSpotWorkshop** as *Stack name*. +8. [Optional] In the *Parameters* section, optionally change the *sourceCidr* to restrict load balancer HTTP access. +9. Click **Next**. +10. In *Configure stack options*, you don’t need to make any changes. +11. Click **Next**. +12. Review the information for the stack. At the bottom under *Capabilities*, select **I acknowledge that AWS CloudFormation might create IAM resources**. When you’re satisfied with the settings, click **Create stack**. + +### Monitor the progress of stack creation + +It will take roughly 5 minutes for the stack creation to complete. + +1. On the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation), select the stack in the list. +2. 
In the stack details pane, click the **Events** tab. You can click the refresh button to update the events in the stack creation. + +The *Events* tab displays each major step in the stack's creation sorted by the time of each event, with the latest events on top. + +The *CREATE_IN_PROGRESS* event is logged when AWS CloudFormation reports that it has begun to create the resource. The *CREATE_COMPLETE* event is logged when the resource is successfully created. + +When AWS CloudFormation has successfully created the stack, you will see the *CREATE_COMPLETE* event at the top of the Events tab: + +Take a moment and check out all the resources created by this stack. + +![CloudFormation Stack](/images/ecs-spot-capacity-providers/ecs_cfn_stack.png) + +Note that if you are running this workshop inside an Event Engine, the CloudFormation stack names may look like this: + +![CloudFormation Stack](/images/ecs-spot-capacity-providers/CFN_stacks.png) + + +The CloudFormation stack creates the following resources for the workshop. + +* 1 VPC with 6 subnets; 3 public and 3 private subnets +* Application Load Balancer (ALB) with its own security group +* Target Group and an ALB listener +* Cloud9 Environment and its IAM Role +* EC2 Launch template with necessary ECS config for bootstrapping the instances into the ECS cluster +* ECR Repository \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/WorkshopSetup/resize_ebs.md b/content/ecs-spot-capacity-providers/WorkshopSetup/resize_ebs.md new file mode 100644 index 00000000..a7d351c1 --- /dev/null +++ b/content/ecs-spot-capacity-providers/WorkshopSetup/resize_ebs.md @@ -0,0 +1,54 @@ +--- +title: "Resize Cloud9 Instance Root Volume" +chapter: false +weight: 30 +--- + +## Resize Cloud9 EBS + +The default 10GB may not be enough to build the application Docker images. +Thus, let us resize the EBS volume used by the Cloud9 instance. + +To change the EBS volume, please do the following: + + 1. 
Select the Cloud9 instance in the EC2 console [deep link to get there](https://console.aws.amazon.com/ec2/v2/home?#Instances:search=aws-cloud9-EcsSpotWorkshop) + 2. Click the **Storage :** Section + 3. Click on the Volume ID. That will take you to the EBS volume details page. + +![resize_ebs_0](/images/ecs-spot-capacity-providers/cloud9_instance.png) + +Modify the EBS volume. + +![resize_ebs_1](/images/ecs-spot-capacity-providers/resize_ebs_1.png) + +Choose a new volume size (e.g. 100GB). + +![resize_ebs_2](/images/ecs-spot-capacity-providers/resize_ebs_2.png) + +{{% notice info %}} +Please make sure changes went through, and the EBS volume now reflects the new size of the volume. +{{% /notice %}} + +## Resize FS + +Changing the block device does not increase the size of the file system. + +To do so, head back to the Cloud9 instance and use the following commands. + +``` +sudo growpart /dev/xvda 1 +sudo resize2fs $(df -h |awk '/^\/dev/{print $1}') +``` + +The root file-system should now show 99GB. + +``` +df --human-readable +``` + +```plaintext +Filesystem Size Used Avail Use% Mounted on +devtmpfs 483M 60K 483M 1% /dev +tmpfs 493M 0 493M 0% /dev/shm +/dev/xvda1 99G 8.0G 91G 9% / +``` diff --git a/content/ecs-spot-capacity-providers/WorkshopSetup/setup_cloud9_workspace.md b/content/ecs-spot-capacity-providers/WorkshopSetup/setup_cloud9_workspace.md new file mode 100644 index 00000000..e054fe5c --- /dev/null +++ b/content/ecs-spot-capacity-providers/WorkshopSetup/setup_cloud9_workspace.md @@ -0,0 +1,68 @@ +--- +title: "Setup Cloud9 Environment" +weight: 20 +--- + +## Setting up the Cloud9 Environment + +{{% notice warning %}} +If you are running the workshop on your own, the Cloud9 workspace should be built by an IAM user with Administrator privileges, not the root account user. +{{% /notice %}} + +Please ensure you are logged in as an IAM user. We will open the Cloud9 environment first to execute all the commands needed for this workshop. + +1. 
Login into AWS console with your account credentials and choose the region where you deployed the CloudFormation template. +1. Select **Services** and type **Cloud9** +1. Click on **Your environments**. +1. Select the Cloud9 environment with the name **EcsSpotWorkshop** +1. Click on **Open IDE** + +![Cloud 9 Environment](/images/ecs-spot-capacity-providers/cloud9_environment.png) + +1. When it comes up, customize the environment by closing the **welcome tab** and **lower work area**, and opening a new **terminal** tab in the main work area: +1. If you like the dark theme seen below, you can choose it yourself by selecting **View / Themes / Solarized / Solarized Dark** in the Cloud9 workspace menu. + +{{% notice tip %}} +If you have not used Cloud9 before, take your time to explore the IDE (Integrated Development Environment). We will primarily be using the terminal and the editor to read files. +{{% /notice %}} + + +Your workspace should now look like this: +![Cloud 9 Environment](/images/ecs-spot-capacity-providers/cloud9_4.png) + + +## Attaching an IAM role to the Cloud9 workspace + +In order to work with ECS from our new Cloud9 IDE environment, we need the required permissions. 
+ +* Find your Cloud9 EC2 instance [here](https://console.aws.amazon.com/ec2/v2/home?#Instances:search=aws-cloud9-EcsSpotWorkshop) +* Select the Instance, then choose **Actions** -> **Security** -> **Modify IAM Role** + +![Attach IAM Role](/images/ecs-spot-capacity-providers/attach_iam_role.png) + +* Choose **EcsSpotWorkshop-Cloud9InstanceProfile** from the *IAM Role* drop down, and select *Apply* + +![Attach IAM Role](/images/ecs-spot-capacity-providers/c9_2.png) + +* Return to your Cloud9 instance and click on the **Settings** icon at the top right +* Select **AWS SETTINGS** +* Turn off **AWS managed temporary credentials** +* Close the Preferences tab + +![Attach IAM Role](/images/ecs-spot-capacity-providers/c9_3.png) + +Use the [GetCallerIdentity](https://docs.aws.amazon.com/cli/latest/reference/sts/get-caller-identity.html) CLI command to validate that the Cloud9 IDE is using the correct IAM role. + +``` +aws sts get-caller-identity +``` + +The Arn field of the output should contain the name of the assumed role. + +``` +{ + "UserId": "AROAQAHCJ2QPOAJPQADXV:i-0eedc304975256fac", + "Account": "0004746XXXXX", + "Arn": "arn:aws:sts::0004746XXXXX:assumed-role/EcsSpotWorkshop-Cloud9InstanceRole/i-0eedc304975256fac" +} +``` \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/_index.md b/content/ecs-spot-capacity-providers/_index.md new file mode 100644 index 00000000..30c909a7 --- /dev/null +++ b/content/ecs-spot-capacity-providers/_index.md @@ -0,0 +1,22 @@ +--- +title: "ECS: Cost Optimize Container Workloads using EC2 Spot" +date: 2020-04-15T09:05:54Z +weight: 30 +pre: "" +--- + +## Overview + +Welcome! The **learning objective** of this hands-on workshop is to help you understand the different options to cost optimize container workloads running on **[Amazon ECS](https://aws.amazon.com/ecs/)** using **[EC2 Spot Instances](https://aws.amazon.com/ec2/spot/)** and **[AWS Fargate Spot](https://aws.amazon.com/fargate/)**. 
This workshop covers topics such as ECS cluster auto scaling and how to scale efficiently with **[Capacity Providers](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/cluster-capacity-providers.html)** to spread your tasks across a mix of resources, both on AWS Fargate and AWS Fargate Spot as well as EC2 On-Demand and Spot Instances. + + +{{% notice info %}} +The estimated time for completing the workshop is **90 to 120 minutes**. The estimated cost will be less than **$5**. +{{% /notice %}} + +The workshop is designed to be completed in sequence. If you are reading this at a live AWS event, the workshop attendants will give you a high level run down of the workshop. Then it is up to you to follow the instructions below to completion. Don't worry if you're embarking on this journey in the comfort of your office or home; this site contains all the materials you'll need to complete this workshop. + + +### About Spot Instances in Containerized workloads + +Containerized workloads are often stateless and fault tolerant, which is a great fit for running on EC2 Spot Instances. In this workshop we will explore how to run containers on interruptible EC2 Spot Instances and achieve significant cost savings. diff --git a/content/ecs-spot-capacity-providers/before/_index.md b/content/ecs-spot-capacity-providers/before/_index.md new file mode 100644 index 00000000..56c143e7 --- /dev/null +++ b/content/ecs-spot-capacity-providers/before/_index.md @@ -0,0 +1,9 @@ ++++ +title = "Starting the workshop" +chapter = false +weight = 30 ++++ + +To start the workshop, follow one of the following pages, depending on whether you are... 
+ +{{% children %}} \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/before/aws_event.md b/content/ecs-spot-capacity-providers/before/aws_event.md new file mode 100644 index 00000000..11de8bcf --- /dev/null +++ b/content/ecs-spot-capacity-providers/before/aws_event.md @@ -0,0 +1,27 @@ ++++ +title = "...At an AWS event" +weight = 10 ++++ + +{{% notice warning %}} +Only complete this section if you are at an AWS hosted event (such as re:Invent, public workshop, Immersion Day, or any other event hosted by an AWS employee). If you are running the workshop on your own, go to: [Start the workshop on your own]({{< ref "/ecs-spot-capacity-providers/before/self_paced.md" >}}) +{{% /notice %}} + +### Login to the AWS Workshop Portal + +If you are at an AWS event, an AWS account has been created for you to use throughout the workshop. You will need the **Participant Hash** provided to you by the event's organizers. + +1. Connect to the portal by browsing to [https://dashboard.eventengine.run/](https://dashboard.eventengine.run/). +2. Enter the Hash in the text box, and click **Proceed** +3. In the User Dashboard screen, click **AWS Console** +4. In the popup page, click **Open Console** +5. Select the AWS region specified by your facilitator. + +You are now logged in to the AWS console in an account that was created for you, and that will be available only throughout the workshop run time. 
+ +You can now proceed to the workshop steps [**Setup the workshop environment on AWS**](/ecs-spot-capacity-providers/workshopsetup.html) + +{{% notice info %}} +**Optional:** If you want to read through basic concepts on Amazon ECS before doing workshop steps, you may go to [Introduction](/ecs-spot-capacity-providers/introduction.html) +{{% /notice %}} + diff --git a/content/ecs-spot-capacity-providers/before/self_paced.md b/content/ecs-spot-capacity-providers/before/self_paced.md new file mode 100644 index 00000000..42825f6e --- /dev/null +++ b/content/ecs-spot-capacity-providers/before/self_paced.md @@ -0,0 +1,16 @@ ++++ +title = "...On your own (self-paced)" +weight = 20 ++++ + +### Running the workshop self-paced, in your own AWS account + +To complete this workshop, you need access to an AWS account with administrative permissions. An IAM user with administrator access (**arn:aws:iam::aws:policy/AdministratorAccess**) will do nicely. + + +{{% notice info %}} +If you need an introduction to containers, Amazon ECS and AWS Fargate, continue to the [**Introduction section.**](/ecs-spot-capacity-providers/introduction.html) Alternatively, you can go directly to the [**Setup the workshop environment on AWS**](/ecs-spot-capacity-providers/workshopsetup.html) section. +{{% /notice %}} + +To avoid unwanted costs in your account, don't forget to go through the [**Cleanup step**](/ecs-spot-capacity-providers/cleanup.html) when you finish the workshop, or if you deploy the CloudFormation template but don't complete the workshop. 
+ diff --git a/content/ecs-spot-capacity-providers/cleanup.md b/content/ecs-spot-capacity-providers/cleanup.md new file mode 100644 index 00000000..230eec9c --- /dev/null +++ b/content/ecs-spot-capacity-providers/cleanup.md @@ -0,0 +1,102 @@ +--- +title: "Cleanup" +date: 2018-08-07T08:30:11-07:00 +weight: 70 +--- + +{{% notice warning %}} +If you're running in your own account, make sure you run through these steps so that you don't encounter unwanted costs! For those of you that are running as part of an AWS event, there's no need to go through the cleanup stage. +{{% /notice %}} + +{{% notice tip %}} +Before you clean up the resources and complete the workshop, you may want to review or complete some of the optional exercises in the previous sections of this workshop! +{{% /notice %}} + +We need to scale down the number of tasks in the ECS services before deleting them. + +Run the command below to delete the ECS services *ec2-service-split* and *fargate-service-split*. This command may take +some time to complete. All the tasks will be terminated and the services removed. + +{{% notice note %}} +To delete resources like the ECS cluster and the capacity providers, we first need to make sure the resources they depend on have been terminated and the status is *DELETED* or *INACTIVE*. The following command has a few **while** loops, just to wait until the conditions required for a clean removal are met. 
+{{% /notice %}} + +``` +aws ecs update-service --cluster EcsSpotWorkshop --service ec2-service-split --desired-count 0 > /dev/null +aws ecs update-service --cluster EcsSpotWorkshop --service fargate-service-split --desired-count 0 > /dev/null +while [ 1 -ne $(aws ecs list-tasks --cluster EcsSpotWorkshop --output yaml | wc -l) ] +do + aws ecs list-tasks --cluster EcsSpotWorkshop --output table + echo "Waiting for the tasks above to clear out" + sleep 10 +done +aws ecs delete-service --cluster EcsSpotWorkshop --service ec2-service-split > /dev/null +aws ecs delete-service --cluster EcsSpotWorkshop --service fargate-service-split > /dev/null +``` + +Once the services and tasks have been removed we can remove the capacity providers. + +``` +aws ecs put-cluster-capacity-providers \ +--cluster EcsSpotWorkshop \ +--capacity-providers [] \ +--default-capacity-provider-strategy [] > /dev/null +aws ecs delete-capacity-provider --capacity-provider CP-OD > /dev/null +while [ "true" == $(aws ecs describe-capacity-providers --capacity-provider CP-OD --query "capacityProviders[0].status!='INACTIVE'") ] +do + echo "Waiting for Capacity-provider CP-OD to become inactive" + sleep 5 +done +aws ecs delete-capacity-provider --capacity-provider CP-SPOT > /dev/null +while [ "true" == $(aws ecs describe-capacity-providers --capacity-provider CP-SPOT --query "capacityProviders[0].status!='INACTIVE'") ] +do + echo "Waiting for Capacity-provider CP-SPOT to become inactive" + sleep 5 +done +``` + + +Now let's remove the auto scaling group and the ECS cluster. +Note again how we will need to wait for all the instances to be terminated +before the cluster deletion can proceed. 
+ +``` +aws autoscaling delete-auto-scaling-group \ +--force-delete --auto-scaling-group-name EcsSpotWorkshop-ASG-SPOT +aws autoscaling delete-auto-scaling-group \ +--force-delete --auto-scaling-group-name EcsSpotWorkshop-ASG-OD +while [ 1 -ne $(aws ecs list-container-instances --cluster EcsSpotWorkshop --output yaml | wc -l) ] +do + aws ecs list-container-instances --cluster EcsSpotWorkshop --output table + echo "Waiting for the instances above to clear out" + sleep 10 +done +aws ecs delete-cluster --cluster EcsSpotWorkshop +``` + +Deregister [EC2 Task](https://console.aws.amazon.com/ecs/home?#/taskDefinitions/ec2-task/status/ACTIVE) -- If you see multiple versions, repeat the step below for all versions. + +``` +aws ecs deregister-task-definition --task-definition ec2-task:1 +``` + +Deregister [Fargate Task](https://console.aws.amazon.com/ecs/home?#/taskDefinitions/fargate-task/status/ACTIVE) -- If you see multiple versions, repeat the step below for all versions. +``` +aws ecs deregister-task-definition --task-definition fargate-task:1 +``` + +Delete the "ecs-spot-workshop/webapp" container repository from Amazon Elastic Container Registry + +``` +aws ecr delete-repository --force --repository-name ecs-spot-workshop/webapp +``` + +Finally, let's remove the CloudFormation stack. Go to the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home?#/stacks?filteringStatus=active&filteringText=&viewNested=true&hideStacks=false), select the CloudFormation stack **EcsSpotWorkshop**, and click on **delete** to remove the stack and all associated resources. + +![DeleteStack](/images/ecs-spot-capacity-providers/cloudformation_delete_stack.png) + +{{% notice tip %}} +Please verify in the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home?#/stacks?filteringStatus=active&filteringText=&viewNested=true&hideStacks=false) that the CloudFormation stack is deleted without any failures. 
If you notice any failure, just delete it again directly from the CloudFormation console. +{{% /notice %}} + +That's it, all the resources you created during this workshop have now been removed. diff --git a/content/ecs-spot-capacity-providers/conclusion.md b/content/ecs-spot-capacity-providers/conclusion.md new file mode 100644 index 00000000..a06b59c2 --- /dev/null +++ b/content/ecs-spot-capacity-providers/conclusion.md @@ -0,0 +1,33 @@ +--- +title: "Conclusion" +chapter: false +weight: 90 +--- + +**Congratulations!** You have reached the end of the workshop. We covered a lot of ground learning how to apply EC2 Spot best practices such as diversification, as +well as the use of capacity providers. + +In the session, we have: + +- Deployed a CloudFormation Stack that prepared our environment, including our VPC and a Cloud9 environment. +- Created and configured an ECS cluster from scratch. +- Created Auto Scaling Groups and Capacity Providers associated with them for OnDemand and Spot instances, and applied EC2 Spot Diversification strategies. +- Configured a Capacity provider strategy that mixes OnDemand and Spot +- Learned how ECS Cluster Scaling works with Capacity Providers +- Deployed Services both on Fargate Capacity Providers and EC2 Capacity providers + + +# EC2 Spot Savings + +There is one more thing that we've accomplished! + + * Log into the **[EC2 Spot Request](https://console.aws.amazon.com/ec2sp/v1/spot/home)** page in the Console. + * Click on the **Savings Summary** button. + +![EC2 Spot Savings](/images/spot_savings_summary.png) + +{{% notice note %}} +We have achieved a significant cost saving over On-Demand prices that we can apply in a controlled way and at scale. We hope these savings will help you try new experiments or build other cool projects. **Now Go Build** ! 
+{{% /notice %}} + +{{< youtube 3wGeqmSwz9k >}} \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/_index.md b/content/ecs-spot-capacity-providers/module-1/_index.md new file mode 100644 index 00000000..9bde726a --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/_index.md @@ -0,0 +1,8 @@ +--- +title: "Using Spot Instances with Auto Scaling groups capacity providers" +weight: 50 +--- + +In this section, we will show how to leverage ECS Auto Scaling group capacity providers to optimize costs using EC2 Spot Instances. + +{{% children %}} \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/architecture.md b/content/ecs-spot-capacity-providers/module-1/architecture.md new file mode 100644 index 00000000..fd35d211 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/architecture.md @@ -0,0 +1,29 @@ +--- +title: "Architecture" +date: 2020-04-15T08:30:11-07:00 +weight: 10 +--- + +Your Challenge +--- + +Your company hosts external-facing Apache web servers serving millions of users across the globe, based on micro-services architecture running inside Docker containers on an Amazon ECS Cluster. The underlying compute for the ECS Cluster is completely based on EC2 On-demand instances. Your company is forecasting huge traffic in the next couple of months and would like to leverage Amazon EC2 Spot instances for cost optimization. + +Also, the current scale in/out policies are based on the vCPU reservation metrics of the EC2 instances. However, it is observed that the ECS cluster does not scale fast enough to handle the sudden surge of web traffic during peak hours. During the scale in, sometimes EC2 instances that are actively running ECS tasks are getting terminated, causing disruption to the web service. 
+ +As a long-term strategy, your company does not want to invest resources in undifferentiated heavy lifting of managing the underlying computing infrastructure and instead would like to evaluate running some containerized workloads on a serverless container platform, to further focus on the application and not the infrastructure. + +You were introduced to Amazon EC2 Spot instances and a few ECS features that can improve cluster scaling and increase the resilience of the applications. Your manager asks you to build a PoC to test all these features. + +* What options do you have to incorporate EC2 Spot instances in your architecture? +* How do you plan to improve the cluster scaling and resilience of the applications? + +Here is the overall architecture. By the end of the workshop, you will achieve the following objectives. + +1. Explore the serverless computing options such as ECS Fargate and ECS Fargate Spot. +2. Explore both EC2 Spot and On-Demand instances for the underlying compute platform. +3. Leverage ECS features such as capacity providers and Cluster Autoscaling (CAS) to improve the scaling and resilience of the applications. + + +#### Amazon ECS Application Architecture: +![Overall Architecture](/images/ecs-spot-capacity-providers/amazon_ecs_arch.png) diff --git a/content/ecs-spot-capacity-providers/module-1/asg_with_od.md b/content/ecs-spot-capacity-providers/module-1/asg_with_od.md new file mode 100644 index 00000000..06e3ed15 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/asg_with_od.md @@ -0,0 +1,116 @@ +--- +title: "Create On-Demand Auto Scaling Group and Capacity Provider" +weight: 30 +--- + +## Create the OnDemand Auto Scaling Group + +So far we have an ECS Cluster created and a Launch Template that bootstraps ECS agents and links them against the ECS Cluster. In this section, we will create an EC2 Auto Scaling group (ASG) for On-Demand Instances using the Launch Template created by the CloudFormation stack. 
Go back to your Cloud9 environment and copy the file **templates/asg.json** for the EC2 Auto Scaling group configuration. + +``` +cd ~/environment/ec2-spot-workshops/workshops/ecs-spot-capacity-providers/ +cp templates/asg.json . +``` +We will now replace the environment variables in the **asg.json** file with the On-Demand settings, changing the OnDemand percentage field in ASG and +substituting the **asg.json** placeholder names in the template with the CloudFormation environment variables that we exported earlier. + +``` +export ASG_NAME=EcsSpotWorkshop-ASG-OD +export OD_PERCENTAGE=100 # Note that ASG will have 100% On-Demand, 0% Spot +sed -i -e "s#%ASG_NAME%#$ASG_NAME#g" -e "s#%OD_PERCENTAGE%#$OD_PERCENTAGE#g" -e "s#%PUBLIC_SUBNET_LIST%#$VPCPublicSubnets#g" asg.json +``` +{{% notice info%}} +Read the **asg.json** file and understand the various configuration options for the EC2 Auto Scaling group. Check how, although this is an On-Demand Auto Scaling group, we still apply instance diversification with the Prioritized allocation strategy. Check how the **Launch Template** we reviewed in the previous section is referenced in the Auto Scaling Group. +{{% /notice %}} + +Create the ASG for the On-Demand Instances. + +``` +aws autoscaling create-auto-scaling-group --cli-input-json file://asg.json +ASG_ARN=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name $ASG_NAME | jq -r '.AutoScalingGroups[0].AutoScalingGroupARN') +echo "$ASG_NAME ARN=$ASG_ARN" +``` + +The output of the above command will be similar to the one below. We have captured the ASG ARN to use in the next sections. 
+ +```plaintext +EcsSpotWorkshop-ASG-OD ARN=arn:aws:autoscaling:us-east-1:0004746XXXX:autoScalingGroup:1e9de503-068e-4d78-8272-82536fc92d14:autoScalingGroupName/EcsSpotWorkshop-ASG-OD +``` + + +The On-Demand auto scaling group will appear as below in the [console](https://console.aws.amazon.com/ec2autoscaling/home?#/details/EcsSpotWorkshop-ASG-OD?view=details) + + +#### Optional Exercises + +Based on the configuration and steps above, try to answer the following questions: + + +* Now that we have created an OnDemand AutoScaling Group, **Can you guess how much capacity we have allocated to our Cluster ?** + + +{{%expand "Show me the answer" %}} +{{% notice note %}} +So far there is no capacity provisioned in the Auto Scaling Group, or in our ECS cluster. Check how the desired capacity is zero in the ASG. We expect the capacity to scale up automatically when we deploy applications later. +{{% /notice %}} + +![On-demand ASG](/images/ecs-spot-capacity-providers/asg_od_initial_view_1.png) +{{% /expand %}} + + +* **How did we configured the Auto Scaling Group to Scale on demand instances?** + +{{%expand "Show me the answer" %}} + +{{% notice note %}} +Check in the console that there are no scaling policies attached to this Auto scaling group. Later on the policies will be created when we enable CAS (managed Cluster Auto Scaling) in the capacity providers. +{{% /notice %}} + +![On-demand ASG](/images/ecs-spot-capacity-providers/asg_od_initial_view_2.png) + +{{% /expand %}} + + +## Create the OnDemand Capacity Provider + +To create a capacity provider, follow these steps: + +* Open the [ECS console] (https://console.aws.amazon.com/ecs/home) in the region where you deployed the CFN template. 
+* Click **Clusters** +* Click [EcsSpotWorkshop] (https://console.aws.amazon.com/ecs/home#/clusters/EcsSpotWorkshop) +* Click the tab **Capacity Providers** +* Click **Create** +* For capacity provider name, enter **CP-OD** +* For Auto Scaling group, select **EcsSpotWorkshop-ASG-OD** +* For Managed Scaling, leave with default selection of **Enabled** +* For Target capacity %, enter **100** +* For Managed termination protection, leave with default selection of **Enabled** +* Click on **Create** on the bottom right + +![Capacity Provider on OD ASG](/images/ecs-spot-capacity-providers/CP_OD.png) + +#### Optional Exercises + +Based on the configuration and steps above, try to answer the following questions: + +* **How would you check in the console the details about the new capacity provider created?** + +{{%expand "Show me the answer" %}} +* Open the [ECS console] (https://console.aws.amazon.com/ecs/home) in the region where you deployed the CFN template. +* Click **Clusters** +* Click [EcsSpotWorkshop] (https://console.aws.amazon.com/ecs/home#/clusters/EcsSpotWorkshop) +* Refresh the *Capacity Providers* tab, and you will see the CP-OD is created and attached to the ECS cluster. + +![Capacity Provider on OD ASG](/images/ecs-spot-capacity-providers/CP-OD.png) +{{% /expand %}} + + +* When creating the capacity provider against the Auto Scaling Group, we did enable "Managed Scaling" or CAS (Cluster Auto Scaling). **How can I confirm the right scaling policy has been created for this Auto Scaling Group?** + +{{%expand "Show me the answer" %}} +The capacity provider creates a target tracking policy on the On-Demand Auto Scaling group. + +Go to the [AWS EC2 Console](https://console.aws.amazon.com/ec2autoscaling/home?#/details/EcsSpotWorkshop-ASG-OD?view=scaling) and select the Automatic Scaling tab on the EcsSpotWorkshop-ASG-OD. 
+ +![OD ASG](/images/ecs-spot-capacity-providers/asg_od_with_cp_view_1.png) +{{% /expand %}} \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/asg_with_spot.md b/content/ecs-spot-capacity-providers/module-1/asg_with_spot.md new file mode 100644 index 00000000..e858155f --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/asg_with_spot.md @@ -0,0 +1,88 @@ +--- +title: "Create EC2 Spot Auto Scaling Group and Capacity Provider" +weight: 50 +--- + +## Create the Spot Auto Scaling Group + +In this section, you create an Auto Scaling group for EC2 Spot Instances using the Launch Template created by the CloudFormation stack. This procedure is exactly the same as the previous section, except for a few changes specific to the configuration for Spot Instances. + +Copy the file **templates/asg.json** for the EC2 Auto Scaling group configuration. + +``` +cd ~/environment/ec2-spot-workshops/workshops/ecs-spot-capacity-providers/ +cp templates/asg.json spot-asg.json +``` + +{{% notice note %}} +Read the **spot-asg.json** file. We configured the instance diversification in **spot-asg.json** according to the guidelines from our **[previous section](/ecs-spot-capacity-providers/module-1/selecting_spot_instance_types.html)**. Notice how we've chosen instance types with similar hardware characteristics in order to have a consistent auto scaling experience. Check also how the allocation strategy chosen for Spot is **[Capacity-optimized](https://aws.amazon.com/blogs/aws/capacity-optimized-spot-instance-allocation-in-action-at-mobileye-and-skyscanner/)**, this will let the ASG select the instances that mimimize the frequency of spot interruptions. +{{% /notice %}} + +We will now replace the environment variables in the spot-asg.json file with the Spot settings, setting the OnDemand percentage to 0 and substituting the CloudFormation environment variables that we exported earlier. 
+ + +``` +export ASG_NAME=EcsSpotWorkshop-ASG-SPOT +export OD_PERCENTAGE=0 # Note that ASG will have 0% On-Demand, 100% Spot +sed -i -e "s#%ASG_NAME%#$ASG_NAME#g" -e "s#%OD_PERCENTAGE%#$OD_PERCENTAGE#g" -e "s#%PUBLIC_SUBNET_LIST%#$VPCPublicSubnets#g" spot-asg.json +``` + +Finally we create the ASG for the Spot Instances and store the ARN for the spot group. + +``` +aws autoscaling create-auto-scaling-group --cli-input-json file://spot-asg.json +ASG_ARN=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name $ASG_NAME | jq -r '.AutoScalingGroups[0].AutoScalingGroupARN') +echo "$ASG_NAME ARN=$ASG_ARN" +``` + +The output of the above command will appear as below: + +```plaintext +EcsSpotWorkshop-ASG-SPOT ARN=arn:aws:autoscaling:us-east-1:0004746XXXX:autoScalingGroup:dd7a67e0-4df0-4cda-98d7-7e13c36dec5b:autoScalingGroupName/EcsSpotWorkshop-ASG-SPOT +``` + +The EC2 Spot auto scaling group should appear as below in the [console](https://console.aws.amazon.com/ec2autoscaling/home?#/details/EcsSpotWorkshop-ASG-SPOT?view=details) Note that there is no capacity provisioned i.e. desired capacity is zero in the ASG. We expect the capacity to scale up automatically when we deploy applications later. + + + +## Create the Spot Capacity Provider + +To create the capacity provider, follow these steps: + +* Open the [ECS console] (https://console.aws.amazon.com/ecs/home) in the region where you are looking to launch your cluster. 
+* Click **Clusters** +* Click [EcsSpotWorkshop] (https://console.aws.amazon.com/ecs/home?#/clusters/EcsSpotWorkshop) +* Click the tab **Capacity Providers** +* Click **Create** +* For Capacity provider name, enter **CP-SPOT** +* For Auto Scaling group, select **EcsSpotWorkshop-ASG-SPOT** +* For Managed Scaling, leave with default selection of **Enabled** +* For Target capacity %, enter **100** +* For Managed termination protection, leave with default selection of *Enabled* +* Click on **Create** on the bottom right + + +![Capacity Provider on Spot ASG](/images/ecs-spot-capacity-providers/CP_SPOT.png) + +{{% notice tip %}} +We encourage you to do a similar exercise to what you did with the OnDemand Auto Scaling Group. Check in the console that the +ECS Cluster has the new **CP-SPOT** Capacity Provider, and check out the configuration and scaling policy created on the **EcsSpotWorkshop-ASG-SPOT** +Auto Scaling Group +{{% /notice %}} + +Refresh the *Capacity Providers* tab, and you will see the CP-SPOT is created and attached to the ECS cluster. + +![Capacity Provider on Spot ASG](/images/ecs-spot-capacity-providers/CP-SPOT.png) + + \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/create_ecs_cluster.md b/content/ecs-spot-capacity-providers/module-1/create_ecs_cluster.md new file mode 100644 index 00000000..d39428c2 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/create_ecs_cluster.md @@ -0,0 +1,59 @@ +--- +title: "Create an ECS cluster" +weight: 20 +--- + +Let us first create an empty ECS cluster.To create an ECS cluster, follow these steps: + +* Open the [ECS console] (https://console.aws.amazon.com/ecs/home) in the region where you are looking to launch your cluster. 
+* Click **Create Cluster** +* Un-select **New ECS Experience** on the top left corner to work on previous ECS console version (Capacity providers not supported on new version) +* Under *Select cluster template* select **EC2 Linux + Networking** + +![ECS Cluster](/images/ecs-spot-capacity-providers/ecs_cluster_type.png) + +* Click **Next step** +* Under *Configure cluster* for *Cluster name*, enter **EcsSpotWorkshop** +* Select the checkbox **Create an empty cluster** +* Select the checkbox **Enable Container Insights** + +![ECS Cluster](/images/ecs-spot-capacity-providers/ecs_create_cluster.png) + +* Click **Create** +* Click **View Cluster** +* Click **Capacity Providers** tab + +The new ECS cluster will appear as below in the AWS Console. + +![ECS Cluster](/images/ecs-spot-capacity-providers/ecs_empty_cluster.png) + +{{% notice note %}} +**CloudWatch Container Insights** collects, aggregates, and summarizes metrics and logs from your containerized applications and microservices. It collects metrics for many resources, such as CPU, memory, disk, and network. Container Insights is available for Amazon Elastic Container Service (Amazon ECS), Amazon Elastic Kubernetes Service (Amazon EKS), and Kubernetes platforms on Amazon EC2. Amazon ECS support includes support for Fargate. You can **[read more about CloudWatch Container Insights here](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html)**. 
+{{% /notice %}} + +# Launch Templates & ECS Agent Bootstrapping + +{{% notice info %}} +Launch Template **User Data** section is key in ECS for actions such as **[bootstrapping container instances](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/bootstrap_container_instance.html)** and **[configuring the ECS agent](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html)** +{{% /notice %}} + +EC2 launch templates reduce the number of steps required to create an instance by capturing all launch parameters within one resource. For example, a launch template can contain the ECS optimized AMI, instance type, User data section, Instance Profile / Role, and network settings that you typically used to launch instances. When you launch an instance using the Amazon EC2 console, an AWS SDK, a CLI tool, or an EC2 Auto Scaling group (like we will use in this workshop), you can specify the launch template to use. + +In this case we have pre-created an EC2 launch template when we deployed the CloudFormation stack. You can use the AWS Management Console to see the configuration. Please note that launch templates are necessary in order to use EC2 Auto Scaling groups with mixed instances policy (to allow for mixing On-Demand and Spot Instances in an Auto Scaling group, and diversifying the instance type selection). + +![Launch Template](/images/ecs-spot-capacity-providers/c9_6.png) + +{{% notice tip %}} +Select launch template and navigate to Advanced Details tab. +**Review the user data section** of the EC2 launch template to see ECS Container agent configuration. +{{% /notice %}} + +![User Data](/images/ecs-spot-capacity-providers/ecs_launch_template.png) + +- **ECS_CLUSTER**: The cluster that will be used by the ECS Agent to bootstrap against and connect. Must match a name of an ECS Cluster. 
+ +- **ECS_CONTAINER_STOP_TIMEOUT**: Time to wait from when a task is stopped before its containers are forcefully stopped if they do not exit normally on their own + +- **ECS_ENABLE_SPOT_INSTANCE_DRAINING**: Whether to enable Spot Instance draining for the container instance. When true, if the container instance receives a Spot interruption notice, then the agent sets the instance status to DRAINING, which gracefully shuts down and replaces all tasks running on the instance that are part of a service. + +- **ECS_ENABLE_CONTAINER_METADATA**: When true, the agent creates a file describing the container's metadata. The file can be located and consumed by using the container environment variable $ECS_CONTAINER_METADATA_FILE \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/modify_default_cps.md b/content/ecs-spot-capacity-providers/module-1/modify_default_cps.md new file mode 100644 index 00000000..54cb5e0f --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/modify_default_cps.md @@ -0,0 +1,27 @@ +--- +title: "Setup default capacity provider strategy" +weight: 60 +--- + +Once that we have defined the ECS Cluster Capacity Providers, we can setup a default strategy. New services and tasks launched to this cluster will use this strategy by default. You can create however specific strategies different from the default for each service. + +For our default capacity provider we have considered the following application requirements: + +* There should be at least 2 tasks running on On-Demand instances to serve the normal traffic. The **base=2** configuration satisfies this requirement. +* Tasks deployed to On-Demand and Spot Instances, follow a 1:3 ratio to handle any additional traffic + +With this requirements we can set **EcsSpotWorkshop** cluster default capacity provider strategy, follow these steps: + +* Go to the ECS Cluster console and select the **EcsSpotWorkshop** ECS Cluster. 
+* Click on the **Update Cluster** option on the top right, and click **Add Another Provider** +* For Provider 1: select **CP-OD**, set base value to **2** and weight to **1** +* Click on **Add another provider** one more time +* For Provider 2: select **CP-SPOT**, leave base to default value of **0** and set weight to **3** +* Click on **Update** on bottom right + + +![Capacity Provider Strategy](/images/ecs-spot-capacity-providers/CPS.png) + +{{% notice note %}} +Check out the strategy configuration; it sets **`base=2`** and **`weight=1`** for CP-OD and **`weight=3`** for CP-SPOT. That means ECS first places 2 tasks (since base=2) on CP-OD and then splits the remaining tasks between CP-OD and CP-SPOT in a 1:3 ratio, so for every 1 task on CP-OD, 3 tasks are placed on CP-SPOT. +{{% /notice %}} diff --git a/content/ecs-spot-capacity-providers/module-1/selecting_spot_instance_types.md b/content/ecs-spot-capacity-providers/module-1/selecting_spot_instance_types.md new file mode 100644 index 00000000..75d319c4 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/selecting_spot_instance_types.md @@ -0,0 +1,96 @@ +--- +title: "Selecting Spot Instance Types" +date: 2018-08-07T11:05:19-07:00 +weight: 40 +draft: false +--- + +[Amazon EC2 Spot Instances](https://aws.amazon.com/ec2/spot/) offer spare compute capacity available in the AWS Cloud at steep discounts compared to On-Demand prices. EC2 can interrupt Spot Instances with two minutes of notification when EC2 needs the capacity back. One of the best practices for successful adoption of Spot instances is to implement **Spot instance diversification** as part of your configuration. Spot instance diversification helps to acquire capacity from multiple Spot Instance pools, both for scaling up and for replacing spot instances that may receive a spot instance termination notification. 
A Spot instance pool is a set of unused EC2 instances with the same instance type and size (for example, m5.large), availability zone (AZ), in the same region + +We can diversify Spot instances by selecting a mix of instances types and families from different pools that meet the same vCPU's and memory criteria. In the case of ECS we can check what's the ratio of vCPU and Memory used by our task resources. For example, look at the ECS task resource reservation in the file **ec2-task.json**: + +```plaintext +"cpu": "480", "memory": "1920" +``` + +This means the ratio for vCPU:Memory in our ECS task that would run in the cluster is **1:4**. Ideally, we should select instance types with similar vCPU:Memory ratio, in order to have good utilization of the resources in the EC2 instances. There are over 270 different instance types available on EC2 which can make selecting appropriate instance types difficult. **[amazon-ec2-instance-selector](https://github.com/aws/amazon-ec2-instance-selector)** helps you select compatible instance types for your application to run on. We can pass the command line options for resource criteria like vCPUs, memory, network performance, and much more and then return the available, matching instance types. + +{{% notice note%}} +To learn more about EC2 instance types, click [here](https://aws.amazon.com/ec2/instance-types/). As for **ec2-instance-selector**, it is an open source tool that makes calls to [DescribeInstanceTypes](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstanceTypes.html) APIs, on the specific region and filters instances based on the criteria selected in the command line. We encourage you to test what are the options available with `ec2-instance-selector` and run a few commands with it to familiarize yourself with the tool.For example, try running the same commands as you did before with the extra parameter **`--output table-wide`**. 
+{{% /notice %}} + +In our case, for the **1:4** vCPU to memory ratio, the smallest instance type which would satisfy these criteria from the latest generation of x86_64 EC2 instance types is m5.large. We will use **[amazon-ec2-instance-selector](https://github.com/aws/amazon-ec2-instance-selector)** to help us select the relevant instance types and families with an enough number of vCPUs and Memory. Let's first install **amazon-ec2-instance-selector** : + +``` +curl -Lo ec2-instance-selector https://github.com/aws/amazon-ec2-instance-selector/releases/download/v1.3.0/ec2-instance-selector-`uname | tr '[:upper:]' '[:lower:]'`-amd64 && chmod +x ec2-instance-selector +sudo mv ec2-instance-selector /usr/local/bin/ +ec2-instance-selector --version +``` + +Now that you have ec2-instance-selector installed, you can run `ec2-instance-selector --help` to understand how you could use it for selecting +Instances that match your workload requirements. For this workshop, we need to first get a group of instances that meet the following criteria: + +* 1:4 vCPU:RAM Ratio +* Instances have 2 vCPUs +* Instances don't have a GPUs +* Instances Architecture is: x86_64 (no ARM instances like A1 or m6g instances, for example) +* Instances of current generation (4th gen onwards) +* Instances that don't meet the regular expression `.*n.*|.*d.*`, so effectively discard instances such as: m5n, m5dn, m5d. + + +``` +ec2-instance-selector --vcpus-to-memory-ratio 1:4 --vcpus=2 --gpus 0 --current-generation -a x86_64 --deny-list '.*n.*|.*d.*' +``` + +This should display a list like the one that follows (note results might differ depending on the region). We will use these instance types as part or EC2 Auto Scaling groups. + +``` +m4.large +m5.large +m5a.large +t2.large +t3.large +t3a.large +``` + +{{% notice warning %}}Your workload may have other constraints that you should consider when selecting instances types. For example. 
**t2** and **t3** instance types are [burstable instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/burstable-performance-instances.html) and might not be appropriate for CPU bound workloads that require CPU execution determinism. Instances such as m5**a** are [AMD Instances](https://aws.amazon.com/ec2/amd/), if your workload is sensitive to numerical differences (i.e. financial risk calculations, industrial simulations) mixing these instance types might not be appropriate. +{{% /notice %}} + + +While in this example we have restricted our selection to similar instances and pools of the same size (of memory and vCPUs), in production +workloads the recommendation is to increase the instance selection by adding other sizes that respect the same vCPU to memory ratio. + +{{% notice info %}} +As a summary of Spot Best practices selection for ECS Auto Scaling Groups and Capacity Providers : **a)** Use different AZ's **b)** Diversify across multiple instance types-pools **c)** Diversify using multiple generation of similar hardware that keep the multiplier or ratio of cpu/mem close i.e: m4.large, m5.large, m4.xlarge, m5.xlarge. **d)** While diversifying in size, avoid very large spreads in instance sizes, and add contiguous 2 to 3 sizes i.e: large, xlarge, 2xlarge. or 2xlarge, 4xlarge, 8xlarge. +{{% /notice %}} + +**Exercise : How would you change the ec2-instance-selector command above to provide other instance sizes?** + +{{%expand "Click here to show the answer" %}} + +Just changing the parameters `vcpus-min` and `vcpus-max` will spread the filtering selection and provide similar instances of larger sizes adjacent +to the initial selection we made + +``` +ec2-instance-selector --vcpus-to-memory-ratio 1:4 --vcpus-min 2 --vcpus-max=4 --burst-support=0 --gpus 0 --current-generation -a x86_64 --deny-list '.*n.*|.*d.*' +``` + +In this case I've removed burstable instances. 
Note, when using 3 AZs this makes for a total of 18 capacity pools (6 instance types x 3 AZs) that Spot will use to provision capacity from + +``` +m4.large +m4.xlarge +m5.large +m5.xlarge +m5a.large +m5a.xlarge +``` + + +{{% /expand %}} + + + + + + diff --git a/content/ecs-spot-capacity-providers/module-1/service.md b/content/ecs-spot-capacity-providers/module-1/service.md new file mode 100644 index 00000000..bf8251c6 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/service.md @@ -0,0 +1,133 @@ +--- +title: "Create an ECS service" +weight: 80 +--- + +We are very close to testing our scaling rules. Before we create an ECS Service, there are a few things we need to do. First we need to build a container image and store it into ECR and then create an ECS task definition in preparation for our ECS Service. + +## Building a container image and storing it into ECR + +We need an application to scale! In this section we will build a Docker image using a simple python flask-based web application and deploy it in our ECS cluster using ECR (Amazon Elastic Container Registry). CloudFormation deployment has already created an entry in the ECR registry that we will use to store our webapp container image. + +Execute the lines below. This might take a couple of minutes. 
The lines below: + +- a) authenticate and retrieve a token to the repository that cloudformation created so we can perform upload operations later on +- b) moves the current working directory over the web application directory where there is a Dockerfile +- c) build a docker image with the application +- d) tag the docker image and upload to the ECR repository + +``` +export ECR_REPO_URI=$(aws ecr describe-repositories --repository-names ecs-spot-workshop/webapp | jq -r '.repositories[0].repositoryUri') +aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ECR_REPO_URI +cd ~/environment/ec2-spot-workshops/workshops/ecs-spot-capacity-providers/webapp/ +docker build --no-cache -t ecs-spot-workshop/webapp . +docker tag ecs-spot-workshop/webapp:latest $ECR_REPO_URI:latest +docker push $ECR_REPO_URI:latest +``` + +## Creating a task definition for our Service + +When creating new services, the service will make a reference to what type of tasks are launched within the service, hence we need to register +a `task definition`. We have prepared a simple task definition that uses the container image we just created and sets a few parameters such as +the resource required (CPU/Memory) and the ports that will be exposed. + +Run the following section. This creates a copy the template ECS Task from *templates/ec2-task.json* to the current directory and substitutes the template with the actual value of the docker image path. Finally it registers the task so it can be used by Services or deployed to ECS clusters. + +``` +cd ~/environment/ec2-spot-workshops/workshops/ecs-spot-capacity-providers/ +cp -Rfp templates/ec2-task.json . 
+sed -i -e "s#DOCKER_IMAGE_URI#$ECR_REPO_URI:latest#g" ec2-task.json +aws ecs register-task-definition --cli-input-json file://ec2-task.json +``` + +The task definition will look like this in the console: + +![Task](/images/ecs-spot-capacity-providers/task1.png) + +## Create an ECS Service + +To create the service, follow these steps: + +* In the ECS Console select the **EcsSpotWorkshopUpdate** or just **[click here](https://console.aws.amazon.com/ecs/home?#/clusters/EcsSpotWorkshop/services)** to open the **EcsSpotWorkshopUpdate** cluster view +* Select the **Services** Tab +* Click on **Create** +* For Capacity provider strategy, leave it to default value **Cluster default Strategy** +* For Task Definition Family, select **ec2-task** +* For Task Definition Revision, select **1** +* For Cluster, leave default value **EcsSpotWorkshop** +* For Service name, **ec2-service-split** +* For Service type, leave it to the default value **REPLICA** +* For Number of tasks, enter **10** + +![Service](/images/ecs-spot-capacity-providers/Ser1.png) + +* Leave the default values for **Minimum healthy percent** and **Maximum percent** +* Under Deployments section, leave it to default values +* Under Task Placement section, for Placement Templates, select **BinPack** +* Under Task tagging configuration section, leave it to default values +* Click on **Next Step** + +![Service Binpack](/images/ecs-spot-capacity-providers/ser2.png) + +* Under Configure network section, in Load balancing, for Load balancer type*, select **Application Load Balancer** +* For Service IAM role, leave default value +* For Load balancer name, select **EcsSpotWorkshop** + +![Service ALB](/images/ecs-spot-capacity-providers/ecs_service_alb.png) + +* Under Container to load balance, for Container name : port, click on **add to load balancer** +* For Production listener port, Select **HTTP:80** from the dropdown list +* For Production listener protocol, leave default value of **HTTP** +* For Target group name, 
select **EcsSpotWorkshop** from the list +* Leave default values for *Target group protocol*, *Target type*, *Path pattern*, *Health check path* +* Click on **Next Step** + +![Service ALB Target Group](/images/ecs-spot-capacity-providers/ecs_service_alb_listener.png) + +* Under Set Auto Scaling (optional), leave default value for service auto scaling +* Click on **Next Step** +* Click on **Create Service** +* Click on **View Service** + + +{{% notice note %}} +It may take up to a couple of mintues for the capacity to be provisioned and the task to be running. You can check the **[CloudWatch Dashboard](https://console.aws.amazon.com/cloudwatch/home?#dashboards:name=EcsSpotWorkshop)** or the C3VIS tool that we used in the previous sections to check out for changes in the cluster once the service is created. +{{% /notice %}} + +## Exercise : + +**Question:** Given the Capacity Provider Strategy in use (OnDemand Capacity Provider **`Base=2, weight=1`**, Spot Capacity Provider **`Base=0, weight=3`**), Could you predict how many out of the 10 tasks will be running in Spot instances versus how many will be running on OnDemand Instances? + +{{% notice tip %}} +The CLI can help you to provide details of how tasks are spread across capacity providers. You can use [`aws ecs describe-tasks`](https://docs.aws.amazon.com/cli/latest/reference/ecs/describe-tasks.html) to validate your response. +{{% /notice %}} + +{{%expand "Click here to show the answer" %}} +Given the split OnDemand Capacity Provider **`Base=2, weight=1`**, Spot Capacity Provider **`Base=0, weight=3`**, we should expect + +* The first 2 tasks to be deployed on demand, leaving us with extra 8 tasks to distribute according to weights. +* The 8 tasks get's distribute on a 1:3 ratio, meaning that 2 tasks more go to OnDemand and 6 to Spot + +The tasks should distributed as **OnDemand = 4 tasks** and **Spot =6 tasks**. 
We can verify it by running the following command in the Cloud9 terminal + +``` +export cluster_name=EcsSpotWorkshop +export service_name=ec2-service-split +aws ecs describe-tasks \ +--tasks $(aws ecs list-tasks --cluster $cluster_name \ +--service-name $service_name --query taskArns[*] --output text) \ +--cluster $cluster_name \ +--query 'sort_by(tasks,&capacityProviderName)[*].{TaskArn:taskArn,CapacityProvider:capacityProviderName,Instance:containerInstanceArn,AZ:availabilityZone,Status:lastStatus}' \ +--output table +``` + +Your results should be similar to the ones below: + +![Results Table](/images/ecs-spot-capacity-providers/table.png) + +{{% /expand %}} + + + + + diff --git a/content/ecs-spot-capacity-providers/module-1/service_view.md b/content/ecs-spot-capacity-providers/module-1/service_view.md new file mode 100644 index 00000000..d38b686c --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/service_view.md @@ -0,0 +1,153 @@ +--- +title: "ECS Managed Scaling" +weight: 90 +--- + +## Checking the application + +So far we have created the service and this has started procuring capacity accordingly and exposing the service through +the load balancer that we attached to the workload. Before we proceed with scaling out and down the application, let's +check the application and what it does. + +**Exercise: How can I get access to the web application deployed?** + +{{% notice tip %}} +To get access to the application you need to find the URL associated with the service. The URL has been associated to the Load Balancer +that was created as part of the CloudFormation stack. There are a few ways you could get the URL; you could perhaps review AWS console +sections like `CloudFormation`, or the `LoadBalancer` section in EC2. +{{% /notice %}} + +{{%expand "Click here to show the answer" %}} + +There are a couple of ways you can do this. One is using the Cloud9 terminal. 
In the previous steps we did dump the outpout of the CloudFormation stack to +environment variables. Executing this on the Cloud9 initial terminal will show the URL of the Load balancer that is connected with the service. + +``` +echo "URL of the service is http://${ALBDNSName}" + +``` + +The output should be something similar to the line below. Just click on that url and open a new browser window to it. + +``` +URL of the service is http://EcsSpotWorkshop-XXXXXXXXXX..elb.amazonaws.com +``` + +A second way to get to the URL is to get the DNS name of the Application Load Balancer from the output section of the CloudFormation stack. + +![Get DNS](/images/ecs-spot-capacity-providers/CFN.png) + +Open a browser tab and enter this URL. You should see a simple web page displaying various useful info about the task such IP address, availability zone, lifecycle of the EC2 instance. + + +Once you get the URL, open a browser tab and enter this URL. You should see a simple web page displaying various useful info about the task such IP address, availability zone, lifecycle of the EC2 instance. + +![Application](/images/ecs-spot-capacity-providers/app.png) + +{{% notice note %}} +If you keep refreshing the web page, you will notice the content of the page changes as the Application Load Balancer does route requests to different tasks across the instances in the ECS Cluster. We have highlighted in the screenshot above the entries that will change when hitting +a different task. 
+{{% /notice %}} + +{{% /expand %}} + + +## ECS Managed Scaling (CAS) in Action + +As we explained earlier, ECS offers two different type of scaling: + +* **[Service Auto Scaling](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/service-auto-scaling.html)** - as the ability to increase or decrease the desired count of tasks in your Amazon ECS service automatically + +* **[Cluster Auto Scaling (CAS)](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/cluster-auto-scaling.html)** - as the ability to manage the scaling actions of Auto Scaling group asociated to Capacity Providers. + +In this section we will simulate a Service Auto Scaling, by changing manually the number of tasks on the service. We will see how ECS Managed Cluster Auto Scaling calculates the **CapacityProviderReservation** metric for different Capacity Providers, and procures capacity accordingly. So far we have deployed our initial 10 tasks, you can check the status in the ECS Console by clicking on Cluster **EcsSpotWorkshop** and then **ec2-service-split** or alternatively **[click here](https://console.aws.amazon.com/ecs/home?#/clusters/EcsSpotWorkshop/services/ec2-service-split/details)** to take you to the right console.: + +![Capacity Provider](/images/ecs-spot-capacity-providers/CP4.png) + +Let's increase manually the number of tasks on the service and increase them up to 22. Execute the following on Cloud9 + +``` +aws ecs update-service --cluster EcsSpotWorkshop --service ec2-service-split --desired-count 22 --output table +``` + +The change in the numbers of desired tasks should result in a change to the metrics **CapacityProviderReservation** associated +with the Capacity Providers. If you recall from previous sections, calculation of the **CapacityProviderReservation** is done with +the following formula `CapacityProviderReservation = ( M / N ) * 100`. 
+ +**Exercise: Answers the following questions** + +* 1) **What will be the task distribution Spot vs OnDemand?** +* 2) **Can you guess how many new instances will be created?** +* 3) **What is the maximum value we should expect for both the Spot and OnDemand CapacityProviderReservation metric?** +* 4) **Can you find which CloudWatch Alarms are trigger when CapacityProviderReservation increases?** + +{{%expand "Click here to show the answer" %}} + +**Answer to first question:** Let's find out first how many tasks will be created. As we did before, the weights between OnDemand and Spot on the Capacity Providers are OnDemand=1 and Spot=3. We are creating an extra 12 tasks, which means 3 tasks will be OnDemand and 9 will be Spot. This is on top of our current split of 4 OnDemand tasks and 6 Spot. The final distribution should be **7 On Demand tasks and 15 Spot tasks**. + +**Answer to second question:** The way that we have configured the cluster, each instance can provide `binpack` placement of up to 4 tasks (all instances pool selected offer 2vCPUs and 8GB of RAM). Given that for our capacity providers we selected `Target Capacity = 100%`, we expect a +**total of 2 OnDemand Instances and 4 Spot Instances**. + +**Answer to third question:** The calculation of the CapacityProviderReservation for each Capacity Provider is done with the formula `CapacityProviderReservation = ( M / N ) * 100`. For the Spot Capacity provider, N = 2 is the current number of Spot instances, M = 4 +is equal to the number of instances we need to place existing and pending tasks. Using the formula `( M / N ) x 100` we expect the +value to be **`(4 / 2) * 100 = 200`**. The calculation for the OnDemand Capacity provider, yield the same result (200). + + +**Answer to the forth question**: We should be able to confirm the value of the **CapacityProviderReservation** in the **[AWS Cloudwatch console] (https://console.aws.amazon.com/cloudwatch/home)**, EcsSpotWorkshop dashboard. 
At some point you should see in the chart how the **CapacityProviderReservation** metric value is 200 for both CP-OD and CP-SPOT. This triggers the CloudWatch alarms associated with +the target tracking policy in the Auto Scaling groups. + +![Capacity Provider Reservation](/images/ecs-spot-capacity-providers/cp5.png) + + +If you are in time, you should see how the CloudWatch Alarms are triggered. Go to the CloudWatch console and click on the [Alarms section](https://console.aws.amazon.com/cloudwatch/home?#alarmsV2:!alarmStateFilter=ALARM). + + +{{% notice note %}} +Note that the Alarms are triggered when they go over the value 100. This value matches the `Target Capacity` that we associated with the Capacity +Provider. You can change the `Target Capacity` to control how much you over-provision that Capacity Provider. +{{% /notice %}} + +![Cloud Watch Alarms](/images/ecs-spot-capacity-providers/ecs_service_alarms.png) + +Finally to see the activity in the Auto Scaling Group, you can go to EC2 console, select [EC2 Spot ASG](https://console.aws.amazon.com/ec2autoscaling/home?#/details/EcsSpotWorkshop-ASG-SPOT?view=activity) and click the Activity tab. You should see two instances are just getting launched. + +![ASG Scale Out](/images/ecs-spot-capacity-providers/ecs_asg_spot_scale_out.png) + + +{{% notice info %}} +When using instances of multiple sizes in the AutoScaling groups there are a few changes in the calculations we've done above. For example, let's +say we add m5.xlarge to our Spot Auto Scaling Group. When different sizes are used, ECS Managed Scaling (CAS) does assume a conservative scenario +where the largest instances will be provisioned and will request capacity accordingly. Then it will iterate to smaller instances if needed to finally +reach the right number of instances. +{{% /notice %}} + + {{% /expand %}} + + + + +## Optional Exercises + + {{% notice warning %}} + Some of these exercises will take time for CAS to scale up and down. 
If you are running this workshop at a AWS event or with limited time, + we recommend to come back to this section once you have completed the workshop, and before getting into the cleanup section. + {{% /notice %}} + + In this section we propose additional exercises you can do at your own pace to get a better understanding of Capacity Providers, Managed + Scaling and Spot instances best practices. We recommend users adopting spot in Test and Production workloads to complete the exercises below + to get a better understanding of the different attributes they can change when setting up their cluster configuration. Note we are not + providing a solution to this exercises however, you should be able to reach the solutions with with the skills acquired in the previous sections. + + * Scale down the number of desired tasks back to 10. Answer the following questions: a) How long do you expect the trigger to scale down + instances to take? b) Can you predict how many instances you will end up with? Tip : [read about placement strategies](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-placement-strategies.html) + + * The Service definition we have used so far has used the `BinPack` strategy as the way to define how tasks are placed. a) What would be the effect + of changing the placement strategy on the service and repeating the Scale-out & Scale-in exercises we've done so far? b) Would the number of instances + be the same ? c) In which situations you may want to use `Spread` vs `BinPack` and what are the pros / cons of each placement strategy ? Tip: + [read about placement strategies](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-placement-strategies.html) + + * So far in the Spot Auto Scaling Group, we have used a Mixed Instance Group (MIG) of instances with the same number of vCPU's and Memory. This + has made our calculations and understanding of CAS simple, but it limits the number of pools we diversify Spot instances on. 
a) Edit the Spot + Auto Scaling Group and add instances of different sizes that still respect the vCPU to Memory ratio for example `m5.xlarge`, `m5a.xlarge` ,`m4.xlarge`. + b) Can you explain what are the benefits of this configuration? c) Repeat the scaling exercises above and check if this time around you can predict + how many instances will be used? d) Can you explain the trade-offs of this configuration? Tip: [refresh your spot best Practices knowledge](https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-leveraging-ec2-spot-instances/spot-best-practices.html), Read about [Spot Capacity optimized allocation Strategy](https://aws.amazon.com/blogs/compute/introducing-the-capacity-optimized-allocation-strategy-for-amazon-ec2-spot-instances), read how [ECS Managed Cluster works with mixed instance sizes](https://aws.amazon.com/blogs/containers/deep-dive-on-amazon-ecs-cluster-auto-scaling/) \ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-1/spot_inturruption_handling.md b/content/ecs-spot-capacity-providers/module-1/spot_inturruption_handling.md new file mode 100644 index 00000000..778ca708 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/spot_inturruption_handling.md @@ -0,0 +1,63 @@ +--- +title: "EC2 Spot Interruption Handling in ECS" +weight: 100 +--- + +The Amazon EC2 service interrupts your Spot instance when it needs the capacity back. It provides a Spot instance interruption notice, 2 minutes before the instance gets terminated. The EC2 spot interruption notification is available in two ways: + +1. **Amazon EventBridge Events:** EC2 service emits an event two minutes prior to the actual interruption. This event can be detected by Amazon CloudWatch Events. + +1. **EC2 Instance Metadata service (IMDS):** If your Spot Instance marked for termination by EC2, the instance-action item is present in your instance metadata. 
+ +While for EC2 applications we may need to provide an implementation to handle the events described above so we can gracefully terminate our +application upon a Spot notification for termination, that is not the case with ECS. On ECS, the ECS agent deployed on the instances can be configured to automatically capture and handle the Spot interruption instance +notification. You can [read more in the ECS Documentation](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-spot.html) + +During this workshop, the Launch Template that we used to create the Auto Scaling Groups had the following entry: +```plaintext +echo "ECS_ENABLE_SPOT_INSTANCE_DRAINING=true" >> /etc/ecs/ecs.config +``` + +When Amazon ECS Spot instance draining is enabled on the instance, the ECS container agent receives the Spot instance interruption notice and places the instance in DRAINING status. + + +## Graceful application termination + +By enabling the `ECS_ENABLE_SPOT_INSTANCE_DRAINING` in the ECS agent configuration, the ECS agent will monitor the Spot interruption +signal and place the instance in `DRAINING` status. When an instance is set to `DRAINING` Amazon ECS prevents new tasks from being scheduled +on the instance. Tasks are also moved from RUNNING to STOPPED state using the [StopTask API](https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_StopTask.html). The StopTask API is the equivalent of docker stop, it is issued to the containers running in the task and results in +a **SIGTERM** System V signal sent to the application. + +As a best practice, applications should capture the **SIGTERM** signal and implement a graceful termination mechanism. The ECS agent waits +up to `ECS_CONTAINER_STOP_TIMEOUT`, by default 30 seconds, to handle the graceful termination of the process. After the 30 seconds a **SIGKILL** +signal is sent and the containers are forcibly stopped. 
The `ECS_CONTAINER_STOP_TIMEOUT` can be extended to provide some extra time, but +note that anything above the 120 seconds (2 minute notification for Spot) will result in a Spot termination. + +For this workshop we used a Python application. The code snippet below shows how our python application can capture the +IPC (Inter Process Communication) relevant signals and call a specific method `exit_gracefully` to coordinate graceful termination +activities. The code below is obviously is a simplification, as the coordination may require coordinating threads and become more complex. +Implementing a graceful termination is however highly recommended to among other things: + +* Log the reason the process is being terminated +* Release resources. For example terminate connection with databases so that the Database can re-use those connections with other instances +* Complete in-flight operations but stop processing new operations +* Flush buffers and do best effort checkpointing (note there is a 30sec to 120sec limit) +* ... + + +```python +class Ec2SpotInterruptionHandler: + def __init__(self): + signal.signal(signal.SIGINT, self.exit_gracefully) + signal.signal(signal.SIGTERM, self.exit_gracefully) + + def exit_gracefully(self, signum, frame): + print("\nReceived {} signal".format(self.signals[signum])) + if self.signals[signum] == 'SIGTERM': + print("SIGTERM Signal Received. Let's wrap up..") +``` + +***Congratulations!*** you have successfully completed the section on *Using Spot Instances with Auto Scaling groups capacity providers*. + +You may continue to **optional** section on how to save costs using ***Fargate Spot*** capacity providers. 
+ diff --git a/content/ecs-spot-capacity-providers/module-1/visualizing_ecs_metrics.md b/content/ecs-spot-capacity-providers/module-1/visualizing_ecs_metrics.md new file mode 100644 index 00000000..1120bacf --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-1/visualizing_ecs_metrics.md @@ -0,0 +1,68 @@ +--- +title: "Visualizing ECS Scaling Metrics" +weight: 70 +--- + +## Visualizing ECS Metrics with CloudWatch Dashboards + +Before we start testing our cluster scaling, let's check out how to visualize the scaling activities in the cluster. Go back to your initial terminal and run the command below to create the CloudWatch dashboard to watch key metrics + +``` +cd ~/environment/ec2-spot-workshops/workshops/ecs-spot-capacity-providers/ +sed -i -e "s#%AWS_REGION%#$AWS_REGION#g" cwt-dashboard.json +aws cloudwatch put-dashboard --dashboard-name EcsSpotWorkshop --dashboard-body file://cwt-dashboard.json + +``` +The output of the command appears as below. + +```plaintext +{ +"DashboardValidationMessages": [] +} +``` + +In the [AWS CloudWatch console](https://console.aws.amazon.com/cloudwatch/home) select the newly created dashboard, drag it right/down to expand to view the graphs properly and save the dashboard. + +![Cloud Watch](/images/ecs-spot-capacity-providers/cwt4.png) + +**Question: What are initial values of CapacityProviderReservation metrics for CP-OD and CP-SPOT capacity providers when there are no tasks or instances running in the ECS cluster, and why?** + +{{%expand "Click to expand the answer." %}} +![CPR Metric](/images/ecs-spot-capacity-providers/CP3.png) + +Why are the values 100? If you recall from the introduction, the metric **CapacityProviderReservation** is obtained by using the following formula. + +```plaintext +Capacity Provider Reservation = M/N * 100 +``` + +In this case both `N` and `M` are 0 hence the division comes up with an undefined value. There are a few special cases where the formula is not used. 
If M and N are both zero, meaning no instances, no running tasks, and no provisioning tasks, then **`CapacityProviderReservation = 100`**. For more details on how ECS cluster auto scaling works, refer to this [blog](https://aws.amazon.com/blogs/containers/deep-dive-on-amazon-ecs-cluster-auto-scaling/). + +{{% /expand%}} + +## Visualizing ECS and Cluster metrics with C3VIS (Cloud Container Cluster Visualizer) Tool + +[C3vis](https://github.com/ExpediaDotCom/c3vis) is an open source tool useful to show the visual representation of the tasks placements across instances in an ECS Cluster. We will use it as an example to display how tasks are placed in different capacity providers. Let's first set up the application. Go back to your Cloud9 environment and create a new terminal; we will use that terminal to run and expose C3VIS. + +The following screenshot shows how to create a new terminal : +![c3vis](/images/ecs-spot-capacity-providers/cloud9_new_terminal.png) + +The following lines clone the c3vis tool repository, build the c3vis application docker image and run the container. + +``` +cd ~/environment/ +git clone https://github.com/ExpediaDotCom/c3vis.git +cd c3vis +docker build -t c3vis . +docker run -e "AWS_REGION=$AWS_REGION" -p 8080:3000 c3vis +``` + +Open the preview application in your cloud9 environment and click on the arrow on the top right to open the application in the browser + +![c3vis](/images/ecs-spot-capacity-providers/c3vs_tool.png) + +The initial screen will appear as below, since there are no tasks or instances running in the cluster for now. + +![c3vis](/images/ecs-spot-capacity-providers/c3vis2.png) + +Since our ECS cluster is empty and does not have any instances, the c3vis application shows an empty page. 
\ No newline at end of file diff --git a/content/ecs-spot-capacity-providers/module-2/_index.md b/content/ecs-spot-capacity-providers/module-2/_index.md new file mode 100644 index 00000000..97815027 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-2/_index.md @@ -0,0 +1,17 @@ +--- +title: "Using AWS Fargate Spot capacity providers" +weight: 60 +--- + +![Fargate](/images/ecs-spot-capacity-providers/fargate.png) + +[AWS Fargate](https://aws.amazon.com/fargate/) is a technology for Amazon ECS that allows you to run containers without having to manage servers or clusters. With AWS Fargate, you no longer have to provision, configure, and scale clusters of virtual machines to run containers. This removes the need to choose server types, decide when to scale your clusters, or optimize cluster packing. AWS Fargate removes the need for you to interact with or think about servers or clusters. Fargate lets you focus on designing and building your applications instead of managing the infrastructure that runs them. + + +## AWS Fargate capacity providers + +In this section, **we will learn how to leverage ECS FARGATE and FARGATE_SPOT capacity providers to optimize costs**. + + +Amazon ECS cluster capacity providers enable you to use both Fargate and Fargate Spot capacity with your Amazon ECS tasks. With Fargate Spot you can run interruption tolerant Amazon ECS tasks at a discounted rate compared to the Fargate price. Fargate Spot runs tasks on spare compute capacity. When AWS needs the capacity back, your tasks will be interrupted with a two-minute warning notice. 
+ diff --git a/content/ecs-spot-capacity-providers/module-2/add_fargate_cp.md b/content/ecs-spot-capacity-providers/module-2/add_fargate_cp.md new file mode 100644 index 00000000..85c553e4 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-2/add_fargate_cp.md @@ -0,0 +1,26 @@ +--- +title: "Add Fargate capacity providers to ECS Cluster" +weight: 10 +--- + +Before we deploy tasks on ECS Fargate, let us first add Fargate capacity providers to the ECS cluster. Unlike with EC2 Auto Scaling Groups, +the Capacity Providers `FARGATE` and `FARGATE_SPOT` are already predefined by default, so the only thing we need to do is attach them +to our cluster by running the following command. + +``` +aws ecs put-cluster-capacity-providers \ + --cluster EcsSpotWorkshop \ + --capacity-providers FARGATE FARGATE_SPOT CP-OD CP-SPOT \ + --default-capacity-provider-strategy capacityProvider=FARGATE,weight=1 capacityProvider=FARGATE_SPOT,weight=1 \ + --region $AWS_REGION +``` + +{{% notice note %}} +The command above not only adds the two capacity providers but also modifies the cluster default capacity provider strategy. +In this case we set both weights to 1. However, if you run the command `aws ecs describe-clusters --cluster EcsSpotWorkshop` you +will see how the service `ec2-service-split` still holds the initial capacity provider strategy. +{{% /notice %}} + +The strategy sets a weight of 1 for both FARGATE and FARGATE_SPOT as the default capacity provider strategy. That means tasks are distributed equally across FARGATE and FARGATE_SPOT. The ECS cluster should now contain 4 capacity providers, i.e. CP-OD, CP-SPOT, FARGATE and FARGATE_SPOT. 
+ +![Fargate Capacity Providers](/images/ecs-spot-capacity-providers/ecs_fargate_cps.png) diff --git a/content/ecs-spot-capacity-providers/module-2/fargate_service.md b/content/ecs-spot-capacity-providers/module-2/fargate_service.md new file mode 100644 index 00000000..1494707e --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-2/fargate_service.md @@ -0,0 +1,102 @@ +--- +title: "Create ECS Fargate service" +weight: 15 +--- +Now that we have a Fargate task registered, let's define an ECS Fargate service to deploy Fargate tasks across the FARGATE and FARGATE_SPOT +capacity providers. In this case, we will be overriding the cluster default capacity provider strategy (with FARGATE weight 1 and FARGATE_SPOT weight 1) and instead apply a weight of 1 to FARGATE_SPOT and weight 3 to FARGATE. For every 1 task on FARGATE_SPOT deployed in this service there will be 3 tasks on FARGATE. + +We will create an ECS service to place tasks in the new VPC created by the CloudFormation stack. By executing the command below, we can +load all the Outputs from the CloudFormation stack into environment variables. We will need some of the environment variables such as: +`$VPCPublicSubnets`, `$vpc`, and the default `SECURITY_GROUP` for the vpc. + +``` +export STACK_NAME=EcsSpotWorkshop +for output in $(aws cloudformation describe-stacks --stack-name ${STACK_NAME} --query 'Stacks[].Outputs[].OutputKey' --output text) +do + export $output=$(aws cloudformation describe-stacks --stack-name ${STACK_NAME} --query 'Stacks[].Outputs[?OutputKey==`'$output'`].OutputValue' --output text) + eval "echo $output : \"\$$output\"" +done +export SECURITY_GROUP=$( aws ec2 describe-security-groups --filters Name=vpc-id,Values=$vpc Name=group-name,Values='default' | jq -r '.SecurityGroups[0].GroupId') +echo "SECURITY_GROUP : $SECURITY_GROUP" +``` + +We can now create the ECS service. We will name it **fargate-service-split**. We will deploy a total of 4 different tasks. 
Execute the command below: + +``` +aws ecs create-service \ + --capacity-provider-strategy capacityProvider=FARGATE,weight=3 capacityProvider=FARGATE_SPOT,weight=1 \ + --cluster EcsSpotWorkshop \ + --service-name fargate-service-split \ + --task-definition fargate-task:1 \ + --desired-count 4 \ + --region $AWS_REGION \ + --network-configuration "awsvpcConfiguration={subnets=[$VPCPublicSubnets],securityGroups=[$SECURITY_GROUP],assignPublicIp="ENABLED"}" + + +``` + +{{% notice note %}} +Note how the command above uses the environment variables that we just read to define attributes such as +the VPC and subnets in which Fargate tasks will run, as well as the security group to be used. Also observe how we are overriding the +capacity provider strategy with the `--capacity-provider-strategy` parameter, just for this specific service. The custom strategy +sets a weight of 3 for the FARGATE and 1 for the FARGATE_SPOT capacity provider. +{{% /notice %}} + +**Exercise: How many tasks are you expecting on FARGATE? How many on FARGATE_SPOT? Verify how the tasks spread across FARGATE and FARGATE_SPOT under the custom strategy.** + +{{%expand "Click here to show the answer" %}} + +Similar to what we did before, we can run the following command to see how tasks spread across capacity providers. + +``` +aws ecs describe-tasks \ +--tasks $(aws ecs list-tasks --cluster EcsSpotWorkshop \ +--service-name fargate-service-split --query taskArns[*] --output text) \ +--cluster $cluster_name \ +--query 'sort_by(tasks,&capacityProviderName)[*].{TaskArn:taskArn,CapacityProvider:capacityProviderName,Instance:containerInstanceArn,AZ:availabilityZone,Status:lastStatus}' \ +--output table +``` + +The output of the above command should display a table as below. + +![Table](/images/ecs-spot-capacity-providers/table1.png) + +**3 tasks were placed on FARGATE** and **1 task on FARGATE_SPOT** capacity providers, as expected. 
+ +{{% /expand %}} + + +## Spot Interruption Handling on ECS Fargate Spot + +When tasks using Fargate Spot capacity are stopped because of a Spot interruption, a two-minute warning is sent before a task is stopped. +So far this is similar to the EC2 case. There are, however, a few differences. + +* Fargate Spot is configured automatically to capture Spot Interruptions and set the task in DRAINING mode. A **SIGTERM** is sent to the task +and containers, and the application is expected to capture the **SIGTERM** signal and proceed in the same way as in the EC2 case with a +graceful termination (the implementation is, to all effects, the same). + +* The container definition can define the `stopTimeout` attribute (30 seconds by default) and increase the value up to 120 seconds. This +is the time between the **SIGTERM** and the **SIGKILL** termination signals, after which the task will be forced to finish. + +* Finally, Fargate manages serverless containers; as such, there is no access to instance metadata or signals for spot terminations that come +through CloudWatch/EventBridge state changes for instances. Instead, you can monitor Fargate Spot interruptions with EventBridge by +checking for ECS task state changes. Spot interruption change states will show up when the `detail-type` is `ECS Task State Change` and the +`stoppedReason` is set to `Your Spot Task was interrupted.` You can read more [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/fargate-capacity-providers.html#fargate-capacity-providers-termination). + +## Optional Exercises + + {{% notice warning %}} + Some of these exercises will take time for CAS to scale up and down. If you are running this workshop at an AWS event or with limited time, + we recommend coming back to this section later. + {{% /notice %}} + + In this section we propose additional exercises you can do at your own pace to get a better understanding of Capacity Providers and Fargate. 
+ Note that we are not providing solutions for this section. You should be able to reach the solution by using some of your + newly acquired skills. + + +* Change the default strategy of the cluster to FARGATE weight 4 and FARGATE_SPOT weight 1. What is the impact that you expect on existing services +such as `fargate-service-split` and `ec2-service-split`? + +* Oops, it seems that we misconfigured quite a few things on the service and now we cannot get access to the application! Is there any way you can reconfigure the service so customers can get access to the web application? + diff --git a/content/ecs-spot-capacity-providers/module-2/fargate_task.md b/content/ecs-spot-capacity-providers/module-2/fargate_task.md new file mode 100644 index 00000000..d9f06860 --- /dev/null +++ b/content/ecs-spot-capacity-providers/module-2/fargate_task.md @@ -0,0 +1,19 @@ +--- +title: "Create ECS Fargate Tasks" +weight: 10 +--- + +In this section, we will register a task definition for Fargate tasks. Run the command below to create the task definition: + +``` +aws ecs register-task-definition --cli-input-json file://fargate-task.json +``` + +{{% notice tip %}} +Take some time to read the Fargate task definition **fargate-task.json** file. Check the setting of properties such as +**requiresCompatibilities** and read more in the **[Fargate Task Definition](https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-defs.html)** documentation. 
+{{% /notice %}} + +The task will look like this in the console + +![Fargate Task](/images/ecs-spot-capacity-providers/fargate_task1.png) diff --git a/content/ecs-spot-capacity-providers/prerequisites.md b/content/ecs-spot-capacity-providers/prerequisites.md new file mode 100644 index 00000000..c6e6bd6a --- /dev/null +++ b/content/ecs-spot-capacity-providers/prerequisites.md @@ -0,0 +1,22 @@ +--- +title: "Prerequisites" +weight: 10 +--- + +To run through this workshop we expect you to have some familiarity with [Docker](https://en.wikipedia.org/wiki/Docker_(software)), AWS, and a container orchestration tool such as [Amazon Elastic Container Service (ECS)](https://aws.amazon.com/ecs), [Amazon Elastic Kubernetes Service (EKS)](https://aws.amazon.com/eks/), or [Kubernetes](https://kubernetes.io/). During the workshop you will use the [AWS Cloud9](https://aws.amazon.com/cloud9/) IDE to run [AWS CLI](https://aws.amazon.com/cli/) commands. Use the AWS region that is specified by the facilitator when running this workshop at an AWS hosted event. You may use any AWS region that supports Cloud9 (you can check [here](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services/)) while running it in self-paced mode in your own AWS account. + +## Conventions: + +Throughout this workshop, we provide commands for you to run in the Cloud9 terminal (not in your local terminal). These commands will look like: + +
+$ ssh -i PRIVATE_KEY.PEM ec2-user@EC2_PUBLIC_DNS_NAME
+
+ +The command starts after `$`. Words that are ***UPPER_ITALIC_BOLD*** indicate a value unique to your environment. For example, the ***PRIVATE\_KEY.PEM*** refers to the private key of an SSH key pair you've created, and the ***EC2\_PUBLIC\_DNS\_NAME*** is a value specific to an EC2 instance launched in your account. + +## General requirements and notes: + +1. This workshop is self-paced. The instructions will walk you through achieving the workshop’s learning objective using the AWS Management Console and CLI. + +2. While the workshop provides step-by-step instructions, *please take a moment to look around and understand what is happening at each step* as this will enhance your learning experience. The workshop is meant as a getting started guide, but you will learn the most by digesting each of the steps and thinking about how they would apply in your own environment and in your own organization. You can even consider experimenting with the steps to challenge yourself. \ No newline at end of file diff --git a/content/monte-carlo-on-ec2-spot-fleet/_index.md b/content/monte-carlo-on-ec2-spot-fleet/_index.md index a2a9ef5e..b3e838c7 100644 --- a/content/monte-carlo-on-ec2-spot-fleet/_index.md +++ b/content/monte-carlo-on-ec2-spot-fleet/_index.md @@ -1,7 +1,7 @@ --- title: "Run Monte Carlo Simulations on EC2 Spot Fleet" date: 2019-01-24T09:05:54Z -weight: 80 +weight: 90 pre: "" --- diff --git a/content/using-sagemaker-managed-spot-training/_index.md b/content/using-sagemaker-managed-spot-training/_index.md index 5424841e..d346b6f7 100644 --- a/content/using-sagemaker-managed-spot-training/_index.md +++ b/content/using-sagemaker-managed-spot-training/_index.md @@ -1,7 +1,7 @@ --- title: "Using Amazon SageMaker Managed Spot Training" date: 2019-10-30T09:05:54Z -weight: 50 +weight: 40 pre: "" --- diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template 
b/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template index b53f715a..39008190 100644 --- a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template +++ b/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template @@ -8,7 +8,7 @@ availabilityZones: [${AWS_AZS}] metadata: name: eksworkshop-eksctl region: ${AWS_REGION} - version: "1.13" + version: "1.18" nodeGroups: - name: gpu-nodegroup diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template b/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template index 1e44f2fd..1f691e5a 100644 --- a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template +++ b/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template @@ -10,7 +10,7 @@ kind: ClusterConfig metadata: name: eksworkshop-eksctl region: ${AWS_REGION} - version: "1.13" + version: "1.18" #vpc: # securityGroup: "sg-0f2ae54eb340e8191" diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md b/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md index f8a0ab19..c6bae9b7 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md +++ b/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md @@ -7,7 +7,7 @@ weight: 10 # Getting Started To start the workshop, follow one of the following depending on whether you are... 
-* ...[running the workshop on your own (in your own account)]({{< relref "self_paced" >}}), or -* ...[attending an AWS hosted event (using AWS provided hashes)]({{< relref "aws_event" >}}) +* ...[running the workshop on your own (in your own account)]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md" >}}), or +* ...[attending an AWS hosted event (using AWS provided hashes)]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md" >}}) -Once you have completed with either setup, continue with **[Create a Workspace]({{< relref "workspace.md" >}})** +Once you have completed with either setup, continue with **[Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/workspace.md" >}})** diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md b/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md index 2a59556f..9d592d81 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md +++ b/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md @@ -9,7 +9,7 @@ weight: 20 {{% notice warning %}} Only complete this section if you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, or any other event hosted by an AWS employee). If you -are running the workshop on your own, go to: [Start the workshop on your own]({{< relref "self_paced.md" >}}). +are running the workshop on your own, go to: [Start the workshop on your own]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md" >}}). 
{{% /notice %}} ### Login to the AWS Workshop Portal @@ -81,7 +81,7 @@ aws sts get-caller-identity {{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_validaterole.md" %}} {{% notice note %}} -Since we have already setup the prerequisites, **you can head straight to [Test the Cluster]({{< relref "../eksctl/test.md" >}})** +Since we have already setup the prerequisites, **you can head straight to [Test the Cluster]({{< relref "/using_ec2_spot_instances_with_eks/eksctl/test.md" >}})** {{% /notice %}} {{% /expand%}} diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md b/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md index bea67433..d1616205 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md +++ b/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md @@ -5,7 +5,7 @@ weight: 10 --- {{% notice warning %}} -Only complete this section if you are running the workshop on your own. If you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, etc), go to [Start the workshop at an AWS event]({{< relref "aws_event.md" >}}). +Only complete this section if you are running the workshop on your own. If you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, etc), go to [Start the workshop at an AWS event]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md" >}}). 
{{% /notice %}} ### Running the workshop on your own @@ -34,4 +34,4 @@ as an IAM user with administrator access to the AWS account: ![Login URL](/images/using_ec2_spot_instances_with_eks/prerequisites/iam-4-save-url.png) -Once you have completed the step above, **you can head straight to [Create a Workspace]({{< relref "workspace.md" >}})** \ No newline at end of file +Once you have completed the step above, **you can head straight to [Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/workspace.md" >}})** \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/us-east-1.md b/content/using_ec2_spot_instances_with_eks/prerequisites/us-east-1.md new file mode 100644 index 00000000..7b7f215c --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/prerequisites/us-east-1.md @@ -0,0 +1,9 @@ +--- +title: "N.Virginia" +chapter: false +disableToc: true +hidden: true +--- + +Create a Cloud9 Environment: [https://us-east-1.console.aws.amazon.com/cloud9/home?region=us-east-1](https://us-east-1.console.aws.amazon.com/cloud9/home?region=us-east-1) + diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md b/content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md index 05c82fe5..8c64c21a 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md +++ b/content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md @@ -23,6 +23,7 @@ Cloud9 requires third-party-cookies. You can whitelist the [specific domains]( h ### Launch Cloud9 in your closest region: {{< tabs name="Region" >}} + {{< tab name="N. 
Virginia" include="us-east-1.md" />}} {{< tab name="Oregon" include="us-west-2.md" />}} {{< tab name="Ireland" include="eu-west-1.md" />}} {{< tab name="Ohio" include="us-east-2.md" />}} diff --git a/static/images/ecs-spot-capacity-providers/CFN.png b/static/images/ecs-spot-capacity-providers/CFN.png new file mode 100644 index 00000000..9122cb4c Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CFN.png differ diff --git a/static/images/ecs-spot-capacity-providers/CFN_stacks.png b/static/images/ecs-spot-capacity-providers/CFN_stacks.png new file mode 100644 index 00000000..3b493f8d Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CFN_stacks.png differ diff --git a/static/images/ecs-spot-capacity-providers/CP-OD.png b/static/images/ecs-spot-capacity-providers/CP-OD.png new file mode 100644 index 00000000..d32b7340 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP-OD.png differ diff --git a/static/images/ecs-spot-capacity-providers/CP-SPOT.png b/static/images/ecs-spot-capacity-providers/CP-SPOT.png new file mode 100644 index 00000000..eeb1bf66 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP-SPOT.png differ diff --git a/static/images/ecs-spot-capacity-providers/CP3.png b/static/images/ecs-spot-capacity-providers/CP3.png new file mode 100644 index 00000000..3374b50e Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP3.png differ diff --git a/static/images/ecs-spot-capacity-providers/CP4.png b/static/images/ecs-spot-capacity-providers/CP4.png new file mode 100644 index 00000000..b34b8083 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP4.png differ diff --git a/static/images/ecs-spot-capacity-providers/CPS.png b/static/images/ecs-spot-capacity-providers/CPS.png new file mode 100644 index 00000000..fd066e99 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CPS.png differ diff --git 
a/static/images/ecs-spot-capacity-providers/CP_OD.png b/static/images/ecs-spot-capacity-providers/CP_OD.png new file mode 100644 index 00000000..ccaa1ed7 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP_OD.png differ diff --git a/static/images/ecs-spot-capacity-providers/CP_SPOT.png b/static/images/ecs-spot-capacity-providers/CP_SPOT.png new file mode 100644 index 00000000..77e5b38e Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/CP_SPOT.png differ diff --git a/static/images/ecs-spot-capacity-providers/Ser1.png b/static/images/ecs-spot-capacity-providers/Ser1.png new file mode 100644 index 00000000..3efc966a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/Ser1.png differ diff --git a/static/images/ecs-spot-capacity-providers/amazon_ecs_arch.png b/static/images/ecs-spot-capacity-providers/amazon_ecs_arch.png new file mode 100644 index 00000000..91972115 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/amazon_ecs_arch.png differ diff --git a/static/images/ecs-spot-capacity-providers/app.png b/static/images/ecs-spot-capacity-providers/app.png new file mode 100644 index 00000000..5e2dc82e Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/app.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_od_initial_view_1.png b/static/images/ecs-spot-capacity-providers/asg_od_initial_view_1.png new file mode 100644 index 00000000..c3e12867 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_od_initial_view_1.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_od_initial_view_2.png b/static/images/ecs-spot-capacity-providers/asg_od_initial_view_2.png new file mode 100644 index 00000000..7ccf0aad Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_od_initial_view_2.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_od_with_cp_view_1.png 
b/static/images/ecs-spot-capacity-providers/asg_od_with_cp_view_1.png new file mode 100644 index 00000000..3574b2ca Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_od_with_cp_view_1.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_1.png b/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_1.png new file mode 100644 index 00000000..7a97cb08 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_1.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_2.png b/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_2.png new file mode 100644 index 00000000..13dab0dd Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_spot_initial_view_2.png differ diff --git a/static/images/ecs-spot-capacity-providers/asg_spot_with_cp_view_1.png b/static/images/ecs-spot-capacity-providers/asg_spot_with_cp_view_1.png new file mode 100644 index 00000000..68432d80 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/asg_spot_with_cp_view_1.png differ diff --git a/static/images/ecs-spot-capacity-providers/attach_iam_role.png b/static/images/ecs-spot-capacity-providers/attach_iam_role.png new file mode 100644 index 00000000..b6b4c985 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/attach_iam_role.png differ diff --git a/static/images/ecs-spot-capacity-providers/c3vis2.png b/static/images/ecs-spot-capacity-providers/c3vis2.png new file mode 100644 index 00000000..89671353 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/c3vis2.png differ diff --git a/static/images/ecs-spot-capacity-providers/c3vs_tool.png b/static/images/ecs-spot-capacity-providers/c3vs_tool.png new file mode 100644 index 00000000..6438f65e Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/c3vs_tool.png differ diff --git 
a/static/images/ecs-spot-capacity-providers/c9_2.png b/static/images/ecs-spot-capacity-providers/c9_2.png new file mode 100644 index 00000000..a3d0775b Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/c9_2.png differ diff --git a/static/images/ecs-spot-capacity-providers/c9_3.png b/static/images/ecs-spot-capacity-providers/c9_3.png new file mode 100644 index 00000000..4565b68b Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/c9_3.png differ diff --git a/static/images/ecs-spot-capacity-providers/c9_6.png b/static/images/ecs-spot-capacity-providers/c9_6.png new file mode 100644 index 00000000..239b6ef3 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/c9_6.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloud9_4.png b/static/images/ecs-spot-capacity-providers/cloud9_4.png new file mode 100644 index 00000000..5962d116 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloud9_4.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloud9_environment.png b/static/images/ecs-spot-capacity-providers/cloud9_environment.png new file mode 100644 index 00000000..50df63c9 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloud9_environment.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloud9_instance.png b/static/images/ecs-spot-capacity-providers/cloud9_instance.png new file mode 100644 index 00000000..c35261fb Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloud9_instance.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloud9_new_terminal.png b/static/images/ecs-spot-capacity-providers/cloud9_new_terminal.png new file mode 100644 index 00000000..09c6bd1a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloud9_new_terminal.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloudformation_delete_stack.png 
b/static/images/ecs-spot-capacity-providers/cloudformation_delete_stack.png new file mode 100644 index 00000000..d6fddb7a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloudformation_delete_stack.png differ diff --git a/static/images/ecs-spot-capacity-providers/cloudmapproduct.png b/static/images/ecs-spot-capacity-providers/cloudmapproduct.png new file mode 100644 index 00000000..42744336 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cloudmapproduct.png differ diff --git a/static/images/ecs-spot-capacity-providers/containership.jpg b/static/images/ecs-spot-capacity-providers/containership.jpg new file mode 100644 index 00000000..a7f56bb6 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/containership.jpg differ diff --git a/static/images/ecs-spot-capacity-providers/cp5.png b/static/images/ecs-spot-capacity-providers/cp5.png new file mode 100644 index 00000000..79abdfa2 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cp5.png differ diff --git a/static/images/ecs-spot-capacity-providers/cwt4.png b/static/images/ecs-spot-capacity-providers/cwt4.png new file mode 100644 index 00000000..84c19d2a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/cwt4.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs.png b/static/images/ecs-spot-capacity-providers/ecs.png new file mode 100644 index 00000000..57ed17a7 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_asg_spot_scale_out.png b/static/images/ecs-spot-capacity-providers/ecs_asg_spot_scale_out.png new file mode 100644 index 00000000..a8ee3d16 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_asg_spot_scale_out.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_cfn_stack.png b/static/images/ecs-spot-capacity-providers/ecs_cfn_stack.png new file mode 100644 
index 00000000..5816c78f Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_cfn_stack.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_cluster_type.png b/static/images/ecs-spot-capacity-providers/ecs_cluster_type.png new file mode 100644 index 00000000..85a6169b Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_cluster_type.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_create_cluster.png b/static/images/ecs-spot-capacity-providers/ecs_create_cluster.png new file mode 100644 index 00000000..7b735043 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_create_cluster.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_empty_cluster.png b/static/images/ecs-spot-capacity-providers/ecs_empty_cluster.png new file mode 100644 index 00000000..386592b6 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_empty_cluster.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_fargate_cps.png b/static/images/ecs-spot-capacity-providers/ecs_fargate_cps.png new file mode 100644 index 00000000..063d7d87 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_fargate_cps.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_launch_template.png b/static/images/ecs-spot-capacity-providers/ecs_launch_template.png new file mode 100644 index 00000000..392a724a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_launch_template.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_service_alarms.png b/static/images/ecs-spot-capacity-providers/ecs_service_alarms.png new file mode 100644 index 00000000..704e7ffb Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_service_alarms.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_service_alb.png b/static/images/ecs-spot-capacity-providers/ecs_service_alb.png 
new file mode 100644 index 00000000..756085b9 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_service_alb.png differ diff --git a/static/images/ecs-spot-capacity-providers/ecs_service_alb_listener.png b/static/images/ecs-spot-capacity-providers/ecs_service_alb_listener.png new file mode 100644 index 00000000..297c2f7b Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ecs_service_alb_listener.png differ diff --git a/static/images/ecs-spot-capacity-providers/fargate.png b/static/images/ecs-spot-capacity-providers/fargate.png new file mode 100644 index 00000000..0cf2cf6a Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/fargate.png differ diff --git a/static/images/ecs-spot-capacity-providers/fargate_task1.png b/static/images/ecs-spot-capacity-providers/fargate_task1.png new file mode 100644 index 00000000..3e48a122 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/fargate_task1.png differ diff --git a/static/images/ecs-spot-capacity-providers/integration.svg b/static/images/ecs-spot-capacity-providers/integration.svg new file mode 100644 index 00000000..758276f8 --- /dev/null +++ b/static/images/ecs-spot-capacity-providers/integration.svg @@ -0,0 +1 @@ +integration \ No newline at end of file diff --git a/static/images/ecs-spot-capacity-providers/resize_ebs_1.png b/static/images/ecs-spot-capacity-providers/resize_ebs_1.png new file mode 100644 index 00000000..b6002ac9 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/resize_ebs_1.png differ diff --git a/static/images/ecs-spot-capacity-providers/resize_ebs_2.png b/static/images/ecs-spot-capacity-providers/resize_ebs_2.png new file mode 100644 index 00000000..bfe9d9e4 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/resize_ebs_2.png differ diff --git a/static/images/ecs-spot-capacity-providers/ser2.png b/static/images/ecs-spot-capacity-providers/ser2.png new file mode 100644 index 
00000000..6398272d Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/ser2.png differ diff --git a/static/images/ecs-spot-capacity-providers/table.png b/static/images/ecs-spot-capacity-providers/table.png new file mode 100644 index 00000000..04b319d1 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/table.png differ diff --git a/static/images/ecs-spot-capacity-providers/table1.png b/static/images/ecs-spot-capacity-providers/table1.png new file mode 100644 index 00000000..0bd01357 Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/table1.png differ diff --git a/static/images/ecs-spot-capacity-providers/task1.png b/static/images/ecs-spot-capacity-providers/task1.png new file mode 100644 index 00000000..8e5ad02e Binary files /dev/null and b/static/images/ecs-spot-capacity-providers/task1.png differ diff --git a/workshops/ecs-deep-learning-workshop/lab-1-setup/cfn-templates/ecs-deep-learning-workshop.yaml b/workshops/ecs-deep-learning-workshop/lab-1-setup/cfn-templates/ecs-deep-learning-workshop.yaml deleted file mode 100644 index e8200e51..00000000 --- a/workshops/ecs-deep-learning-workshop/lab-1-setup/cfn-templates/ecs-deep-learning-workshop.yaml +++ /dev/null @@ -1,813 +0,0 @@ ---- -AWSTemplateFormatVersion: 2010-09-09 -Description: Environment for running ECS Deep Learning Workshop -Mappings: - CidrMappings: - public-subnet-1: - CIDR: 10.0.1.0/24 - public-subnet-2: - CIDR: 10.0.2.0/24 - vpc: - CIDR: 10.0.0.0/16 - ECSAmi: - ap-northeast-1: - AMI: ami-0d5f884dada5562c6 - ap-northeast-2: - AMI: ami-0060ad36f655af38b - ap-south-1: - AMI: ami-056a07eb5b1d13734 - ap-southeast-1: - AMI: ami-065c0bd2832a70f9d - ap-southeast-2: - AMI: ami-0aa8b7a8042811ddf - ca-central-1: - AMI: ami-0d50dee936e241e7e - eu-central-1: - AMI: ami-03804565a6baf6d30 - eu-west-1: - AMI: ami-0dbcd2533bc72c3f6 - eu-west-2: - AMI: ami-005307409c5f6e76c - eu-west-3: - AMI: ami-024c0b7d07abc6526 - sa-east-1: - AMI: ami-0078e33a9103e1e58 - 
us-east-1: - AMI: ami-0254e5972ebcd132c - us-east-2: - AMI: ami-0a0d2004b44b9287c - us-gov-west-1: - AMI: ami-a842dcc9 - us-west-1: - AMI: ami-0de5608ca20c07aa2 - us-west-2: - AMI: ami-093381d21a4fc38d1 -Outputs: - awsRegionName: - Description: The name of the AWS Region your template was launched in - Value: - Ref: AWS::Region - cloudWatchLogsGroupName: - Description: Name of the CloudWatch Logs Group - Value: - Ref: cloudWatchLogsGroup - ecrRepositoryName: - Description: The name of the ECR Repository - Value: - Ref: ecrRepository - ecsClusterName: - Description: The name of the ECS Cluster - Value: - Ref: ecsCluster - inputBucketName: - Description: The name of the input S3 Bucket - Value: - Ref: inputBucket - outputBucketName: - Description: The name of the output S3 Bucket - Value: - Ref: outputBucket - spotFleetName: - Description: The name of the Spot Fleet - Value: - Ref: spotFleet -Parameters: - KeyName: - Description: Name of an existing EC2 KeyPair to enable SSH access to the EC2 instances - Type: AWS::EC2::KeyPair::KeyName - SourceCidr: - Default: 0.0.0.0/0 - Description: Optional - CIDR/IP range for instance ssh access - defaults to 0.0.0.0/0 - Type: String -Resources: - attachGateway: - DependsOn: - - vpc - - internetGateway - Properties: - InternetGatewayId: - Ref: internetGateway - VpcId: - Ref: vpc - Type: AWS::EC2::VPCGatewayAttachment - cloudWatchLogsGroup: - Properties: - RetentionInDays: 7 - Type: AWS::Logs::LogGroup - ecrRepository: - Type: AWS::ECR::Repository - ecsCluster: - Type: AWS::ECS::Cluster - inputBucket: - Type: AWS::S3::Bucket - internetGateway: - DependsOn: - - vpc - Type: AWS::EC2::InternetGateway - outputBucket: - Type: AWS::S3::Bucket - predictTaskDefinition: - Properties: - ContainerDefinitions: - - Command: - - DATE=`date -Iseconds` && echo "running predict_imagenet.py $IMAGEURL" && /usr/local/bin/predict_imagenet.py - $IMAGEURL | tee results && echo "results being written to 
s3://$OUTPUTBUCKET/predict_imagenet.results.$HOSTNAME.$DATE.txt" - && aws s3 cp results s3://$OUTPUTBUCKET/predict_imagenet.results.$HOSTNAME.$DATE.txt - && echo "Task complete!" - EntryPoint: - - /bin/bash - - -c - Environment: - - Name: IMAGEURL - Value: https://images-na.ssl-images-amazon.com/images/G/01/img15/pet-products/small-tiles/23695_pets_vertical_store_dogs_small_tile_8._CB312176604_.jpg - - Name: OUTPUTBUCKET - Value: - Ref: outputBucket - - Name: AWS_DEFAULT_REGION - Value: - Ref: AWS::Region - Image: - Fn::Join: - - '' - - - Ref: AWS::AccountId - - .dkr.ecr. - - Ref: AWS::Region - - .amazonaws.com/ - - Ref: ecrRepository - - :latest - LogConfiguration: - LogDriver: awslogs - Options: - awslogs-group: - Ref: cloudWatchLogsGroup - awslogs-region: - Ref: AWS::Region - awslogs-stream-prefix: predict_imagenet - Memory: '2048' - Name: ecs-deep-learning-workshop - Privileged: 'true' - Type: AWS::ECS::TaskDefinition - publicRoute: - DependsOn: - - publicRouteTable - - attachGateway - Properties: - DestinationCidrBlock: 0.0.0.0/0 - GatewayId: - Ref: internetGateway - RouteTableId: - Ref: publicRouteTable - Type: AWS::EC2::Route - publicRouteTable: - DependsOn: - - vpc - - attachGateway - Properties: - Tags: - - Key: Name - Value: Public Route Table - VpcId: - Ref: vpc - Type: AWS::EC2::RouteTable - publicSubnet1: - DependsOn: attachGateway - Properties: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: - Ref: AWS::Region - CidrBlock: - Fn::FindInMap: - - CidrMappings - - public-subnet-1 - - CIDR - MapPublicIpOnLaunch: true - Tags: - - Key: Name - Value: Public Subnet 1 - VpcId: - Ref: vpc - Type: AWS::EC2::Subnet - publicSubnet1RouteTableAssociation: - DependsOn: - - publicRouteTable - - publicSubnet1 - - attachGateway - Properties: - RouteTableId: - Ref: publicRouteTable - SubnetId: - Ref: publicSubnet1 - Type: AWS::EC2::SubnetRouteTableAssociation - publicSubnet2: - DependsOn: attachGateway - Properties: - AvailabilityZone: - Fn::Select: - - 1 - - 
Fn::GetAZs: - Ref: AWS::Region - CidrBlock: - Fn::FindInMap: - - CidrMappings - - public-subnet-2 - - CIDR - MapPublicIpOnLaunch: true - Tags: - - Key: Name - Value: Public Subnet 2 - VpcId: - Ref: vpc - Type: AWS::EC2::Subnet - publicSubnet2RouteTableAssociation: - DependsOn: - - publicRouteTable - - publicSubnet2 - - attachGateway - Properties: - RouteTableId: - Ref: publicRouteTable - SubnetId: - Ref: publicSubnet2 - Type: AWS::EC2::SubnetRouteTableAssociation - scalableTarget: - DependsOn: - - spotFleet - - spotFleetAutoscaleRole - Properties: - MaxCapacity: 1 - MinCapacity: 1 - ResourceId: - Fn::Join: - - / - - - spot-fleet-request - - Ref: spotFleet - RoleARN: - Fn::GetAtt: - - spotFleetAutoscaleRole - - Arn - ScalableDimension: ec2:spot-fleet-request:TargetCapacity - ServiceNamespace: ec2 - Type: AWS::ApplicationAutoScaling::ScalableTarget - scalingPolicy: - Properties: - PolicyName: - Fn::Join: - - '-' - - - Ref: AWS::StackName - - StepPolicy - PolicyType: StepScaling - ScalingTargetId: - Ref: scalableTarget - StepScalingPolicyConfiguration: - AdjustmentType: PercentChangeInCapacity - Cooldown: 30 - MetricAggregationType: Average - StepAdjustments: - - MetricIntervalLowerBound: 0 - ScalingAdjustment: 100 - Type: AWS::ApplicationAutoScaling::ScalingPolicy - securityGroup: - Properties: - GroupDescription: Spot Fleet Instance Security Group - SecurityGroupIngress: - - CidrIp: - Ref: SourceCidr - FromPort: 22 - IpProtocol: tcp - ToPort: 22 - - CidrIp: 0.0.0.0/0 - FromPort: 80 - IpProtocol: tcp - ToPort: 80 - VpcId: - Ref: vpc - Type: AWS::EC2::SecurityGroup - spotFleet: - DependsOn: - - spotFleetRole - - spotFleetInstanceProfile - - ecsCluster - Properties: - SpotFleetRequestConfigData: - AllocationStrategy: diversified - IamFleetRole: - Fn::GetAtt: - - spotFleetRole - - Arn - LaunchSpecifications: - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - 
InstanceType: m4.large - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > /home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. /home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - InstanceType: m4.xlarge - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > 
/home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. /home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - InstanceType: c4.large - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > /home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. 
/home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - InstanceType: c4.xlarge - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > /home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. 
/home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - InstanceType: r3.large - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > /home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. 
/home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - - IamInstanceProfile: - Arn: - Fn::GetAtt: - - spotFleetInstanceProfile - - Arn - ImageId: - Fn::FindInMap: - - ECSAmi - - Ref: AWS::Region - - AMI - InstanceType: r3.xlarge - KeyName: - Ref: KeyName - Monitoring: - Enabled: true - SecurityGroups: - - GroupId: - Ref: securityGroup - SubnetId: - Fn::Join: - - ',' - - - Ref: publicSubnet1 - - Ref: publicSubnet2 - UserData: - Fn::Base64: - Fn::Sub: '#!/bin/bash -xe - - yum -y --security update - - yum -y update ecs-init - - service docker restart - - yum -y install aws-cli git emacs nano aws-cfn-bootstrap - - echo ECS_CLUSTER=${ecsCluster} >> /etc/ecs/ecs.config - - echo ECS_AVAILABLE_LOGGING_DRIVERS=[\"json-file\",\"awslogs\"] >> /etc/ecs/ecs.config - - su - ec2-user -c "aws configure set default.region ${AWS::Region}" - - mkdir /home/ec2-user/.docker - - cat << EOF > /home/ec2-user/.docker/config.json - - { - - "credsStore": "ecr-login" - - } - - EOF - - chown -R ec2-user. 
/home/ec2-user/.docker - - git clone https://github.com/awslabs/amazon-ecr-credential-helper.git - - cd amazon-ecr-credential-helper && make docker && cp bin/local/docker-credential-ecr-login - /usr/local/bin/ - - INSTANCE_ID=$(curl 169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - - /opt/aws/bin/cfn-signal -s true -i $INSTANCE_ID "${spotFleetWaitConditionHandle}" - - ' - TargetCapacity: 1 - TerminateInstancesWithExpiration: true - Type: AWS::EC2::SpotFleet - spotFleetAutoscaleRole: - Properties: - AssumeRolePolicyDocument: - Statement: - - Action: - - sts:AssumeRole - Effect: Allow - Principal: - Service: - - application-autoscaling.amazonaws.com - Version: 2012-10-17 - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetAutoscaleRole - Path: / - Type: AWS::IAM::Role - spotFleetInstanceProfile: - DependsOn: - - spotFleetInstanceRole - Properties: - Path: / - Roles: - - Ref: spotFleetInstanceRole - Type: AWS::IAM::InstanceProfile - spotFleetInstanceRole: - Properties: - AssumeRolePolicyDocument: - Statement: - - Action: - - sts:AssumeRole - Effect: Allow - Principal: - Service: - - ec2.amazonaws.com - Version: 2012-10-17 - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role - Path: / - Policies: - - PolicyDocument: - Statement: - - Action: s3:ListBucket - Effect: Allow - Resource: - Fn::Join: - - '' - - - 'arn:aws:s3:::' - - Ref: outputBucket - - Action: - - s3:PutObject - - s3:GetObject - - s3:DeleteObject - Effect: Allow - Resource: - Fn::Join: - - '' - - - 'arn:aws:s3:::' - - Ref: outputBucket - - /* - - Action: - - ecr:DescribeRepositories - - ecr:ListImages - - ecr:InitiateLayerUpload - - ecr:UploadLayerPart - - ecr:CompleteLayerUpload - - ecr:PutImage - Effect: Allow - Resource: - Fn::Join: - - '' - - - 'arn:aws:ecr:' - - Ref: AWS::Region - - ':' - - Ref: AWS::AccountId - - :repository/ - - Ref: ecrRepository - Version: '2012-10-17' - PolicyName: - Fn::Join: - - '-' - - - Ref: 
AWS::StackName - - ecs-deep-learning-workshop-role - Type: AWS::IAM::Role - spotFleetRole: - Properties: - AssumeRolePolicyDocument: - Statement: - - Action: - - sts:AssumeRole - Effect: Allow - Principal: - Service: - - spotfleet.amazonaws.com - Version: 2012-10-17 - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetRole - Path: / - Type: AWS::IAM::Role - spotFleetWaitCondition: - DependsOn: spotFleetWaitConditionHandle - Properties: - Count: 1 - Handle: - Ref: spotFleetWaitConditionHandle - Timeout: 900 - Type: AWS::CloudFormation::WaitCondition - spotFleetWaitConditionHandle: - Type: AWS::CloudFormation::WaitConditionHandle - vpc: - Properties: - CidrBlock: - Fn::FindInMap: - - CidrMappings - - vpc - - CIDR - EnableDnsHostnames: true - EnableDnsSupport: true - Tags: - - Key: Name - Value: VPC for ECS Deep Learning Workshop - Type: AWS::EC2::VPC -... diff --git a/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/Dockerfile b/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/Dockerfile deleted file mode 100644 index 56dd0a6d..00000000 --- a/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -FROM mxnet/python - -ENV DEBIAN_FRONTEND noninteractive - -RUN apt-get -y update -RUN apt-get -y install git \ - python-opencv \ - build-essential \ - python3-dev \ - python3-tk - -RUN pip install opencv-python dumb-init awscli matplotlib - -ENV WORKSHOPDIR /root/ecs-deep-learning-workshop -RUN mkdir ${WORKSHOPDIR} - -RUN cd ${WORKSHOPDIR} \ - && git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet - -COPY predict_imagenet.py /usr/local/bin/ - -RUN pip install jupyter - -RUN jupyter-notebook --generate-config --allow-root \ - && sed -i "s/#c.NotebookApp.ip = 'localhost'/c.NotebookApp.ip = '*'/g" /root/.jupyter/jupyter_notebook_config.py \ - && sed -i "s/#c.NotebookApp.allow_remote_access = False/c.NotebookApp.allow_remote_access = True/g" 
/root/.jupyter/jupyter_notebook_config.py - -ARG PASSWORD - -RUN python3 -c "from notebook.auth import passwd;print(passwd('${PASSWORD}') if '${PASSWORD}' != '' else 'sha1:c6bd96fb0824:6654e9eabfc54d0b3d0715ddf9561bed18e09b82')" > ${WORKSHOPDIR}/password_temp - -RUN sed -i "s/#c.NotebookApp.password = ''/c.NotebookApp.password = '$(cat ${WORKSHOPDIR}/password_temp)'/g" /root/.jupyter/jupyter_notebook_config.py - -RUN rm ${WORKSHOPDIR}/password_temp - -WORKDIR ${WORKSHOPDIR} -EXPOSE 8888 -CMD ["/usr/local/bin/dumb-init", "/usr/local/bin/jupyter-notebook", "--no-browser", "--allow-root"] diff --git a/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/predict_imagenet.py b/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/predict_imagenet.py deleted file mode 100755 index 15d76a25..00000000 --- a/workshops/ecs-deep-learning-workshop/lab-2-build/mxnet/predict_imagenet.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import print_function -import os, sys, urllib.request - -if len(sys.argv) < 2: - print("Usage:", sys.argv[0], "") - exit(0) - -url = sys.argv[1] - -import mxnet as mx - -def download(url,prefix=''): - filename = prefix+url.split("/")[-1] - if not os.path.exists(filename): - urllib.request.urlretrieve(url, filename) - -path='http://data.mxnet.io/models/imagenet-11k/' -download(path+'resnet-152/resnet-152-symbol.json', 'full-') -download(path+'resnet-152/resnet-152-0000.params', 'full-') -download(path+'synset.txt', 'full-') - -with open('full-synset.txt', 'r') as f: - synsets = [l.rstrip() for l in f] - -sym, arg_params, aux_params = mx.model.load_checkpoint('full-resnet-152', 0) - -mod = mx.mod.Module(symbol=sym, context=mx.cpu()) -mod.bind(for_training=False, data_shapes=[('data', (1,3,224,224))]) -mod.set_params(arg_params, aux_params) - -import matplotlib -matplotlib.rc("savefig", dpi=100) -import cv2 -import numpy as np -from collections import namedtuple -Batch = namedtuple('Batch', ['data']) - -def get_image(url, 
show=True): - filename = url.split("/")[-1] - urllib.request.urlretrieve(url, filename) - img = cv2.imread(filename) - if img is None: - print('failed to download ' + url) - return filename - -def predict(filename, mod, synsets): - img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB) - if img is None: - return None - img = cv2.resize(img, (224, 224)) - img = np.swapaxes(img, 0, 2) - img = np.swapaxes(img, 1, 2) - img = img[np.newaxis, :] - - mod.forward(Batch([mx.nd.array(img)])) - prob = mod.get_outputs()[0].asnumpy() - prob = np.squeeze(prob) - - a = np.argsort(prob)[::-1] - for i in a[0:5]: - print('probability=%f, class=%s' %(prob[i], synsets[i])) - -results = predict(get_image(url), mod, synsets) -print(url) -print(results) diff --git a/workshops/ecs-deep-learning-workshop/presentation/ecs_deep_learning_workshop.pdf b/workshops/ecs-deep-learning-workshop/presentation/ecs_deep_learning_workshop.pdf deleted file mode 100644 index 74543167..00000000 Binary files a/workshops/ecs-deep-learning-workshop/presentation/ecs_deep_learning_workshop.pdf and /dev/null differ diff --git a/workshops/ecs-spot-capacity-providers/cwagent-ecs-instance-metric-cfn.json b/workshops/ecs-spot-capacity-providers/cwagent-ecs-instance-metric-cfn.json new file mode 100644 index 00000000..c2f4d04a --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/cwagent-ecs-instance-metric-cfn.json @@ -0,0 +1,258 @@ +{ + "AWSTemplateFormatVersion": "2010-09-09", + "Parameters": { + "ClusterName": { + "Type": "String", + "Description": "Enter the name of your ECS cluster from which you want to collect metrics" + }, + "CreateIAMRoles": { + "Type": "String", + "Default": "False", + "AllowedValues": [ + "True", + "False" + ], + "Description": "Whether to create default IAM roles", + "ConstraintDescription": "must specify True or False." 
+ }, + "TaskRoleArn": { + "Type": "String", + "Default": "Default", + "Description": "Enter the role arn you want to use as the ecs task role" + }, + "ExecutionRoleArn": { + "Type": "String", + "Default": "Default", + "Description": "Enter the role arn you want to use as the ecs execution role" + } + }, + "Conditions": { + "CreateRoles": { + "Fn::Equals": [ + { + "Ref": "CreateIAMRoles" + }, + "True" + ] + }, + "DefaultTaskRole": { + "Fn::Equals": [ + { + "Ref": "TaskRoleArn" + }, + "Default" + ] + }, + "DefaultExecutionRole": { + "Fn::Equals": [ + { + "Ref": "ExecutionRoleArn" + }, + "Default" + ] + } + }, + "Resources": { + "ECSTaskDefinition": { + "Type": "AWS::ECS::TaskDefinition", + "Properties": { + "Family": "ecs-cwagent-daemon-service", + "TaskRoleArn": { + "Fn::If": [ + "CreateRoles", + { + "Fn::GetAtt": [ + "ECSTaskRole", + "Arn" + ] + }, + { + "Fn::If": [ + "DefaultTaskRole", + { + "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/CWAgentECSTaskRole" + }, + { + "Ref": "TaskRoleArn" + } + ] + } + ] + }, + "ExecutionRoleArn": { + "Fn::If": [ + "CreateRoles", + { + "Fn::GetAtt": [ + "ECSExcutionRole", + "Arn" + ] + }, + { + "Fn::If": [ + "DefaultExecutionRole", + { + "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/CWAgentECSExecutionRole" + }, + { + "Ref": "ExecutionRoleArn" + } + ] + } + ] + }, + "NetworkMode": "bridge", + "ContainerDefinitions": [ + { + "Name": "cloudwatch-agent", + "Image": "amazon/cloudwatch-agent:1.231221.0", + "Cpu": "128", + "Memory": "128", + "MemoryReservation": "128", + "MountPoints": [ + { + "ReadOnly": true, + "ContainerPath": "/rootfs/proc", + "SourceVolume": "proc" + }, + { + "ReadOnly": true, + "ContainerPath": "/rootfs/dev", + "SourceVolume": "dev" + }, + { + "ReadOnly": true, + "ContainerPath": "/sys/fs/cgroup", + "SourceVolume": "al2_cgroup" + }, + { + "ReadOnly": true, + "ContainerPath": "/cgroup", + "SourceVolume": "al1_cgroup" + }, + { + "ReadOnly": true, + "ContainerPath": "/rootfs/sys/fs/cgroup", + "SourceVolume": 
"al2_cgroup" + }, + { + "ReadOnly": true, + "ContainerPath": "/rootfs/cgroup", + "SourceVolume": "al1_cgroup" + } + ], + "Environment": [ + { + "Name": "USE_DEFAULT_CONFIG", + "Value": "True" + } + ], + "LogConfiguration": { + "LogDriver": "awslogs", + "Options": { + "awslogs-create-group": "True", + "awslogs-group": "/ecs/ecs-cwagent-daemon-service", + "awslogs-region": { + "Ref": "AWS::Region" + }, + "awslogs-stream-prefix": "ecs" + } + } + } + ], + "RequiresCompatibilities": [ + "EC2" + ], + "Volumes": [ + { + "Name": "proc", + "Host": { + "SourcePath": "/proc" + } + }, + { + "Name": "dev", + "Host": { + "SourcePath": "/dev" + } + }, + { + "Name": "al1_cgroup", + "Host": { + "SourcePath": "/cgroup" + } + }, + { + "Name": "al2_cgroup", + "Host": { + "SourcePath": "/sys/fs/cgroup" + } + } + ] + } + }, + "ECSDaemonService": { + "Type": "AWS::ECS::Service", + "Properties": { + "TaskDefinition": { + "Ref": "ECSTaskDefinition" + }, + "Cluster": { + "Ref": "ClusterName" + }, + "LaunchType": "EC2", + "SchedulingStrategy": "DAEMON", + "ServiceName": "cwagent-daemon-service" + } + }, + "ECSTaskRole": { + "Type": "AWS::IAM::Role", + "Condition": "CreateRoles", + "Properties": { + "Description": "Allows ECS tasks to call AWS services on your behalf.", + "AssumeRolePolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] + }, + "ManagedPolicyArns": [ + "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" + ], + "RoleName": "CWAgentECSTaskRole" + } + }, + "ECSExcutionRole": { + "Type": "AWS::IAM::Role", + "Condition": "CreateRoles", + "Properties": { + "Description": "Allows ECS container agent makes calls to the Amazon ECS API on your behalf.", + "AssumeRolePolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": 
"sts:AssumeRole" + } + ] + }, + "ManagedPolicyArns": [ + "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy", + "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" + ], + "RoleName": "CWAgentECSExecutionRole" + } + } + } +} diff --git a/workshops/ecs-spot-capacity-providers/cwt-dashboard.json b/workshops/ecs-spot-capacity-providers/cwt-dashboard.json new file mode 100644 index 00000000..ea958588 --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/cwt-dashboard.json @@ -0,0 +1,21 @@ + { + "widgets":[ { + "type":"metric", + "properties":{ + "metrics": [ + [ "AWS/ECS/ManagedScaling", "CapacityProviderReservation", "ClusterName", "EcsSpotWorkshop", "CapacityProviderName", "CP-SPOT" ], + [ "...", "CP-OD" ], + [ "ECS/ContainerInsights", "ContainerInstanceCount", ".", "." ], + [ ".", "TaskCount", ".", "." ], + [ ".", "PendingTaskCount", "ServiceName", "ec2-service-split", "ClusterName", "EcsSpotWorkshop" ] + ], + "view": "timeSeries", + "stacked": false, + "region": "%AWS_REGION%", + "stat": "Maximum", + "period": 30, + "title": "EcsSpotWorkshop" + } + } ] + } + diff --git a/workshops/ecs-spot-capacity-providers/ecs-spot-workshop-cfn.yaml b/workshops/ecs-spot-capacity-providers/ecs-spot-workshop-cfn.yaml new file mode 100644 index 00000000..b37d8a9a --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/ecs-spot-workshop-cfn.yaml @@ -0,0 +1,573 @@ +--- +AWSTemplateFormatVersion: 2010-09-09 +Description: ECS Spot Workshop CFN Template + +Metadata: + Author: + Description: Jayaprakash Alawala + License: + Description: 'Copyright 2020 Amazon.com, Inc. and its affiliates. All Rights Reserved. + + Licensed under the Amazon Software License (the "License"). You may not use this file + except in compliance with the License. A copy of the License is located at + + http://aws.amazon.com/asl/ + + or in the "license" file accompanying this file. 
This file is distributed on an "AS IS" + BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations under the License.' + +Parameters: + sourceCidr: + Default: 0.0.0.0/0 + Description: Optional - CIDR/IP range for instance ssh/http access and load balancer http + access + Type: String + + ECSAMI: + Description: AMI ID + Type: AWS::SSM::Parameter::Value + Default: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id + + EETeamRoleArn: + Type: String + Default: '' + +Conditions: + EventEngine: + !Not [!Equals [!Ref EETeamRoleArn, '']] + +Resources: + + vpc: + Type: AWS::EC2::VPC + Properties: + CidrBlock: 10.0.0.0/16 + EnableDnsHostnames: true + EnableDnsSupport: true + Tags: + - Key: Name + Value: "EcsSpotWorkshop" + + internetGateway: + Type: AWS::EC2::InternetGateway + DependsOn: + - vpc + + attachGateway: + Type: AWS::EC2::VPCGatewayAttachment + DependsOn: + - vpc + - internetGateway + Properties: + InternetGatewayId: !Ref internetGateway + VpcId: !Ref vpc + + + publicSubnet1: + Type: AWS::EC2::Subnet + DependsOn: attachGateway + Properties: + AvailabilityZone: + Fn::Select: + - 0 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.0.0/24 + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: "EcsSpotWorkshop publicSubnet1" + VpcId: !Ref vpc + + publicSubnet2: + Type: AWS::EC2::Subnet + DependsOn: attachGateway + Properties: + AvailabilityZone: + Fn::Select: + - 1 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.1.0/24 + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: "EcsSpotWorkshop publicSubnet2" + VpcId: !Ref vpc + + publicSubnet3: + Type: AWS::EC2::Subnet + DependsOn: attachGateway + Properties: + AvailabilityZone: + Fn::Select: + - 2 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.2.0/24 + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: "EcsSpotWorkshop publicSubnet3" + VpcId: !Ref vpc + + + privateSubnet1: + 
Type: AWS::EC2::Subnet + Properties: + AvailabilityZone: + Fn::Select: + - 0 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.3.0/24 + Tags: + - Key: Name + Value: "EcsSpotWorkshop privateSubnet1" + VpcId: !Ref vpc + + + privateSubnet2: + Type: AWS::EC2::Subnet + Properties: + AvailabilityZone: + Fn::Select: + - 1 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.4.0/24 + Tags: + - Key: Name + Value: "EcsSpotWorkshop privateSubnet2" + VpcId: !Ref vpc + + privateSubnet3: + Type: AWS::EC2::Subnet + Properties: + AvailabilityZone: + Fn::Select: + - 2 + - Fn::GetAZs: + Ref: AWS::Region + CidrBlock: 10.0.5.0/24 + Tags: + - Key: Name + Value: "EcsSpotWorkshop privateSubnet3" + VpcId: !Ref vpc + + + publicRouteTable: + Type: AWS::EC2::RouteTable + DependsOn: + - vpc + - attachGateway + Properties: + Tags: + - Key: Name + Value: "EcsSpotWorkshop Public Route Table" + VpcId: !Ref vpc + + RouteTablePrivate: + Type: AWS::EC2::RouteTable + DependsOn: vpc + Properties: + VpcId: !Ref vpc + Tags: + - Key: Name + Value: "EcsSpotWorkshop Private Route Table" + + publicSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - publicRouteTable + - publicSubnet1 + - attachGateway + Properties: + RouteTableId: !Ref publicRouteTable + SubnetId: !Ref publicSubnet1 + + publicSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - publicRouteTable + - publicSubnet2 + - attachGateway + Properties: + RouteTableId: !Ref publicRouteTable + SubnetId: !Ref publicSubnet2 + + publicSubnet3RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - publicRouteTable + - publicSubnet3 + - attachGateway + Properties: + RouteTableId: !Ref publicRouteTable + SubnetId: !Ref publicSubnet3 + + + privateSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - RouteTablePrivate + - privateSubnet1 + Properties: + RouteTableId: !Ref RouteTablePrivate + SubnetId: !Ref 
privateSubnet1 + + privateSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - RouteTablePrivate + - privateSubnet2 + Properties: + RouteTableId: !Ref RouteTablePrivate + SubnetId: !Ref privateSubnet2 + + privateSubnet3RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: + - RouteTablePrivate + - privateSubnet3 + Properties: + RouteTableId: !Ref RouteTablePrivate + SubnetId: !Ref privateSubnet3 + + + EIPNATGateway: + Type: AWS::EC2::EIP + DependsOn: + - attachGateway + - vpc + Properties: + Domain: vpc + + NATGateway: + Type: AWS::EC2::NatGateway + DependsOn: + - publicSubnet1 + Properties: + AllocationId: !GetAtt EIPNATGateway.AllocationId + SubnetId: !Ref publicSubnet1 + + publicRoute: + Type: AWS::EC2::Route + DependsOn: + - publicRouteTable + - internetGateway + - attachGateway + Properties: + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref internetGateway + RouteTableId: !Ref publicRouteTable + + privateRoute: + Type: AWS::EC2::Route + DependsOn: + - RouteTablePrivate + - NATGateway + Properties: + DestinationCidrBlock: 0.0.0.0/0 + NatGatewayId: !Ref NATGateway + RouteTableId: !Ref RouteTablePrivate + + autoScalingServiceLinkedRole: + Type: AWS::IAM::ServiceLinkedRole + Properties: + AWSServiceName: autoscaling.amazonaws.com + Description: Default Service-Linked Role enables access to AWS Services and Resources + used or managed by Auto Scaling + + loadBalancerSecurityGroup: + Type: AWS::EC2::SecurityGroup + DependsOn: + - vpc + Properties: + GroupDescription: Allow all traffic from internet + SecurityGroupIngress: + - CidrIp: !Ref sourceCidr + IpProtocol: -1 + VpcId: !Ref vpc + + instanceSecurityGroup: + Type: AWS::EC2::SecurityGroup + DependsOn: + - vpc + Properties: + GroupDescription: Allow traffic from ALB + VpcId: !Ref vpc + + instanceSecurityGroupIngress: + Type: AWS::EC2::SecurityGroupIngress + DependsOn: + - instanceSecurityGroup + - loadBalancerSecurityGroup + Properties: + 
GroupId: !Ref instanceSecurityGroup + IpProtocol: -1 + SourceSecurityGroupId: !Ref loadBalancerSecurityGroup + + ECSServiceALB: + Type: AWS::ElasticLoadBalancingV2::LoadBalancer + DependsOn: + - loadBalancerSecurityGroup + - publicSubnet1 + - publicSubnet2 + - publicSubnet3 + Properties: + Name: "EcsSpotWorkshop" + Scheme: internet-facing + SecurityGroups: + - !Ref loadBalancerSecurityGroup + Subnets: + - !Ref publicSubnet1 + - !Ref publicSubnet2 + - !Ref publicSubnet3 + + ECSServiceTG: + Type: AWS::ElasticLoadBalancingV2::TargetGroup + DependsOn: + - vpc + Properties: + HealthCheckIntervalSeconds: 50 + HealthCheckPath: / + HealthCheckPort: traffic-port + HealthCheckProtocol: HTTP + HealthCheckTimeoutSeconds: 45 + HealthyThresholdCount: 2 + Matcher: + HttpCode: 200 + Name: "EcsSpotWorkshop" + Port: 80 + Protocol: HTTP + UnhealthyThresholdCount: 4 + VpcId: !Ref vpc + + ECSServiceALBListener: + Type: AWS::ElasticLoadBalancingV2::Listener + DependsOn: + - ECSServiceALB + - ECSServiceTG + Properties: + DefaultActions: + - Type: forward + TargetGroupArn: !Ref ECSServiceTG + LoadBalancerArn: !Ref ECSServiceALB + Port: 80 + Protocol: HTTP + + instanceRole: + Type: AWS::IAM::Role + Properties: + RoleName: "EcsSpotWorkshop-EcsInstanceRole" + AssumeRolePolicyDocument: + Statement: + - Action: + - sts:AssumeRole + Effect: Allow + Principal: + Service: + - ec2.amazonaws.com + Version: 2012-10-17 + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role + - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceAutoscaleRole + - arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess + + Cloud9instanceRole: + Type: AWS::IAM::Role + Properties: + RoleName: "EcsSpotWorkshop-Cloud9InstanceRole" + AssumeRolePolicyDocument: + Statement: + - Action: + - sts:AssumeRole + Effect: Allow + Principal: + Service: + - ec2.amazonaws.com + Version: 2012-10-17 + ManagedPolicyArns: + - arn:aws:iam::aws:policy/AdministratorAccess + + cloud9Environment: + 
DependsOn: + - publicSubnet1 + Properties: + Name : EcsSpotWorkshop + Description: ECS Spot Workshop - Cloud9 environment + OwnerArn: + !If [EventEngine, !Sub "arn:aws:sts::${AWS::AccountId}:assumed-role/TeamRole/MasterKey", !Ref "AWS::NoValue"] + InstanceType: t3.large + SubnetId: + Ref: publicSubnet1 + Type: AWS::Cloud9::EnvironmentEC2 + + Cloud9instanceProfile: + Type: AWS::IAM::InstanceProfile + DependsOn: + - Cloud9instanceRole + Properties: + InstanceProfileName: "EcsSpotWorkshop-Cloud9InstanceProfile" + Path: / + Roles: + - Ref: Cloud9instanceRole + + instanceProfile: + Type: AWS::IAM::InstanceProfile + DependsOn: + - instanceRole + Properties: + InstanceProfileName: "EcsSpotWorkshop-InstanceProfile" + Path: / + Roles: + - Ref: instanceRole + + + ECSInstanceLaunchTemplate: + DependsOn: + - instanceProfile + Type: AWS::EC2::LaunchTemplate + Properties: + LaunchTemplateData: + InstanceType: t3.large + ImageId: !Ref ECSAMI + IamInstanceProfile: + Arn: + Fn::GetAtt: + - instanceProfile + - Arn + + SecurityGroupIds: + - !Ref instanceSecurityGroup + + TagSpecifications: + - ResourceType: instance + Tags: + - Key: Name + Value: "EcsSpotWorkshop" + UserData: + Fn::Base64: !Sub | + #!/bin/bash + echo "ECS_CLUSTER=EcsSpotWorkshop" >> /etc/ecs/ecs.config + echo "ECS_ENABLE_SPOT_INSTANCE_DRAINING=true" >> /etc/ecs/ecs.config + echo "ECS_CONTAINER_STOP_TIMEOUT=90s" >> /etc/ecs/ecs.config + echo "ECS_ENABLE_CONTAINER_METADATA=true" >> /etc/ecs/ecs.config + LaunchTemplateName: "EcsSpotWorkshop" + + ecrRepository: + Type: AWS::ECR::Repository + Properties: + RepositoryName: ecs-spot-workshop/webapp + +Outputs: + awsRegionId: + Description: The AWS Region ID your template was launched in + Value: !Ref AWS::Region + + instanceRole: + Description: Instance Role + Value: + Fn::GetAtt: + - instanceRole + - Arn + + instanceProfile: + Description: Instance profile ARN + Value: + Fn::GetAtt: + - instanceProfile + - Arn + + Cloud9instanceProfile: + Description: Cloud9 Instance 
profile ARN + Value: + Fn::GetAtt: + - Cloud9instanceProfile + - Arn + + instanceSecurityGroup: + Description: Instance security group + Value: !Ref instanceSecurityGroup + + loadBalancerSecurityGroup: + Description: Load Balancer security group + Value: !Ref loadBalancerSecurityGroup + + + publicSubnet1: + Description: Public subnet 1 + Value: !Ref publicSubnet1 + + publicSubnet2: + Description: Public subnet 2 + Value: !Ref publicSubnet2 + + publicSubnet3: + Description: Public subnet 3 + Value: !Ref publicSubnet3 + + privateSubnet1: + Description: Private subnet 1 + Value: !Ref privateSubnet1 + + privateSubnet2: + Description: Private subnet 2 + Value: !Ref privateSubnet2 + + privateSubnet3: + Description: Private subnet 3 + Value: !Ref privateSubnet3 + + vpc: + Description: The VPC + Value: !Ref vpc + + LaunchTemplateId: + Description: The Launch Template + Value: !Ref ECSInstanceLaunchTemplate + + ALBDNSName: + Description: The Application Load Balancer + Value: !GetAtt ECSServiceALB.DNSName + + TargetGroup: + Description: The Target Group + Value: !Ref ECSServiceTG + + VPCPublicSubnets: + Description: The list of public subnets in the VPC + Value: !Join [",", [!Ref publicSubnet1, !Ref publicSubnet2]] + + VPCPrivateSubnets: + Description: The list of private subnets in the VPC + Value: !Join [",", [!Ref privateSubnet1, !Ref privateSubnet2]] + + ecrRepository: + Description: The ecrRepository + Value: !Ref ecrRepository + + Cloud9instanceRole: + Description: The Cloud9instanceRole + Value: + Fn::GetAtt: + - Cloud9instanceRole + - Arn + + autoScalingServiceLinkedRole: + Description: The name of the Auto Scaling service-linked role + Value: !Ref autoScalingServiceLinkedRole + + EETeamRoleArn: + Description: EETeamRoleArn + Value: !Ref EETeamRoleArn + + cloud9Environment: + Description: Cloud9 environment + Value: + Fn::GetAtt: + - cloud9Environment + - Name +... 
diff --git a/workshops/ecs-spot-capacity-providers/fargate-task.json b/workshops/ecs-spot-capacity-providers/fargate-task.json new file mode 100644 index 00000000..508037f2 --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/fargate-task.json @@ -0,0 +1,29 @@ +{ + "family": "fargate-task", + "cpu": "512", + "memory": "1024", + "networkMode": "awsvpc", + "containerDefinitions": [ + { + "name": "fargate-webapp-container", + "image": "httpd:2.4", + "portMappings": [ + { + "containerPort": 80, + "protocol": "tcp" + } + ], + "essential": true, + "entryPoint": [ + "sh", + "-c" + ], + "command": [ + "/bin/sh -c \"echo ' Amazon ECS Sample App

Amazon ECS Sample App

Congratulations!

Your application is now running on a container in Amazon ECS.

' > /usr/local/apache2/htdocs/index.html && httpd-foreground\"" + ] + } + ], + "requiresCompatibilities": [ + "FARGATE" + ] +} \ No newline at end of file diff --git a/workshops/ecs-spot-capacity-providers/templates/asg.json b/workshops/ecs-spot-capacity-providers/templates/asg.json new file mode 100755 index 00000000..05d83a5b --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/templates/asg.json @@ -0,0 +1,48 @@ +{ + "AutoScalingGroupName": "%ASG_NAME%", + "MixedInstancesPolicy": { + "LaunchTemplate": { + "LaunchTemplateSpecification": { + "LaunchTemplateName": "EcsSpotWorkshop", + "Version": "1" + }, + "Overrides": [ + { + "InstanceType": "m4.large" + }, + { + "InstanceType": "m5.large" + }, + { + "InstanceType": "m5a.large" + }, + { + "InstanceType": "t2.large" + }, + { + "InstanceType": "t3.large" + }, + { + "InstanceType": "t3a.large" + } + ] + }, + "InstancesDistribution": { + "OnDemandAllocationStrategy": "prioritized", + "OnDemandBaseCapacity": 0, + "OnDemandPercentageAboveBaseCapacity": %OD_PERCENTAGE%, + "SpotAllocationStrategy": "capacity-optimized" + } + }, + "MinSize": 0, + "MaxSize": 20, + "DesiredCapacity": 0, + "DefaultCooldown": 300, + "HealthCheckGracePeriod": 300, + "HealthCheckType": "EC2", + "VPCZoneIdentifier": "%PUBLIC_SUBNET_LIST%", + "TerminationPolicies": [ + "DEFAULT" + ], + "NewInstancesProtectedFromScaleIn": true +} diff --git a/workshops/ecs-spot-capacity-providers/templates/ec2-task.json b/workshops/ecs-spot-capacity-providers/templates/ec2-task.json new file mode 100644 index 00000000..f8aaa6fc --- /dev/null +++ b/workshops/ecs-spot-capacity-providers/templates/ec2-task.json @@ -0,0 +1,22 @@ +{ + "family": "ec2-task", + "containerDefinitions": [ + { + "name": "ec2-task-container", + "image": "DOCKER_IMAGE_URI", + "cpu": 420, + "memory": 1680, + "memoryReservation": 1680, + "portMappings": [ + { + "containerPort": 80, + "protocol": "tcp" + } + ], + "essential": true + } + ], + "requiresCompatibilities": [ + "EC2" + ] +} \ No 
newline at end of file
diff --git a/workshops/ecs-spot-capacity-providers/webapp/Dockerfile b/workshops/ecs-spot-capacity-providers/webapp/Dockerfile
new file mode 100644
index 00000000..8a6c31bd
--- /dev/null
+++ b/workshops/ecs-spot-capacity-providers/webapp/Dockerfile
@@ -0,0 +1,15 @@
+FROM amazonlinux:latest
+
+RUN yum update -y
+
+RUN yum -y install python3 python3-wheel python3-pip
+
+COPY / /app
+
+WORKDIR /app
+
+RUN pip3 install -r requirements.txt
+
+ENTRYPOINT ["python3"]
+EXPOSE 80
+CMD ["app.py"]
diff --git a/workshops/ecs-spot-capacity-providers/webapp/app.py b/workshops/ecs-spot-capacity-providers/webapp/app.py
new file mode 100644
index 00000000..5b713f58
--- /dev/null
+++ b/workshops/ecs-spot-capacity-providers/webapp/app.py
@@ -0,0 +1,128 @@
+from flask import Flask, render_template
+from flask.ext.cors import CORS, cross_origin
+import os
+import requests
+import json
+import signal
+import time
+import socket
+import sys
+import boto3
+
+
+def checkSpotTermination():
+    """Return True when the instance metadata reports a Spot interruption notice.
+
+    Only an HTTP 200 from the spot/termination-time path counts as a notice.
+    """
+    URL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
+    response = requests.get(URL)
+    return (response.status_code == 200)
+
+
+class Ec2SpotInterruptionHandler:
+    """Install SIGINT/SIGTERM handlers that log why the process is being stopped."""
+
+    def __init__(self):
+        signal.signal(signal.SIGINT, self.exit_gracefully)
+        signal.signal(signal.SIGTERM, self.exit_gracefully)
+
+    def exit_gracefully(self, signum, frame):
+        """Log the received signal; on SIGTERM also report any Spot notice.
+
+        signum and frame are supplied by the signal module.
+        NOTE(review): this handler only logs and never exits the process --
+        confirm that is intended.
+        """
+        # signal.Signals maps the number to its name (e.g. 'SIGTERM'); no
+        # self.signals lookup table exists on this class.
+        signal_name = signal.Signals(signum).name
+        print("\nReceived {} signal".format(signal_name))
+        if signal_name == 'SIGTERM':
+            print("SIGTERM Signal Received. Let's wrap up..")
+            if checkSpotTermination():
+                print("The instance got a Spot Notification for termination, this may have")
+
+
+app = Flask(__name__)
+cors = CORS(app)
+app.config['CORS_HEADERS'] = 'Content-Type'
+
+
+@app.route('/')
+@cross_origin()
+def index():
+    response = ""
+    response +=" ECS Spot Workshop "
+    response += "

I am a Simple Containerized Web App Running with below Attributes


" + + try: + if checkSpotTermination(): + response += "

This Spot Instance got a Spot notification for interruption


" + + URL = "http://169.254.169.254/latest/dynamic/instance-identity/document" + InstanceData = requests.get(URL).json() + + instanceId = InstanceData['instanceId'] + response += "
  • My instance_id = {}
  • ".format(instanceId) + lifecycle = getInstanceLifecycle(instanceId, InstanceData['region']) + response += "
  • My Instance lifecycle = {}
  • ".format(lifecycle) + response += "
  • My instance_type = {}
  • ".format(InstanceData['instanceType']) + response += "
  • My Instance private_ipv4 = {}
  • ".format(InstanceData['privateIp']) + response += "
  • My availability_zone = {}
  • ".format(InstanceData['availabilityZone']) + response += "
  • My Region = {}
  • ".format(InstanceData['region']) + + publicIp = requests.get("http://169.254.169.254/latest/meta-data/public-ipv4") + response += "
  • My Instance public_ipv4 = {}
  • ".format(publicIp.text) + AMIIndexId = requests.get("http://169.254.169.254/latest/meta-data/ami-launch-index") + response += "
  • My ami_launch_index = {}
  • ".format(AMIIndexId.text) + + AMIId = requests.get("http://169.254.169.254/latest/meta-data/ami-id") + response += "
  • My ami_id = {}
  • ".format(AMIId.text) + + MacId = requests.get("http://169.254.169.254/latest/meta-data/mac") + Mac = MacId.text + + URL = "http://169.254.169.254/latest/meta-data/network/interfaces/macs/" + str(MacId.text) + "/subnet-id" + SubnetId = requests.get(URL) + response += "
  • My subnet_id = {}
  • ".format(SubnetId.text) + + URL = "http://169.254.169.254/latest/meta-data/network/interfaces/macs/" + str(MacId.text) + "/vpc-id" + VPCId = requests.get(URL) + response += "
  • My vpc_id = {}
  • ".format(VPCId.text) + + ECS_METADATA_URI = os.getenv('ECS_CONTAINER_METADATA_URI_V4') + container = requests.get(ECS_METADATA_URI).json() + + response += "
  • My DockerId = {}
  • ".format(container['DockerId']) + response += "
  • My Name = {}
  • ".format(container['Name']) + response += "
  • My DockerName = {}
  • ".format(container['DockerName']) + response += "
  • My Network Mode = {}
  • ".format(container['Networks'][0]['NetworkMode']) + response += "
  • My IPs = {}
  • ".format(container['Networks'][0]['IPv4Addresses']) + + ECS_METADATA_TASK_URI = ECS_METADATA_URI + "/task" + task = requests.get(ECS_METADATA_TASK_URI).json() + + response += "
  • My ECS Cluster Name = {}
  • ".format(task['Cluster']) + response += "
  • My Task Arn = {}
  • ".format(task['TaskARN']) + response += "
  • My Task Family:Version = {}:{}
  • ".format(task['Family'], task['Revision']) + + except Exception as inst: + response += "
  • Oops !!! Failed to access my instance metadata with error = {}
  • ".format(inst)
+
+    return response
+
+def getInstanceLifecycle(instanceId, region):
+    """Return the instance's 'InstanceLifecycle' value from EC2, or "Ondemand" when absent."""
+    ec2 = boto3.client('ec2', region_name=region)
+    reservations = ec2.describe_instances(InstanceIds=[instanceId])['Reservations']
+    instance = reservations[0]['Instances'][0]
+    # A description without the key is reported as On-Demand.
+    return instance.get('InstanceLifecycle', "Ondemand")
+
+
+if __name__ == '__main__':
+    # Register the signal handlers before Flask starts serving.
+    handler = Ec2SpotInterruptionHandler()
+    print("Starting A Simple Web Service ...")
+    app.run(port=80,host='0.0.0.0')
diff --git a/workshops/ecs-spot-capacity-providers/webapp/requirements.txt b/workshops/ecs-spot-capacity-providers/webapp/requirements.txt
new file mode 100644
index 00000000..762eee2a
--- /dev/null
+++ b/workshops/ecs-spot-capacity-providers/webapp/requirements.txt
@@ -0,0 +1,7 @@
+Flask==0.10.1
+Flask-Cors==1.10.2
+requests
+signals
+ec2-metadata
+boto3
+