-
Notifications
You must be signed in to change notification settings - Fork 471
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYSTEMDS-1780] Final resource optimizer for AWS EMR
Closes #2135.
- Loading branch information
1 parent
c929843
commit e326add
Showing
56 changed files
with
5,175 additions
and
1,226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
Region,Fee Ratio,EBS Price | ||
af-south-1,0.195918367,0.1047 | ||
ap-east-1,0.181818182,0.1056 | ||
ap-northeast-1,0.193548387,0.096 | ||
ap-northeast-2,0.203389831,0.0912 | ||
ap-northeast-3,0.193548387,0.096 | ||
ap-south-1,0.237623762,0.0912 | ||
ap-south-2,0.237623762,0.0912 | ||
ap-southeast-1,0.2,0.096 | ||
ap-southeast-2,0.2,0.096 | ||
ap-southeast-3,0.2,0.096 | ||
ap-southeast-4,0.2,0.096 | ||
ap-southeast-5,0.235294118,0.0864 | ||
ca-central-1,0.224299065,0.088 | ||
ca-west-1,0.224299065,0.088 | ||
eu-central-1,0.208695652,0.0952 | ||
eu-central-2,0.18972332,0.1142 | ||
eu-north-1,0.235294118,0.0836 | ||
eu-south-1,0.214285714,0.0924 | ||
eu-south-2,0.224299065,0.088 | ||
eu-west-1,0.224299065,0.088 | ||
eu-west-2,0.216216216,0.0928 | ||
eu-west-3,0.214285714,0.0928 | ||
il-central-1,0.213333333,0.1056 | ||
me-central-1,0.204255319,0.0968 | ||
me-south-1,0.204255319,0.0968 | ||
sa-east-1,0.156862745,0.152 | ||
us-east-1,0.25,0.08 | ||
us-east-2,0.25,0.08 | ||
us-west-1,0.214285714,0.096 | ||
us-west-2,0.25,0.08 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env bash

# Thin launcher for the resource optimizer jar.
# Usage: <this script> [optimizer arguments...]
# Requires: SYSTEMDS_ROOT pointing to the root of the SystemDS checkout/build.
# All given arguments are forwarded unchanged, followed by "-options" with the
# default properties file.

# Fail early with a clear message instead of silently resolving the paths
# below against the filesystem root when SYSTEMDS_ROOT is unset or empty.
: "${SYSTEMDS_ROOT:?SYSTEMDS_ROOT must be set to the SystemDS root directory}"

ROPT_JAR_FILE="${SYSTEMDS_ROOT}/target/ResourceOptimizer.jar"
DEFAULT_PROPERTIES="${SYSTEMDS_ROOT}/scripts/resource/options.properties"

java -jar "$ROPT_JAR_FILE" "$@" -options "$DEFAULT_PROPERTIES"
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Configurations for EMR launch | ||
|
||
# User-defined configurations -------------------------------- | ||
|
||
# Program specific -------------------------------- | ||
|
||
# URI address of the SystemDS jar file on S3
SYSTEMDS_JAR_URI= | ||
# DML script path (use s3a:// URI schema for remote scripts in S3) | ||
SYSTEMDS_PROGRAM=s3://systemds-testing/dml_scripts/Algorithm_L2SVM.dml | ||
# Set the file path arguments with the URI address adapted
# to the actual file location, always using the s3a:// schema
# (comma separated values)
SYSTEMDS_ARGS= | ||
# comma separated key=value pairs | ||
SYSTEMDS_NVARGS=m=200000,n=10000 | ||
#Y=s3://systemds-testing/data/Y.csv,B=s3a://systemds-testing/data/B.csv | ||
|
||
# AWS specific ------------------------- | ||
|
||
# Inspect the version differences before changing to a version different from 7.3.0
EMR_VERSION="emr-7.3.0" | ||
# output file of the resource optimization: hardware configurations | ||
INSTANCE_CONFIGS= | ||
# output file of the resource optimization: Spark configurations | ||
SPARK_CONFIGS= | ||
# existing SSH key (not created automatically) | ||
KEYPAIR_NAME= | ||
# Choose the same region that was used when executing the resource optimizer
REGION=us-east-1 | ||
# Optionally provide a (single) security group id to be attached additionally to the master node
# If the value is empty the option won't be used, AWS won't attach an additional group and SSH may be blocked
# Multiple additional groups are not supported by the launch script and this one is attached to the master only
SECURITY_GROUP_ID= | ||
# Provide already created names | ||
# or desired names for generation with 'generate_instance_profile.sh' | ||
INSTANCE_PROFILE_NAME= | ||
IAM_ROLE_NAME= | ||
# Desired subnet to be used by the cluster, if not defined a default one will be used | ||
TARGET_SUBNET= | ||
# S3 folder URI for landing of log files | ||
LOG_URI= | ||
|
||
# Execution specific ------------------------- | ||
|
||
# (number) - if 0 the cluster will be terminated automatically after program execution
#          - if greater than 0 the cluster will be terminated automatically after the given number of seconds in idle state
#          - if less than 0 no automatic termination rules will be applied
AUTO_TERMINATION_TIME=-1 | ||
|
||
# Automatic configurations (read only for users) ------------- | ||
|
||
# Current EMR Cluster ID | ||
CLUSTER_ID= | ||
# Public DNS name of the master node in the current cluster
CLUSTER_URL= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env bash | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Launches an EMR cluster via the AWS CLI using the settings from cluster.env,
# optionally adds a step running $SYSTEMDS_PROGRAM, waits until the cluster is
# running and records the cluster id and master DNS name through set_config.

# exit in case of error or unbound var
set -euo pipefail

# get file directory to allow finding the file with the utils
SCRIPT_DIR="$(dirname "$(realpath "$0")")"

# cluster.env is looked up in the current working directory,
# cluster_utils.sh next to this script
source cluster.env
source "$SCRIPT_DIR/cluster_utils.sh"

if [ -n "$TARGET_SUBNET" ]; then
    SUBNET=$TARGET_SUBNET
else
    # Take the first subnet flagged as default for its availability zone
    # in the configured region
    SUBNET=$(aws ec2 describe-subnets --region "$REGION" \
        --filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)
fi

# generate the step definition into STEP variable
generate_step_definition

# Assemble the EC2 attributes JSON; the additional master security group is
# added only when one is actually configured (an empty id must not produce
# an "AdditionalMasterSecurityGroups": [""] entry).
EC2_ATTRIBUTES='{"KeyName":"'"${KEYPAIR_NAME}"'","InstanceProfile":"EMR_EC2_DefaultRole",'
if [ -n "$SECURITY_GROUP_ID" ]; then
    EC2_ATTRIBUTES+='"AdditionalMasterSecurityGroups":["'"${SECURITY_GROUP_ID}"'"],'
fi
EC2_ATTRIBUTES+='"SubnetId":"'"${SUBNET}"'"}'

# Build the command as an array so that every value stays a single argument
CREATE_CLUSTER_CMD=(
    aws emr create-cluster
    --applications Name=AmazonCloudWatchAgent Name=Spark
    --ec2-attributes "$EC2_ATTRIBUTES"
    --service-role EMR_DefaultRole
    --enable-debugging
    --release-label "$EMR_VERSION"
    --log-uri "$LOG_URI"
    --name "SystemDS cluster"
    --instance-groups "file://$INSTANCE_CONFIGS"
    --configurations "file://$SPARK_CONFIGS"
    --scale-down-behavior TERMINATE_AT_TASK_COMPLETION
    --no-termination-protected
    --region "$REGION"
)

if [ -n "${STEP:-}" ]; then
    # STEP is expanded unquoted on purpose, exactly as before: its format is
    # produced by generate_step_definition (not visible here) and may rely on
    # word splitting - TODO confirm and quote if it is always a single word.
    # shellcheck disable=SC2206
    CREATE_CLUSTER_CMD+=(--steps $STEP)
fi

if [ "$AUTO_TERMINATION_TIME" = 0 ]; then
    CREATE_CLUSTER_CMD+=(--auto-terminate)
elif [ "$AUTO_TERMINATION_TIME" -gt 0 ]; then
    CREATE_CLUSTER_CMD+=(--auto-termination-policy "IdleTimeout=$AUTO_TERMINATION_TIME")
fi

echo -e "\nLaunching EMR cluster via AWS CLI and adding a step to run $SYSTEMDS_PROGRAM with SystemDS"
CLUSTER_INFO=$("${CREATE_CLUSTER_CMD[@]}")

CLUSTER_ID=$(printf '%s' "$CLUSTER_INFO" | jq -r '.ClusterId')
echo "Cluster successfully initialized with cluster ID: ${CLUSTER_ID}"
# set_config comes from cluster_utils.sh; presumably it persists the value
# into cluster.env - verify against the helper
set_config "CLUSTER_ID" "$CLUSTER_ID"

# Wait for cluster to start
echo -e "\nWaiting for cluster to enter running state..."
aws emr wait cluster-running --cluster-id "$CLUSTER_ID" --region "$REGION"

CLUSTER_URL=$(aws emr describe-cluster --cluster-id "$CLUSTER_ID" --region "$REGION" \
    | jq -r '.Cluster.MasterPublicDnsName')
set_config "CLUSTER_URL" "$CLUSTER_URL"

echo "...launching process has finished and the cluster is now in state running."

if [ "$AUTO_TERMINATION_TIME" = 0 ]; then
    echo -e "\nImmediate automatic termination was enabled so the cluster will terminate directly after the step completion"
elif [ "$AUTO_TERMINATION_TIME" -gt 0 ]; then
    echo -e "\nDelayed automatic termination was enabled so the cluster will terminate $AUTO_TERMINATION_TIME seconds after entering idle state"
else
    echo -e "\nAutomatic termination was not enabled so you should manually terminate the cluster"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env bash | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Adds a step running $SYSTEMDS_PROGRAM to the already launched EMR cluster
# referenced by CLUSTER_ID in cluster.env. With AUTO_TERMINATION_TIME=0 the
# script waits for the step to complete and then terminates the cluster.

# exit in case of error or unbound var
set -euo pipefail

# get file directory to allow finding the file with the utils
SCRIPT_DIR="$(dirname "$(realpath "$0")")"

# cluster.env is looked up in the current working directory,
# cluster_utils.sh next to this script
source cluster.env
source "$SCRIPT_DIR/cluster_utils.sh"

# A step can only be added to an existing cluster
if [ -z "${CLUSTER_ID:-}" ]; then
    echo "Error: CLUSTER_ID is empty, launch a cluster before adding a step." >&2
    exit 1
fi

# generate the step definition into STEP variable
generate_step_definition
if [ -z "${STEP:-}" ]; then
    echo "Error: Empty step definition, probably due to empty SYSTEMDS_PROGRAM option." >&2
    exit 1
fi

echo "Adding a step to run $SYSTEMDS_PROGRAM with SystemDS"
# STEP is expanded unquoted on purpose, exactly as before: its format is
# produced by generate_step_definition (not visible here) and may rely on
# word splitting - TODO confirm and quote if it is always a single word.
# shellcheck disable=SC2086
STEP_INFO=$(aws emr add-steps --cluster-id "$CLUSTER_ID" --region "$REGION" --steps $STEP)

if [ "$AUTO_TERMINATION_TIME" = 0 ]; then
    # a single step was submitted, so its id is the first (only) list entry
    STEP_ID=$(printf '%s' "$STEP_INFO" | jq -r '.StepIds[0]')
    echo "Waiting for the step to finish before termination (immediate automatic termination enabled)"
    aws emr wait step-complete --cluster-id "$CLUSTER_ID" --step-id "$STEP_ID" --region "$REGION"
    echo "The step has finished and now the cluster will be immediately terminated"
    # pass the region explicitly, consistent with all other aws calls here
    aws emr terminate-clusters --cluster-ids "$CLUSTER_ID" --region "$REGION"
elif [ "$AUTO_TERMINATION_TIME" -gt 0 ]; then
    echo "Delayed automatic termination will apply only in case this option was set on cluster launch."
    echo "You should manually track the step completion"
else
    echo "Automatic termination was not enabled so you should manually track the step completion and terminate the cluster"
fi
|
||
|
Oops, something went wrong.