Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Staging deployment and Railway statistics #157

Merged
merged 24 commits into from
Oct 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9dd9832
Add rail-related tracking features
jpolchlo Aug 1, 2019
2132083
Adjust definitions of rail features, disaggregate rail counts
jpolchlo Aug 1, 2019
d3152ee
Fix casting error with JSON
jpolchlo Aug 1, 2019
3762f23
Unify table schemas and union
jpolchlo Aug 2, 2019
e97e21f
Address minor PR review comments
jpolchlo Aug 2, 2019
729259a
Address minor PR review comments
jpolchlo Aug 2, 2019
627f3e4
Write end position in augdiff and changeset streams to checkpoints ta…
jpolchlo Aug 6, 2019
ed5e6c3
Introduce new Makefile for streaming deployment
jpolchlo Aug 16, 2019
6fd0966
Clean up and improve streaming deployment scripts
jpolchlo Aug 19, 2019
4b96c25
Make script executable
jpolchlo Aug 19, 2019
e636e4e
Further changes to deployment scripts
jpolchlo Aug 20, 2019
4ab2bcc
Be smarter about which environment vars to check
jpolchlo Aug 20, 2019
957285c
Improve batch process deployment
jpolchlo Aug 23, 2019
a4b3172
Ensure streaming services are stopped during deployment cycle; simpli…
jpolchlo Aug 23, 2019
01bf457
Modify log messages
jpolchlo Aug 23, 2019
6da0912
Allow easy addition of extra EBS volumes in batch EMR deployment
jpolchlo Sep 18, 2019
06c46d4
Remove useful components to Vectorpipe and rename ChangesetORCCreator
jpolchlo Sep 19, 2019
3349bf5
Use VP functions for timestamp/seqence number conversions
jpolchlo Sep 25, 2019
baca909
[skip ci] Minor formatting update; leave note for future work
jpolchlo Sep 26, 2019
64ae32e
Clean up references
jpolchlo Sep 26, 2019
7dd079e
Minor fixes to address comments
jpolchlo Sep 26, 2019
5a26276
Move to VP 1.1.0
jpolchlo Sep 26, 2019
115ef17
Fix description
jpolchlo Sep 26, 2019
99d812e
Remove cluster configurations
jpolchlo Sep 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions deployment/batch/makefiles/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ ifndef CLUSTER_ID
CLUSTER_ID=$(shell if [ -e "cluster-id.txt" ]; then cat cluster-id.txt; fi)
endif

# Optional extra attributes for the CORE instance group. When CORE_EMR_ATTRS
# has a non-empty value it is prefixed with a comma so it can be appended
# directly to the instance-group definition in create-cluster; otherwise
# nothing is appended.
ifdef CORE_EMR_ATTRS
  EMR_ATTRS_CORE = ,$(CORE_EMR_ATTRS)
else
  EMR_ATTRS_CORE =
endif

# Optional extra attributes for the MASTER instance group. When
# MASTER_EMR_ATTRS has a non-empty value it is prefixed with a comma so it can
# be appended directly to the instance-group definition in create-cluster;
# otherwise nothing is appended.
ifdef MASTER_EMR_ATTRS
  EMR_ATTRS_MASTER = ,$(MASTER_EMR_ATTRS)
else
  EMR_ATTRS_MASTER =
endif

# rwildcard: recursive wildcard search, invoked with $(call rwildcard,DIR,PATTERN).
#   $1 - path prefix to search under; every entry matching `$1*` is visited
#   $2 - pattern(s) to keep, written with `*` (rewritten to make's `%` for filter)
# For each entry d the function recurses with `d/` as the new prefix and also
# keeps d itself when it matches the pattern.
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))

${INGEST_ASSEMBLY}: $(call rwildcard, ${INGEST_SRC_DIR}/src, *.scala) ${INGEST_SRC_DIR}/build.sbt
Expand All @@ -45,8 +57,8 @@ create-cluster:
--ec2-attributes KeyName=${EC2_KEY},SubnetId=${SUBNET_ID},EmrManagedMasterSecurityGroup=${MASTER_SECURITY_GROUP},EmrManagedSlaveSecurityGroup=${WORKER_SECURITY_GROUP},ServiceAccessSecurityGroup=${SERVICE_ACCESS_SG},AdditionalMasterSecurityGroups=${SANDBOX_SG},AdditionalSlaveSecurityGroups=${SANDBOX_SG} \
--applications Name=Ganglia Name=Hadoop Name=Hue Name=Spark Name=Zeppelin \
--instance-groups \
'Name=Master,${MASTER_BID_PRICE}InstanceCount=1,InstanceGroupType=MASTER,InstanceType=${MASTER_INSTANCE}' \
'Name=Workers,${WORKER_BID_PRICE}InstanceCount=${WORKER_COUNT},InstanceGroupType=CORE,InstanceType=${WORKER_INSTANCE}' \
'Name=Master,${MASTER_BID_PRICE}InstanceCount=1,InstanceGroupType=MASTER,InstanceType=${MASTER_INSTANCE}${EMR_ATTRS_MASTER}' \
'Name=Workers,${WORKER_BID_PRICE}InstanceCount=${WORKER_COUNT},InstanceGroupType=CORE,InstanceType=${WORKER_INSTANCE}${EMR_ATTRS_CORE}' \
| tee cluster-id.txt

upload-ingest: ${INGEST_ASSEMBLY}
Expand Down
10 changes: 7 additions & 3 deletions deployment/batch/makefiles/config-emr.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ export SANDBOX_SG := sg-6b227c23

export MASTER_INSTANCE := m3.xlarge
export MASTER_PRICE := 0.10
export WORKER_INSTANCE := m3.xlarge
export WORKER_PRICE := 0.07
export WORKER_COUNT := 32
export WORKER_INSTANCE := r3.xlarge
export WORKER_PRICE := 0.20
export WORKER_COUNT := 64
export USE_SPOT := true

export DRIVER_MEMORY := 10000M
export DRIVER_CORES := 4
export EXECUTOR_MEMORY := 10000M
export EXECUTOR_CORES := 8
export YARN_OVERHEAD := 1500

# Uncomment/edit the following lines to add extra attributes to the cluster creation
#export MASTER_EMR_ATTRS :=
export CORE_EMR_ATTRS := EbsConfiguration={EbsOptimized=true,EbsBlockDeviceConfigs=[{VolumeSpecification={VolumeType=gp2,SizeInGB=1024}}]}
12 changes: 11 additions & 1 deletion deployment/build-container.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
#!/bin/bash

# Refuse to run without a usable version tag. The Makefile in the 'streaming'
# directory invokes this script as `VERSION_TAG=<value> ./build-container.sh`,
# so the variable is always *set* there, even when the value is empty. Test for
# a non-empty value instead of a merely set one: an empty tag would otherwise
# reach `docker build --tag osm_analytics:` as an invalid image reference.
# Diagnostics go to stderr so they are not mistaken for normal output.
if [ -z "${VERSION_TAG:-}" ]; then
    echo "No version tag has been set. Do not run this script directly; instead, issue" >&2
    echo " make build-container" >&2
    echo "from the 'streaming' directory." >&2
    exit 1
else
    echo "Version tag is set to '${VERSION_TAG}'"
fi

set -xe
SBT_DIR="../src"
JAR_DIR=${SBT_DIR}/analytics/target/scala-2.11/
Expand All @@ -10,4 +19,5 @@ cd ${SBT_DIR}
cp ${JAR_DIR}/osmesa-analytics.jar ${DOCKER_DIR}/osmesa-analytics.jar

cd ${DOCKER_DIR}
docker build -f ${DOCKER_DIR}/Dockerfile --tag osm_analytics:latest ${DOCKER_DIR}

docker build -f ${DOCKER_DIR}/Dockerfile --tag osm_analytics:${VERSION_TAG} ${DOCKER_DIR}
6 changes: 1 addition & 5 deletions deployment/streaming/.gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
repository
docker-compose.local.yml
docker-compose.deploy.yml
config-local.mk
config-aws.mk
config-*.mk
188 changes: 95 additions & 93 deletions deployment/streaming/Makefile
Original file line number Diff line number Diff line change
@@ -1,104 +1,106 @@
include config-aws.mk # Variables for AWS options
include config-local.mk # Variables related to running locally
include config-deployment.mk

# The osmesa container
LOCAL_IMG := osm_analytics:latest
# If the user is on master branch, see if we should deploy to production
#
# The tag reported by scripts/get-tag.sh selects the environment: the tag
# "production" targets the production database and cluster with unsuffixed
# task names; any other tag targets the staging database and cluster and adds
# the "-staging" suffix to task names.
#
# VERSION_TAG uses `:=` so the script runs exactly once, at parse time. With a
# recursive `=` it would be re-run on every expansion, including once per
# recipe command because .EXPORT_ALL_VARIABLES exports it to every child.
VERSION_TAG := $(shell ./scripts/get-tag.sh)
ifeq ($(VERSION_TAG), production)
  DATABASE=${PRODUCTION_DB}
  ECS_CLUSTER=${CLUSTER_NAME_DEPLOYMENT}
  TASK_SUFFIX=
else
  DATABASE=${STAGING_DB}
  ECS_CLUSTER=${CLUSTER_NAME_STAGING}
  TASK_SUFFIX=-staging
endif
# Full database URI handed to the batch job (see batch-generate-db-backfill).
DB_URI=${DB_BASE_URI}/${DATABASE}

.EXPORT_ALL_VARIABLES:

#########
# LOCAL #
#########
docker-compose.local.yml:
export LOCAL_IMG=${LOCAL_IMG}; \
export AUGDIFF_SOURCE=${LOCAL_AUGDIFF_SOURCE}; \
export CHANGESET_SOURCE=${LOCAL_CHANGESET_SOURCE}; \
export CHANGE_SOURCE=${LOCAL_CHANGE_SOURCE}; \
export AUGDIFF_START=${LOCAL_AUGDIFF_START}; \
export CHANGESET_START=${LOCAL_CHANGESET_START}; \
export CHANGE_START=${LOCAL_CHANGE_START}; \
./expand.sh docker-compose.local.yml.tpl > docker-compose.local.yml
#############################
# Docker image management #
#############################

start-local: docker-compose.local.yml
docker-compose -f docker-compose.local.yml up
.PHONY: build-container login-aws-registry tag-image push-image

stop-local:
docker-compose -f docker-compose.local.yml down


#########
# AWS #
#########
build-container:
cd .. && VERSION_TAG=${VERSION_TAG} ./build-container.sh

login-aws-registry:
eval `aws ecr get-login --no-include-email --region ${AWS_REGION}`

tag-image:
docker tag ${LOCAL_IMG} ${ECR_REPO}
tag-image: build-container
docker tag osm_analytics:${VERSION_TAG} ${ECR_IMAGE}:${VERSION_TAG}

push-image: login-aws-registry tag-image
docker push ${ECR_REPO}

.PHONY: docker-compose.deploy.yml

docker-compose.deploy.yml: docker-compose.deploy.yml.tpl
export ECR_REPO=${ECR_REPO}
export AWS_LOG_GROUP=${AWS_LOG_GROUP}; \
export AWS_REGION=${AWS_REGION}; \
export AUGDIFF_SOURCE=${AUGDIFF_SOURCE}; \
export AUGDIFF_START=${AUGDIFF_START}; \
export CHANGESET_SOURCE=${CHANGESET_SOURCE}; \
export CHANGESET_START=${CHANGESET_START}; \
export DB_URI=${DB_URI}; \
./expand.sh $< > $@

.PHONY: configure-cluster

configure-cluster:
ecs-cli configure \
--cluster ${CLUSTER_NAME} \
--region ${AWS_REGION} \
--config-name ${CONFIG_NAME}

cluster-up:
ecs-cli up \
--keypair ${KEYPAIR} \
--instance-role ${INSTANCE_ROLE} \
--security-group ${SECURITY_GROUP} \
--size 1 \
--instance-type ${INSTANCE_TYPE} \
--cluster-config ${CONFIG_NAME} \
--subnets ${SUBNETS} \
--vpc ${VPC} \
--force \
--verbose

cluster-down:
ecs-cli down --cluster-config ${CONFIG_NAME}

.PHONY: create-service

create-service: docker-compose.deploy.yml configure-cluster
ecs-cli compose \
--file $< create \
--cluster ${CLUSTER_NAME}

start-service: docker-compose.deploy.yml configure-cluster create-service
ecs-cli compose --file $< service up \
--deployment-min-healthy-percent 0 \
--create-log-groups \
--cluster ${CLUSTER_NAME}

stop-service: docker-compose.deploy.yml
ecs-cli compose --file $< down


#########
# ALL #
#########
build-container:
cd .. && ./build-container.sh

clean:
rm -f docker-compose.local.yml
rm -f docker-compose.deploy.yml

docker push ${ECR_IMAGE}:${VERSION_TAG}

#######################
# Streaming AWS Tasks #
#######################

# None of the streaming or batch task targets produce a file named after
# themselves. Declare every one of them phony so a stray file with a matching
# name can never cause make to skip the recipe.
.PHONY: create-log-groups \
        define-streaming-vectortile-tasks \
        define-staging-streaming-update-tasks \
        define-production-streaming-update-tasks \
        stop-streaming-footprint-updater \
        deploy-streaming-footprint-updater \
        stop-streaming-edit-histogram-updater \
        deploy-streaming-edit-histogram-updater \
        stop-streaming-stats-updaters \
        deploy-streaming-stats-updaters \
        deploy-streaming-vectortile-tasks \
        batch-generate-footprints \
        batch-generate-edit-histograms \
        batch-generate-db-backfill

# Each of these targets runs the script of the same name under ./scripts;
# $@ expands to whichever target is being built.
create-log-groups \
define-streaming-vectortile-tasks \
define-staging-streaming-update-tasks \
define-production-streaming-update-tasks:
	./scripts/$@.sh

# Runs scripts/stop-streaming-service.sh for the footprint updater service
# (presumably stops the running service; the script is not shown here).
stop-streaming-footprint-updater:
	./scripts/stop-streaming-service.sh streaming-user-footprint-tile-updater

# Creates the footprint updater as a single-replica Fargate service on the
# deployment cluster, after the stop target above has run.
#
# The task definition now matches the service name. It previously pointed at
# "streaming-edit-histogram-tile-updater", a copy-paste slip that would have
# launched a second edit-histogram updater instead of the footprint updater.
# NOTE(review): assumes define-streaming-vectortile-tasks.sh registers a task
# family named "streaming-user-footprint-tile-updater" - confirm.
deploy-streaming-footprint-updater: stop-streaming-footprint-updater
	aws ecs create-service \
	  --cluster "${CLUSTER_NAME_DEPLOYMENT}" \
	  --service-name "streaming-user-footprint-tile-updater" \
	  --task-definition "streaming-user-footprint-tile-updater" \
	  --desired-count 1 \
	  --launch-type FARGATE \
	  --scheduling-strategy REPLICA \
	  --network-configuration ${NETWORK_CONFIGURATION}

# Creates the edit-histogram updater as a single-replica Fargate service on the
# deployment cluster. The stop target below is a prerequisite, so it always
# runs first.
deploy-streaming-edit-histogram-updater: stop-streaming-edit-histogram-updater
	aws ecs create-service \
	  --cluster "${CLUSTER_NAME_DEPLOYMENT}" \
	  --service-name "streaming-edit-histogram-tile-updater" \
	  --task-definition "streaming-edit-histogram-tile-updater" \
	  --desired-count 1 --launch-type FARGATE --scheduling-strategy REPLICA \
	  --network-configuration ${NETWORK_CONFIGURATION}

# Hands the service name to scripts/stop-streaming-service.sh.
stop-streaming-edit-histogram-updater:
	./scripts/stop-streaming-service.sh streaming-edit-histogram-tile-updater

# Creates the stats updater as a single-replica Fargate service. Unlike the
# tile updaters, the cluster (ECS_CLUSTER) and the task-definition suffix
# (TASK_SUFFIX) depend on whether VERSION_TAG selected production or staging.
# The stop target below is a prerequisite, so it always runs first.
deploy-streaming-stats-updaters: stop-streaming-stats-updaters
	aws ecs create-service \
	  --cluster "${ECS_CLUSTER}" \
	  --service-name "streaming-stats-updater" \
	  --task-definition "streaming-stats-updater${TASK_SUFFIX}" \
	  --desired-count 1 --launch-type FARGATE --scheduling-strategy REPLICA \
	  --network-configuration ${NETWORK_CONFIGURATION}

# Hands the service name to scripts/stop-streaming-service.sh.
stop-streaming-stats-updaters:
	./scripts/stop-streaming-service.sh streaming-stats-updater

# Aggregate target: deploys both vector-tile updater services. The first
# prerequisite was previously spelled "deploy-footprint-updater", which is not
# a target in this Makefile, so make aborted with "No rule to make target".
deploy-streaming-vectortile-tasks: deploy-streaming-footprint-updater deploy-streaming-edit-histogram-updater

###################
# Batch AWS Tasks #
###################

# Both targets run the script of the same name under ./scripts; $@ expands to
# whichever target is being built.
batch-generate-footprints batch-generate-edit-histograms:
	./scripts/$@.sh

# Invokes scripts/batch-process.sh with four arguments: a display name, the
# job name, the number 64 (presumably a cluster size - confirm against the
# script), and a JSON array holding the spark-submit command line for
# osmesa.analytics.oneoffs.ChangesetStatsCreator. The JSON array is kept on a
# single line so the quoted string reaches the shell unchanged.
batch-generate-db-backfill:
	./scripts/batch-process.sh \
	  "OSMesa Batch Process" \
	  "ChangesetStatsCreator" \
	  64 \
	  "[\"spark-submit\", \"--deploy-mode\", \"cluster\", \"--class\", \"osmesa.analytics.oneoffs.ChangesetStatsCreator\", \"--conf\", \"spark.executor.memoryOverhead=2g\", \"--conf\", \"spark.sql.shuffle.partitions=2000\", \"--conf\", \"spark.speculation=true\", \"${OSMESA_ANALYTICS_JAR}\", \"--history\", \"${HISTORY_ORC}\", \"--changesets\", \"${CHANGESETS_ORC}\", \"--changeset-stream\", \"${CHANGESET_SOURCE}\", \"--database-url\", \"${DB_URI}\"]"
25 changes: 0 additions & 25 deletions deployment/streaming/config-aws.mk.example

This file was deleted.

42 changes: 42 additions & 0 deletions deployment/streaming/config-deployment.mk.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
################################################################################
# AWS properties
#
# This file is a template for config-deployment.mk, which the streaming
# Makefile includes. Entries with nothing after `:=` are placeholders to be
# filled in for the target AWS account.
################################################################################
export KEYPAIR :=
# Also reused below as the default for ECS_SUBNET.
export SUBNET :=
# Region passed to `aws ecr get-login` by the login-aws-registry target.
export AWS_REGION := us-east-1
export IAM_ACCOUNT :=

################################################################################
# Streaming resource definitions
################################################################################
export CLUSTER_NAME := osm-stat-stream-cluster
# The streaming Makefile targets CLUSTER_NAME_DEPLOYMENT when the version tag
# is "production" and CLUSTER_NAME_STAGING otherwise; the vector-tile updater
# services always use CLUSTER_NAME_DEPLOYMENT. Both must be defined, otherwise
# `aws ecs create-service` receives an empty --cluster value.
# NOTE(review): the production cluster is assumed to be CLUSTER_NAME - adjust
# if it differs. The staging cluster name must be filled in.
export CLUSTER_NAME_DEPLOYMENT := ${CLUSTER_NAME}
export CLUSTER_NAME_STAGING :=
export STREAMING_INSTANCE_TYPE := m4.xlarge
# Image repository; the Makefile tags and pushes ${ECR_IMAGE}:${VERSION_TAG}.
export ECR_IMAGE :=
export AWS_LOG_GROUP := streaming-stats-updater
export ECS_SUBNET := ${SUBNET}
export ECS_SECURITY_GROUP :=

export AUGDIFF_SOURCE :=
# Passed as --changeset-stream to the batch-generate-db-backfill job.
export CHANGESET_SOURCE :=

# The Makefile builds DB_URI as ${DB_BASE_URI}/<database>, where <database> is
# PRODUCTION_DB for the "production" version tag and STAGING_DB otherwise.
export DB_BASE_URI :=
export PRODUCTION_DB :=
export STAGING_DB :=

# The surrounding double quotes are part of the value: make does not strip
# them. The Makefile expands this variable unquoted in its recipes, so the
# shell sees one double-quoted JSON argument.
export NETWORK_CONFIGURATION="{\"awsvpcConfiguration\": {\"subnets\": [\"${ECS_SUBNET}\"], \"securityGroups\": [\"${ECS_SECURITY_GROUP}\"], \"assignPublicIp\": \"DISABLED\"}}"

################################################################################
# Batch resource definitions
################################################################################
# Defaults to the security group used for the streaming (ECS) services.
export SERVICE_ACCESS_SECURITY_GROUP := ${ECS_SECURITY_GROUP}
export EMR_MASTER_SECURITY_GROUP :=
export EMR_SLAVE_SECURITY_GROUP :=

export BATCH_INSTANCE_TYPE := m4.xlarge
# Jar submitted to spark-submit by batch-generate-db-backfill; replace <bucket>.
export OSMESA_ANALYTICS_JAR := s3://<bucket>/osmesa-analytics.jar

# Passed as --history and --changesets to the batch-generate-db-backfill job.
export HISTORY_ORC :=
export CHANGESETS_ORC :=

export FOOTPRINT_VT_LOCATION :=
export HISTOGRAM_VT_LOCATION :=
7 changes: 0 additions & 7 deletions deployment/streaming/config-local.mk.example

This file was deleted.

42 changes: 0 additions & 42 deletions deployment/streaming/docker-compose.deploy.yml.tpl

This file was deleted.

Loading