# Source: hadoop/util.sh, functions added after use_java8() (diff hunk @@ -73,3 +73,34 @@).
# The tail of use_java8() that appeared as hunk context is not reproduced here.

#######################################
# Mount the given Lustre FSx file system and use it as the YARN shuffle
# directory for this cluster instance.
#
# Steps: install xmlstarlet, mount <dns>@tcp:/fsx on /lustre, create a
# world-writable /lustre/qubole/<cluster_id>-<cluster_inst_id> directory,
# point yarn.nodemanager.shuffle-dirs in yarn-site.xml at it, then remove
# this cluster's older instance directories in the background.
#
# Arguments:
#   $1 - Lustre DNS name (mandatory)
# Returns:
#   0 on success; 1 if the DNS name is missing, the cluster identifiers
#   cannot be read, the mount fails, or the shuffle directory cannot be
#   created.
#######################################
function mount_lustre_as_shuffle_dir() {
  local lustre_dns=$1
  local mount_point=/lustre
  local yarn_site=/usr/lib/hadoop2/etc/hadoop/yarn-site.xml
  local cluster_id cluster_inst_id instance_folder_identifier shuffle_dir

  if [[ -z "$lustre_dns" ]]; then
    echo "Specifying Lustre DNS is must!" >&2
    return 1
  fi

  sudo yum -y install xmlstarlet

  cluster_id=$(nodeinfo cluster_id)
  cluster_inst_id=$(nodeinfo cluster_inst_id)
  # Without both identifiers the directory name would degenerate to "-",
  # so refuse to continue rather than configure a bogus shuffle directory.
  if [[ -z "$cluster_id" || -z "$cluster_inst_id" ]]; then
    echo "Unable to determine cluster_id/cluster_inst_id from nodeinfo" >&2
    return 1
  fi
  instance_folder_identifier="${cluster_id}-${cluster_inst_id}"
  shuffle_dir="${mount_point}/qubole/${instance_folder_identifier}"

  mkdir -p "${mount_point}/"
  # Skip the mount when something is already mounted there (e.g. a re-run);
  # otherwise a failed mount must abort, or the "shuffle dir" would silently
  # be created on the local disk underneath the mount point.
  if ! mountpoint -q "$mount_point" 2>/dev/null; then
    if ! mount -t lustre "${lustre_dns}@tcp:/fsx" "$mount_point"; then
      echo "Failed to mount ${lustre_dns}@tcp:/fsx on ${mount_point}" >&2
      return 1
    fi
  fi

  if ! mkdir -p "$shuffle_dir"; then
    echo "Failed to create shuffle directory ${shuffle_dir}" >&2
    return 1
  fi
  chmod 777 "$shuffle_dir"
  chmod 777 "${mount_point}/qubole/"
  chmod 777 "${mount_point}/"

  xmlstarlet ed --inplace \
    --update "/configuration/property[name='yarn.nodemanager.shuffle-dirs']/value" \
    --value "$shuffle_dir" \
    "$yarn_site"

  # Cleanup of older instance directories can be slow; run it in the background.
  delete_previous_shuffle_dirs "$cluster_id" "$instance_folder_identifier" &
}

#######################################
# Delete shuffle directories left behind by previous instances of a cluster.
#
# Removes every entry named "<cluster_id>-*" directly under the base
# directory except the one whose name equals the directory to keep.
# Matching is literal and anchored at the start of the name, so cluster id
# "12" does not match "112-7". The caller's working directory is untouched.
#
# Arguments:
#   $1 - cluster id whose old shuffle directories are to be deleted
#   $2 - directory name (<cluster_id>-<cluster_inst_id>) that must be kept
#   $3 - optional base directory to clean; defaults to /lustre/qubole
# Returns:
#   0 on success; 1 if an argument is missing, the base directory does not
#   exist, or a removal fails.
#######################################
function delete_previous_shuffle_dirs() {
  local cluster_id=$1
  local keep_name=$2
  local base_dir=${3:-/lustre/qubole}
  local entry
  local status=0

  if [[ -z "$cluster_id" || -z "$keep_name" ]]; then
    echo "delete_previous_shuffle_dirs: cluster id and directory to keep are required" >&2
    return 1
  fi
  # Never fall through to deleting relative to some other directory.
  if [[ ! -d "$base_dir" ]]; then
    echo "delete_previous_shuffle_dirs: ${base_dir} is not a directory" >&2
    return 1
  fi

  for entry in "${base_dir}/${cluster_id}"-*; do
    # An unmatched glob stays literal; skip it (but still handle dangling symlinks).
    [[ -e "$entry" || -L "$entry" ]] || continue
    [[ "${entry##*/}" == "$keep_name" ]] && continue
    rm -rf -- "$entry" || status=1
  done

  return "$status"
}