-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Functions for python3 virtualenv and using nfs mounts (#2)
- Loading branch information
Showing
3 changed files
with
102 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash -x

#
# Install a python virtualenv on an NFS mount. If the mount fails,
# fall back to a local install.
#
# Installing python libraries on an NFS mount has the following advantages over
# installing them locally on each node:
# 1. It allows for faster cluster startup and upscaling since the libraries only
#    need to be installed once. This is especially pertinent with libraries that
#    have compiled components, like numpy, scipy, etc.
# 2. One can install new libraries or upgrade existing ones at runtime, and the
#    changes are immediately available to all of the cluster's nodes.
#

source /usr/lib/hustler/bin/qubole-bash-lib.sh
source /usr/lib/bootstrap-functions/misc/mount_nfs.sh
source /usr/lib/bootstrap-functions/misc/python_venv.sh

nfs_mountpoint=/mnt/efs

# mount(8) requires the target directory to exist.
mkdir -p "$nfs_mountpoint"

# Test the mount command directly so nothing can clobber $? in between.
if mount_nfs_volume "fs-7abdefa3.efs.us-east-1.amazonaws.com:/" "$nfs_mountpoint"; then
  is_master=$(nodeinfo is_master)
  cluster_id=$(nodeinfo cluster_id)
  # Use the cluster id so we can install different virtualenvs for
  # different clusters
  install_location="${nfs_mountpoint}/${cluster_id}/py36"

  # symlink to same path as local install so we can
  # use in zeppelin
  symlink=/usr/lib/virtualenv/py36

  # On the NFS path nothing else creates the symlink's parent directory,
  # and "ln" does not create missing parents.
  mkdir -p "$(dirname "$symlink")"

  if [[ "$is_master" != "1" ]]; then
    # -f/-n: replace an existing link instead of failing when the script
    # is run a second time on the same node.
    ln -sfn "$install_location" "$symlink"
    hadoop_use_venv "$install_location"
    # Install only from master. On worker nodes we just
    # need the change to use the new virtualenv
    exit 0
  fi
  install_python_venv "36" "$install_location"
  ln -sfn "$install_location" "$symlink"
else
  # NFS is unavailable: install into the default local location.
  install_python_venv
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash | ||
|
||
source /usr/lib/hustler/bin/qubole-bash-lib.sh | ||
|
||
# | ||
# Instructions for AWS EFS mount: | ||
# 1. After creating the EFS file system, create a security group | ||
# 2. Create an inbound traffic rule for this security group that allows traffic on | ||
# port 2049 (NFS) from this security group as described here: | ||
# https://docs.aws.amazon.com/efs/latest/ug/accessing-fs-create-security-groups.html | ||
# 3. Add this security group as a persistent security group for the cluster from which | ||
# you want to mount the EFS store, as described here: | ||
# http://docs.qubole.com/en/latest/admin-guide/how-to-topics/persistent-security-group.html | ||
# | ||
# TODO: add instructions for Azure file share | ||
# | ||
|
||
#
# Mount an NFS (v4.1) export at the given mount point.
#
# The export is mounted with read and write size options when
# "nodeinfo is_master" prints "1", and read-only otherwise.
#
# Arguments:
#   $1 - NFS export to mount, e.g. "host:/path"
#   $2 - local directory to mount the export on (created if missing)
# Returns:
#   1 if an argument is missing, the status of mkdir if the mount point
#   cannot be created, otherwise the exit status of mount.
#
function mount_nfs_volume() {
  local nfs_export=${1:-}
  local mountpoint=${2:-}

  if [[ -z "$nfs_export" || -z "$mountpoint" ]]; then
    echo "usage: mount_nfs_volume <nfs_export> <mountpoint>" >&2
    return 1
  fi

  # mount fails when the target directory does not exist.
  mkdir -p "$mountpoint" || return

  # Declared separately from the assignment so "local" does not hide
  # the command's exit status.
  local is_master
  is_master=$(nodeinfo is_master)

  # Options shared by both node types.
  local common_opts="hard,timeo=600,retrans=2"
  local mount_opts
  if [[ "$is_master" == "1" ]]; then
    mount_opts="nfsvers=4.1,rsize=1048576,wsize=1048576,${common_opts}"
  else
    mount_opts="nfsvers=4.1,ro,rsize=1048576,${common_opts}"
  fi

  mount -v -t nfs4 -o "$mount_opts" "$nfs_export" "$mountpoint"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash -x | ||
|
||
# | ||
# This function activates the new virtualenv, so install | ||
# any libraries you want after calling this with "pip install" | ||
# | ||
# Alternatively you can also use a requirements file. For example | ||
# to use a requirements file stored in S3 or Azure Blob Store, run | ||
# | ||
# /usr/lib/hadoop2/bin/hadoop dfs -get {s3|wasb}://path/to/requirements/file /tmp/requirements.txt | ||
# pip install -r /tmp/requirements.txt | ||
# | ||
|
||
#
# Install a python version with yum, create a virtualenv for it, register
# the virtualenv via hadoop_use_venv and activate it in the current shell.
#
# Arguments:
#   $1 - python version suffix used for the yum package and interpreter
#        name (default: 36, i.e. package "python36", /usr/bin/python36)
#   $2 - directory to create the virtualenv in
#        (default: /usr/lib/virtualenv/py36)
# Returns:
#   non-zero if the directory or the virtualenv cannot be created,
#   otherwise the status of sourcing the virtualenv's activate script.
#
function install_python_venv() {
  # NOTE: the positional parameters are written ${1}, not ${$1};
  # the latter is a "bad substitution" error in bash.
  local version=${1:-36}
  local location=${2:-/usr/lib/virtualenv/py36}

  # Best effort: the interpreter may already be present even when yum
  # reports a failure, so warn and let virtualenv be the real check.
  yum install -y "python${version}" \
    || echo "install_python_venv: yum install python${version} failed" >&2

  mkdir -p "$location" || return

  virtualenv -p "/usr/bin/python${version}" "$location" || return
  hadoop_use_venv "$location"

  source "${location}/bin/activate"
}
|
||
#
# Append a line to hadoop-env.sh that sources the activate script of the
# given virtualenv. The line is only appended when it is not already
# present, so calling this repeatedly does not pile up duplicate entries.
#
# Arguments:
#   $1 - virtualenv directory
#   $2 - file to append to
#        (default: /usr/lib/hadoop2/etc/hadoop/hadoop-env.sh)
# Returns:
#   1 if no virtualenv directory is given, otherwise the status of the
#   append.
#
function hadoop_use_venv() {
  local location=${1:-}
  local env_file=${2:-/usr/lib/hadoop2/etc/hadoop/hadoop-env.sh}

  if [[ -z "$location" ]]; then
    echo "usage: hadoop_use_venv <virtualenv_dir> [env_file]" >&2
    return 1
  fi

  local line="VIRTUAL_ENV_DISABLE_PROMPT=1 source ${location}/bin/activate ${location}"

  # -x whole line, -F literal string: skip the append if the exact
  # entry is already in the file.
  if [[ -f "$env_file" ]] && grep -qxF -- "$line" "$env_file"; then
    return 0
  fi

  printf '%s\n' "$line" >> "$env_file"
}