Autogenerate documentation for bootstrap-functions library (#27)
akaranjkar-qu authored Mar 18, 2020
1 parent f747ab1 commit 372fd09
Showing 13 changed files with 301 additions and 87 deletions.
30 changes: 30 additions & 0 deletions README.template
@@ -0,0 +1,30 @@
# bootstrap-functions
This repository holds common functions that can be used in Qubole node bootstraps

## How to use

Source the required script in your bootstrap script. For example, to mount an EFS volume with the bootstrap, you may do the following:

```
source /usr/lib/qubole/bootstrap-functions/misc/mount_nfs.sh

mount_nfs fs-7abd2444.efs.us-east-1.amazonaws.com:/ /mnt/efs
```

## Available functions
The following set of functions are available at present:

## Contributing
Please raise a pull request for any modifications or additions you would like to make. There may be a delay between when you want to start using a method and when it becomes available via Qubole's AMI. To work around this, it is recommended to put a placeholder `source` line in your bootstrap script. For example:

```
function mysparkfunction() {
# ... do some stuff
}

source /usr/lib/qubole/bootstrap-functions/spark/mysparkfunction.sh

mysparkfunction arg1 arg2 ...
```

This way, when the function makes it to the AMI, you will automatically use the copy in the bootstrap-functions library.
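
Note that if your bootstrap runs with `set -e`, sourcing a library file that does not yet exist on the AMI will abort the script. A guarded `source` avoids this; the sketch below (using the hypothetical path and function name from the example above) keeps the placeholder in use until the library copy ships:

```
function mysparkfunction() {
    # placeholder implementation used until the library copy ships in the AMI
    # ... do some stuff
}

# Source the library copy only if it is already present on this AMI;
# otherwise the placeholder definition above stays in effect.
lib=/usr/lib/qubole/bootstrap-functions/spark/mysparkfunction.sh
if [ -f "${lib}" ]; then
    source "${lib}"
fi

mysparkfunction arg1 arg2 ...
```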
79 changes: 65 additions & 14 deletions common/utils.sh
@@ -1,41 +1,92 @@
#!/usr/bin/env bash
#
# @file common/utils.sh
# @brief Provides common utility functions

#--------------------------------------------------------------------------------
# Utility methods
#--------------------------------------------------------------------------------

# @description Function to populate nodeinfo
#
# Please call this method at the start of node bootstrap
#
# @example
# populate_nodeinfo
#
# @noargs
populate_nodeinfo() {
source /usr/lib/hustler/bin/qubole-bash-lib.sh
}

# Returns 0 when run on a Hadoop2 cluster node.
# Returns 1 otherwise
# @description Function to check if the node belongs to a Hadoop2 cluster
#
# @example
# if is_hadoop2_cluster; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 If the cluster runs hadoop2
# @exitcode 1 Otherwise
is_hadoop2_cluster() {
[[ `nodeinfo use_hadoop2` = "1" ]]
}


# Returns 0 when HiveServer2 is configured to run on the cluster master.
# Returns 1 otherwise
# @description Function to check if a HiveServer2 is configured to run on a master node
#
# @example
# if is_hs2_enabled; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When HiveServer2 is configured on a master node
# @exitcode 1 Otherwise
is_hs2_enabled() {
is_hadoop2_cluster && [[ `nodeinfo hive_use_hs2` = "1" ]]
}

# Returns 0 when run on a HiveServer2 cluster node.
# Returns 1 otherwise
# @description Function to check if a node belongs to a HiveServer2 cluster
#
# @example
# if is_hs2_cluster; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node belongs to a HiveServer2 cluster
# @exitcode 1 Otherwise
is_hs2_cluster() {
is_hadoop2_cluster && [[ `nodeinfo is_hs2_cluster` = "1" ]]
}

# Returns 0 when run on a cluster master node.
# Returns 1 otherwise
# @description Function to check if a node is a cluster master node
#
# @example
# if is_master_node; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node is a cluster master node
# @exitcode 1 Otherwise
is_master_node() {
[[ `nodeinfo is_master` = "1" ]]
}

# Returns 0 when run on a cluster worker node.
# Returns 1 otherwise
# @description Function to check if a node is a cluster worker node
#
# @example
# if is_worker_node; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node is a cluster worker node
# @exitcode 1 Otherwise
is_worker_node() {
! is_master_node
}
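
As a usage sketch (not part of the library), the predicates above are typically combined at the top of a bootstrap script, after calling `populate_nodeinfo` as recommended:

```
#!/usr/bin/env bash
source /usr/lib/qubole/bootstrap-functions/common/utils.sh

# Populate nodeinfo first so the predicates below can query it.
populate_nodeinfo

if is_master_node; then
    echo "running master-only bootstrap steps"
else
    echo "running worker-only bootstrap steps"
fi

if is_hs2_cluster; then
    echo "this node belongs to a HiveServer2 cluster"
fi
```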
20 changes: 20 additions & 0 deletions generate_docs.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
cd "$(dirname "$0")"

# Cleanup older documentation
mkdir -p docs
rm -f docs/*.md

# Generate new documentation
directories=$(ls -d */ | grep -v "docs\|tests\|examples")
for dx in ${directories}; do
find ${dx} -type f -name "*.sh" -exec shdoc {} \; > docs/$(dirname ${dx}.).md
done

# Overwrite README.md
cp -f README.md README.bak
cp -f README.template README.md
for dx in ${directories}; do
d=$(dirname ${dx}.)
sed -i "/The following set of functions are available at present:/a * [${d}](docs/${d}.md)" README.md
done
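
`generate_docs.sh` assumes each function carries the shdoc-style comment tags added throughout this commit (`@file`, `@description`, `@example`, `@arg`, `@noargs`, `@exitcode`). A minimal annotated function in that style (a hypothetical illustration, not part of the library) looks like this:

```
# @description Function to print a greeting
#
# @example
#    greet world
#
# @arg $1 string Name to greet
#
# @exitcode 0 Always
greet() {
    echo "Hello, $1"
}
```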
47 changes: 32 additions & 15 deletions hadoop/util.sh
@@ -1,4 +1,7 @@
#!/bin/bash
#
# @file hadoop/util.sh
# @brief Provides Hadoop2 utility functions

source /usr/lib/hustler/bin/qubole-bash-lib.sh
export PROFILE_FILE=${PROFILE_FILE:-/etc/profile}
@@ -64,12 +67,15 @@ function _restart_worker_services_ctl() {
# after the bootstrap is finished
}

##
# Restart hadoop services on the cluster master
# @description Function to restart hadoop services on the cluster master
#
# This may be used if you're using a different version
# of Java, for example
#
# @example
# restart_master_services
#
# @noargs
function restart_master_services() {
if [[ ${al2} == "true" || ${dont_use_monit} == "true" ]]; then
_restart_master_services_ctl
@@ -79,12 +85,15 @@ function restart_master_services() {
}


##
# Restart hadoop services on cluster workers
# @description Function to restart hadoop services on the cluster workers
#
# This only restarts the datanode service since the
# nodemanager is started after the bootstrap is run
#
# @example
# restart_worker_services
#
# @noargs
function restart_worker_services() {
if [[ ${al2} == "true" || ${dont_use_monit} == "true" ]]; then
_restart_worker_services_ctl
@@ -93,9 +102,12 @@ function restart_worker_services() {
fi
}

##
# Generic function to restart hadoop services
# @description Generic function to restart hadoop services
#
# @example
# restart_hadoop_services
#
# @noargs
function restart_hadoop_services() {
local is_master=$(nodeinfo is_master)
if [[ ${is_master} == "1" ]]; then
@@ -105,15 +117,18 @@ function restart_hadoop_services() {
fi
}

##
# Use Java 8 for hadoop daemons and jobs
# @description Use Java 8 for hadoop daemons and jobs
#
# By default, the hadoop daemons and jobs on Qubole
# clusters run on Java 7. Use this function if you would like
# to use Java 8. This is only required if your cluster:
# is in AWS, and
# is running Hive or Spark < 2.2
# 1. is in AWS, and
# 2. is running Hive or Spark < 2.2
#
# @example
# use_java8
#
# @noargs
function use_java8() {
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export PATH=$JAVA_HOME/bin:$PATH
@@ -130,11 +145,13 @@ function use_java8() {
fi
}

##
# Wait until namenode is out of safe mode.
# Takes 2 optional params
# first : Number of attempts function will make to get namenode out of safemode. Default is 50
# second : Number of seconds each attempt will sleep for waiting for namenode to come out of sleep mode. Default is 5sec
# @description Wait until namenode is out of safe mode
#
# @example
# wait_until_namenode_running 25 5
#
# @arg $1 int Number of attempts the function will make while waiting for the namenode to leave safe mode. Defaults to 50
# @arg $2 int Number of seconds each attempt sleeps between checks while waiting for the namenode to leave safe mode. Defaults to 5
function wait_until_namenode_running() {
n=0
attempts=${1:-50}
13 changes: 10 additions & 3 deletions hive/glue-sync.sh
@@ -1,13 +1,20 @@
#!/bin/bash
#
# @file hive/glue-sync.sh
# @brief Provides function to install Hive Glue Catalog Sync Agent

source /usr/lib/hustler/bin/qubole-bash-lib.sh
source /usr/lib/qubole/bootstrap-functions/hive/hiveserver2.sh

##
# Installs Hive Glue Catalog Sync Agent
# param1 - Region for AWS Athena. Defaults to us-east-1
# @description Installs Hive Glue Catalog Sync Agent
#
# Requires Hive 2.x
# Currently supported only on AWS
#
# @example
# install_glue_sync us-east-1
#
# @arg $1 string Region for AWS Athena. Defaults to `us-east-1`
function install_glue_sync() {
aws_region=${1:-us-east-1}

45 changes: 37 additions & 8 deletions hive/hiveserver2.sh
@@ -1,32 +1,61 @@
#!/usr/bin/env bash
#
# @file hive/hiveserver2.sh
# @brief Provides functions to start/stop/restart HiveServer2

source /usr/lib/qubole/bootstrap-functions/common/utils.sh

#--------------------------------------------------------------------------------
# Methods to stop/start/restart HiveServer2
#--------------------------------------------------------------------------------

# @description Function to check if HiveServer2 is configured
#
# @example
# if is_hs2_configured; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 If HiveServer2 is configured
# @exitcode 1 Otherwise
function is_hs2_configured() {
(is_master_node && is_hs2_enabled) || (is_worker_node && is_hs2_cluster)
}

# Stop HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to stop HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# stop_hs2
#
# @noargs
function stop_hs2() {
if is_hs2_configured; then
monit stop hs2
fi
}

# Start HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to start HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# start_hs2
#
# @noargs
function start_hs2() {
if is_hs2_configured; then
monit start hs2
fi
}

##
# Restart HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to restart HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# restart_hs2
#
# @noargs
function restart_hs2() {
stop_hs2
sleep 5
19 changes: 13 additions & 6 deletions hive/ranger-client.sh
@@ -1,15 +1,22 @@
#!/bin/bash
#
# @file hive/ranger-client.sh
# @brief Provides function to install Apache Ranger client for Hive

source /usr/lib/qubole/bootstrap-functions/common/utils.sh
source /usr/lib/qubole/bootstrap-functions/hive/hiveserver2.sh

##
# Install Apache Ranger client for Hive
# Parameters:
# -h: Ranger admin host. Defaults to `localhost`
# -p: Ranger admin port. Defaults to `6080`
# -r: Ranger repository name. Defaults to `hivedev`
# @description Install Apache Ranger client for Hive
#
# Currently supported only on AWS
# Requires HiveServer2
#
# @example
# install_ranger -h example.host -p 6080 -r examplerepo
#
# @arg -h string Hostname of Ranger admin. Defaults to `localhost`
# @arg -p int Port where Ranger admin is running. Defaults to `6080`
# @arg -r string Name of Ranger repository. Defaults to `hivedev`
function install_ranger() {
populate_nodeinfo
if is_hs2_configured; then