Autogenerate documentation for bootstrap-functions library (#27)
akaranjkar-qu authored Mar 18, 2020
1 parent f747ab1 commit 372fd09
Showing 13 changed files with 301 additions and 87 deletions.
30 changes: 30 additions & 0 deletions README.template
@@ -0,0 +1,30 @@
# bootstrap-functions
This repository holds common functions that can be used in Qubole node bootstraps

## How to use

Source the required script in your bootstrap script. For example, to mount an EFS volume with the bootstrap, you may do the following:

```
source /usr/lib/qubole/bootstrap-functions/misc/mount_nfs.sh

mount_nfs fs-7abd2444.efs.us-east-1.amazonaws.com:/ /mnt/efs
```

## Available functions
The following set of functions are available at present:

## Contributing
Please raise a pull request for any modifications or additions you would like to make. There may be a delay between when you want to start using a method and when it becomes available via Qubole's AMI. To work around this, it is recommended to put a placeholder `source` line in your bootstrap script. For example:

```
function mysparkfunction() {
# ... do some stuff
}

source /usr/lib/qubole/bootstrap-functions/spark/mysparkfunction.sh

mysparkfunction arg1 arg2 ...
```

This way, when the function makes it to the AMI, you will automatically use the copy in the bootstrap-functions library.
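
Note that if your bootstrap runs with `set -e`, sourcing a library file that does not yet exist on the AMI will abort the script. A guarded `source` avoids this; the sketch below (using the hypothetical path and function name from the example above) keeps the placeholder in use until the library copy ships:

```
function mysparkfunction() {
    # placeholder implementation used until the library copy ships in the AMI
    # ... do some stuff
}

# Source the library copy only if it is already present on this AMI;
# otherwise the placeholder definition above stays in effect.
lib=/usr/lib/qubole/bootstrap-functions/spark/mysparkfunction.sh
if [ -f "${lib}" ]; then
    source "${lib}"
fi

mysparkfunction arg1 arg2 ...
```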
79 changes: 65 additions & 14 deletions common/utils.sh
@@ -1,41 +1,92 @@
#!/usr/bin/env bash
#
# @file common/utils.sh
# @brief Provides common utility functions

#--------------------------------------------------------------------------------
# Utility methods
#--------------------------------------------------------------------------------

# @description Function to populate nodeinfo
#
# Please call this method at the start of node bootstrap
#
# @example
# populate_nodeinfo
#
# @noargs
populate_nodeinfo() {
source /usr/lib/hustler/bin/qubole-bash-lib.sh
}

# Returns 0 when run on a Hadoop2 cluster node.
# Returns 1 otherwise
# @description Function to check if the node belongs to a Hadoop2 cluster
#
# @example
# if is_hadoop2_cluster; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 If the cluster runs hadoop2
# @exitcode 1 Otherwise
is_hadoop2_cluster() {
[[ `nodeinfo use_hadoop2` = "1" ]]
}


# Returns 0 when HiveServer2 is configured to run on the cluster master.
# Returns 1 otherwise
# @description Function to check if a HiveServer2 is configured to run on a master node
#
# @example
# if is_hs2_enabled; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When HiveServer2 is configured on a master node
# @exitcode 1 Otherwise
is_hs2_enabled() {
is_hadoop2_cluster && [[ `nodeinfo hive_use_hs2` = "1" ]]
}

# Returns 0 when run on a HiveServer2 cluster node.
# Returns 1 otherwise
# @description Function to check if a node belongs to a HiveServer2 cluster
#
# @example
# if is_hs2_cluster; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node belongs to a HiveServer2 cluster
# @exitcode 1 Otherwise
is_hs2_cluster() {
is_hadoop2_cluster && [[ `nodeinfo is_hs2_cluster` = "1" ]]
}

# Returns 0 when run on a cluster master node.
# Returns 1 otherwise
# @description Function to check if a node is a cluster master node
#
# @example
# if is_master_node; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node is a cluster master node
# @exitcode 1 Otherwise
is_master_node() {
[[ `nodeinfo is_master` = "1" ]]
}

# Returns 0 when run on a cluster worker node.
# Returns 1 otherwise
# @description Function to check if a node is a cluster worker node
#
# @example
# if is_worker_node; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 When node is a cluster worker node
# @exitcode 1 Otherwise
is_worker_node() {
! is_master_node
}
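
As a usage sketch (not part of the library), the predicates above are typically combined at the top of a bootstrap script, after calling `populate_nodeinfo` as recommended:

```
#!/usr/bin/env bash
source /usr/lib/qubole/bootstrap-functions/common/utils.sh

# Populate nodeinfo first so the predicates below can query it.
populate_nodeinfo

if is_master_node; then
    echo "running master-only bootstrap steps"
else
    echo "running worker-only bootstrap steps"
fi

if is_hs2_cluster; then
    echo "this node belongs to a HiveServer2 cluster"
fi
```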
20 changes: 20 additions & 0 deletions generate_docs.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
cd "$(dirname "$0")"

# Cleanup older documentation
mkdir -p docs
rm -f docs/*.md

# Generate new documentation
directories=$(ls -d */ | grep -v "docs\|tests\|examples")
for dx in ${directories}; do
find ${dx} -type f -name "*.sh" -exec shdoc {} \; > docs/$(dirname ${dx}.).md
done

# Overwrite README.md
cp -f README.md README.bak
cp -f README.template README.md
for dx in ${directories}; do
d=$(dirname ${dx}.)
sed -i "/The following set of functions are available at present:/a * [${d}](docs/${d}.md)" README.md
done
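
`generate_docs.sh` assumes each function carries the shdoc-style comment tags added throughout this commit (`@file`, `@description`, `@example`, `@arg`, `@noargs`, `@exitcode`). A minimal annotated function in that style (a hypothetical illustration, not part of the library) looks like this:

```
# @description Function to print a greeting
#
# @example
#    greet world
#
# @arg $1 string Name to greet
#
# @exitcode 0 Always
greet() {
    echo "Hello, $1"
}
```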
47 changes: 32 additions & 15 deletions hadoop/util.sh
@@ -1,4 +1,7 @@
#!/bin/bash
#
# @file hadoop/util.sh
# @brief Provides Hadoop2 utility functions

source /usr/lib/hustler/bin/qubole-bash-lib.sh
export PROFILE_FILE=${PROFILE_FILE:-/etc/profile}
@@ -64,12 +67,15 @@ function _restart_worker_services_ctl() {
# after the bootstrap is finished
}

##
# Restart hadoop services on the cluster master
# @description Function to restart hadoop services on the cluster master
#
# This may be used if you're using a different version
# of Java, for example
#
# @example
# restart_master_services
#
# @noargs
function restart_master_services() {
if [[ ${al2} == "true" || ${dont_use_monit} == "true" ]]; then
_restart_master_services_ctl
@@ -79,12 +85,15 @@ function restart_master_services() {
}


##
# Restart hadoop services on cluster workers
# @description Function to restart hadoop services on the cluster workers
#
# This only restarts the datanode service since the
# nodemanager is started after the bootstrap is run
#
# @example
# restart_worker_services
#
# @noargs
function restart_worker_services() {
if [[ ${al2} == "true" || ${dont_use_monit} == "true" ]]; then
_restart_worker_services_ctl
@@ -93,9 +102,12 @@ function restart_worker_services() {
fi
}

##
# Generic function to restart hadoop services
# @description Generic function to restart hadoop services
#
# @example
# restart_hadoop_services
#
# @noargs
function restart_hadoop_services() {
local is_master=$(nodeinfo is_master)
if [[ ${is_master} == "1" ]]; then
@@ -105,15 +117,18 @@ function restart_hadoop_services() {
fi
}

##
# Use Java 8 for hadoop daemons and jobs
# @description Use Java 8 for hadoop daemons and jobs
#
# By default, the hadoop daemons and jobs on Qubole
# clusters run on Java 7. Use this function if you would like
# to use Java 8. This is only required if your cluster:
# is in AWS, and
# is running Hive or Spark < 2.2
# 1. is in AWS, and
# 2. is running Hive or Spark < 2.2
#
# @example
# use_java8
#
# @noargs
function use_java8() {
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export PATH=$JAVA_HOME/bin:$PATH
@@ -130,11 +145,13 @@ function use_java8() {
fi
}

##
# Wait until namenode is out of safe mode.
# Takes 2 optional params
# first : Number of attempts function will make to get namenode out of safemode. Default is 50
# second : Number of seconds each attempt will sleep for waiting for namenode to come out of sleep mode. Default is 5sec
# @description Wait until namenode is out of safe mode
#
# @example
# wait_until_namenode_running 25 5
#
# @arg $1 int Number of attempts the function will make while waiting for the namenode to leave safe mode. Defaults to 50
# @arg $2 int Number of seconds each attempt sleeps between checks while waiting for the namenode to leave safe mode. Defaults to 5
function wait_until_namenode_running() {
n=0
attempts=${1:-50}
13 changes: 10 additions & 3 deletions hive/glue-sync.sh
@@ -1,13 +1,20 @@
#!/bin/bash
#
# @file hive/glue-sync.sh
# @brief Provides function to install Hive Glue Catalog Sync Agent

source /usr/lib/hustler/bin/qubole-bash-lib.sh
source /usr/lib/qubole/bootstrap-functions/hive/hiveserver2.sh

##
# Installs Hive Glue Catalog Sync Agent
# param1 - Region for AWS Athena. Defaults to us-east-1
# @description Installs Hive Glue Catalog Sync Agent
#
# Requires Hive 2.x
# Currently supported only on AWS
#
# @example
# install_glue_sync us-east-1
#
# @arg $1 string Region for AWS Athena. Defaults to `us-east-1`
function install_glue_sync() {
aws_region=${1:-us-east-1}

45 changes: 37 additions & 8 deletions hive/hiveserver2.sh
@@ -1,32 +1,61 @@
#!/usr/bin/env bash
#
# @file hive/hiveserver2.sh
# @brief Provides functions to start/stop/restart HiveServer2

source /usr/lib/qubole/bootstrap-functions/common/utils.sh

#--------------------------------------------------------------------------------
# Methods to stop/start/restart HiveServer2
#--------------------------------------------------------------------------------

# @description Function to check if HiveServer2 is configured
#
# @example
# if is_hs2_configured; then
# # do something here
# fi
#
# @noargs
#
# @exitcode 0 If HiveServer2 is configured
# @exitcode 1 Otherwise
function is_hs2_configured() {
(is_master_node && is_hs2_enabled) || (is_worker_node && is_hs2_cluster)
}

# Stop HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to stop HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# stop_hs2
#
# @noargs
function stop_hs2() {
if is_hs2_configured; then
monit stop hs2
fi
}

# Start HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to start HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# start_hs2
#
# @noargs
function start_hs2() {
if is_hs2_configured; then
monit start hs2
fi
}

##
# Restart HiveServer2 JVM - works on both Hadoop2 and HiveServer2 cluster
# @description Function to restart HiveServer2 JVM
#
# Works on both Hadoop2 and HiveServer2 clusters
#
# @example
# restart_hs2
#
# @noargs
function restart_hs2() {
stop_hs2
sleep 5
19 changes: 13 additions & 6 deletions hive/ranger-client.sh
@@ -1,15 +1,22 @@
#!/bin/bash
#
# @file hive/ranger-client.sh
# @brief Provides function to install Apache Ranger client for Hive

source /usr/lib/qubole/bootstrap-functions/common/utils.sh
source /usr/lib/qubole/bootstrap-functions/hive/hiveserver2.sh

##
# Install Apache Ranger client for Hive
# Parameters:
# -h: Ranger admin host. Defaults to `localhost`
# -p: Ranger admin port. Defaults to `6080`
# -r: Ranger repository name. Defaults to `hivedev`
# @description Install Apache Ranger client for Hive
#
# Currently supported only on AWS
# Requires HiveServer2
#
# @example
# install_ranger -h example.host -p 6080 -r examplerepo
#
# @arg -h string Hostname of Ranger admin. Defaults to `localhost`
# @arg -p int Port where Ranger admin is running. Defaults to `6080`
# @arg -r string Name of Ranger repository. Defaults to `hivedev`
function install_ranger() {
populate_nodeinfo
if is_hs2_configured; then