forked from Azure/azurehpc-health-checks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstall-nhc.sh
executable file
·128 lines (103 loc) · 3.41 KB
/
install-nhc.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/bin/bash
# Installs the AzureHPC health checks.
#
# Usage: install-nhc.sh [INSTALL_DIR] [CUDA_DIR]
#   INSTALL_DIR  Where to install. Defaults to the directory containing this
#                script. Any other location gets an "azurehpc-health-checks"
#                subdirectory appended.
#   CUDA_DIR     CUDA directory handed to custom-test-setup.sh.
#                Defaults to /usr/local/cuda.

# Directory containing this script (the source tree).
SRC_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# An empty or missing argument falls back to its default.
INSTALL_DIR=${1:-$SRC_DIR}
CUDA_DIR=${2:-/usr/local/cuda}

# Not using the source directory as the install location: install into a
# dedicated subdirectory. Both sides are quoted so paths containing spaces or
# glob characters are compared as plain strings.
if [[ "$SRC_DIR" != "$INSTALL_DIR" ]]; then
    INSTALL_DIR="$INSTALL_DIR/azurehpc-health-checks"
fi

# Exported so child processes can locate the version log.
export AZ_NHC_VERSION_LOG="$INSTALL_DIR/docs/version.log"
#######################################
# Download, build, test and install LBNL NHC from its GitHub release tarball.
# Globals:
#   SRC_DIR             (read) source tree; the build happens in SRC_DIR/build
#   AZ_NHC_VERSION_LOG  (read) file the installed NHC version is appended to
# Arguments:
#   None
# Returns:
#   0 on success, 1 if entering the build directory, the download, the
#   extraction, configure or "make install" fails.
#######################################
function install_lbnl_nhc(){
    local nhc_version=1.4.3
    local tarball="nhc-${nhc_version}.tar.xz"
    local url="https://github.com/mej/nhc/releases/download/${nhc_version}/lbnl-nhc-${nhc_version}.tar.xz"
    local os_id libexecdir
    local status=0

    pushd "$SRC_DIR/build" || return 1

    # Stop at the first failing step so a broken download is never unpacked
    # and nothing is configured or installed from the wrong directory.
    if wget -O "$tarball" "$url" \
        && tar -xf "$tarball" \
        && pushd "lbnl-nhc-${nhc_version}"; then

        # Read ID in a subshell so the other os-release variables do not leak
        # into this script's namespace.
        os_id=$(. /etc/os-release && printf '%s' "${ID:-}")
        case "$os_id" in
            ubuntu) libexecdir=/usr/lib ;;
            *)      libexecdir=/usr/libexec ;;
        esac

        if ./configure --prefix=/usr --sysconfdir=/etc --libexecdir="$libexecdir"; then
            # A failing test run is reported but does not block the install.
            sudo make test || echo "WARNING: LBNL NHC 'make test' failed" >&2
            echo -e "\n"
            if sudo make install; then
                # Only record the version once it is actually installed.
                echo "NHC version: $nhc_version" >> "$AZ_NHC_VERSION_LOG"
            else
                status=1
            fi
        else
            status=1
        fi
        popd
    else
        status=1
    fi

    rm -f -- "$tarball"
    popd
    return "$status"
}
# Create the installation layout and the build directory.
# "mkdir -p" on the subdirectories also creates $INSTALL_DIR itself.
mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/docs" "$SRC_DIR/build"

# create version log
# git describe runs inside the source tree so the tag is found no matter which
# directory the script was started from.
AZVER=$(cd "$SRC_DIR" && git describe --tags --abbrev=0 2>/dev/null)
if [[ -z "$AZVER" ]]; then
    echo "WARNING: could not determine the Azure-NHC version from git tags" >&2
    AZVER=unknown
fi
cat > "$AZ_NHC_VERSION_LOG" <<EOL
This file contains the version of AzureHPC Health Checks and submodules.
Azure-NHC: $AZVER
submodules:
EOL

# install lbnl nhc
# The version log must already exist here: the installer appends to it.
# A failure is reported instead of being dropped silently; the script keeps
# going so the remaining setup steps still run.
if ! install_lbnl_nhc; then
    echo "WARNING: LBNL NHC installation reported a failure" >&2
fi
# Install NHC dependencies
# The distribution is identified by the value of the NAME line in
# /etc/os-release; the first matching pattern below wins.
distro=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
case "$distro" in
    *Ubuntu*)
        sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
            libpci-dev hwloc build-essential libboost-program-options-dev \
            libssl-dev cmake
        ;;
    *AlmaLinux*)
        sudo dnf install -y pciutils-devel hwloc openssl-devel boost-devel cmake
        ;;
    *CentOS*)
        # Package manager output is discarded on this branch only.
        sudo yum install -y pciutils-devel hwloc openssl-devel boost-devel cmake > /dev/null
        echo "CentOS version is not officially supported, proceed w/ caution."
        ;;
    *)
        echo "OS version $distro is not supported. Proceed w/ caution."
        ;;
esac
# Install build tools
# Check cmake version + install if necessary

#######################################
# Report whether a dotted version string is older than MAJOR.MINOR.
# Arguments:
#   $1 - version string such as "3.22.1" (may be empty)
#   $2 - required major version
#   $3 - required minor version
# Returns:
#   0 if $1 is older than $2.$3 or cannot be parsed, 1 otherwise.
#######################################
_cmake_version_older_than() {
    local version=$1 want_major=$2 want_minor=$3
    local version_re='^([0-9]+)\.([0-9]+)'
    local major minor

    # An unparsable (e.g. empty) version counts as "too old".
    [[ $version =~ $version_re ]] || return 0

    # 10# forces base 10 so a component such as "08" is not read as octal.
    major=$((10#${BASH_REMATCH[1]}))
    minor=$((10#${BASH_REMATCH[2]}))

    (( major < want_major || (major == want_major && minor < want_minor) ))
}

export NHC_CMAKE=cmake

# Third field of the first line of "cmake --version"; empty when cmake is not
# installed. Major and minor are compared numerically: concatenating all
# digits would rank 2.8.12.2 ("28122") above 3.20.0 ("3200").
cmake_version=$(cmake --version 2>/dev/null | awk 'NR==1 {print $3}')
if _cmake_version_older_than "$cmake_version" 3 20; then
    echo "Upgrade cmake version to 3.20 or above to build nvbandwidth"
    cmake_release=3.28.0
    # Installer matching the machine architecture; on x86_64 this is the same
    # URL as before.
    cmake_url="https://github.com/Kitware/CMake/releases/download/v${cmake_release}/cmake-${cmake_release}-linux-$(uname -m).sh"
    if pushd "$SRC_DIR/build"; then
        if wget -q -O cmake.sh "$cmake_url" \
            && chmod +x cmake.sh \
            && mkdir -p cmake \
            && ./cmake.sh --skip-license --prefix=./cmake; then
            # NHC_CMAKE keeps its export attribute from the assignment above.
            NHC_CMAKE="$PWD/cmake/bin/cmake"
        else
            echo "WARNING: could not install cmake ${cmake_release}; NHC_CMAKE stays '$NHC_CMAKE'" >&2
        fi
        rm -f cmake.sh
        popd
    else
        echo "WARNING: cannot enter $SRC_DIR/build; cmake was not upgraded" >&2
    fi
fi
# Copy over necessary files
# The *.nhc custom test scripts go into NHC's script directory.
sudo cp "$SRC_DIR"/customTests/*.nhc /etc/nhc/scripts

# When installing outside the source tree, copy the runtime files and the
# documentation across. Quoted so paths with spaces or glob characters work.
if [[ "$SRC_DIR" != "$INSTALL_DIR" ]]; then
    cp -r "$SRC_DIR/conf/" "$INSTALL_DIR"
    cp -r "$SRC_DIR/distributed_nhc/" "$INSTALL_DIR"
    cp "$SRC_DIR"/*.md "$INSTALL_DIR/docs/"
    cp "$SRC_DIR/LICENSE" "$INSTALL_DIR/docs/"
    cp "$SRC_DIR/run-health-checks.sh" "$INSTALL_DIR"
fi
cp -r "$SRC_DIR/customTests/topofiles/" "$INSTALL_DIR"

# Install NHC custom tests
# Enter the directory through $SRC_DIR so this works regardless of the
# directory the script was started from.
if pushd "$SRC_DIR/customTests/"; then
    ./custom-test-setup.sh "$INSTALL_DIR" "$CUDA_DIR"
    popd
else
    echo "ERROR: cannot enter $SRC_DIR/customTests/; custom tests were not installed" >&2
fi
# create env file
# Writes a small script that exports AZ_NHC_ROOT and defines the "aznhc"
# alias; both embed the value INSTALL_DIR has at install time.
env_init_path="${INSTALL_DIR}/aznhc_env_init.sh"
{
    printf '%s\n' '#!/bin/bash'
    printf '%s\n' '# This file is used to source the NHC environment variables'
    printf '%s\n' '# It is recommended to source this file in your .bashrc or .bash_profile'
    printf '%s\n' '# to make the NHC commands available in your shell.'
    printf 'export AZ_NHC_ROOT=%s\n' "$INSTALL_DIR"
    printf 'alias aznhc="sudo %s/run-health-checks.sh"\n' "$INSTALL_DIR"
} > "$env_init_path"
chmod +x "$env_init_path"

exit 0