Skip to content

Commit

Permalink
Get mem/cpu info from cgroup
Browse files Browse the repository at this point in the history
  • Loading branch information
unkcpz committed Aug 13, 2024
1 parent f4c248d commit 12784a0
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 4 deletions.
6 changes: 6 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ COPY --from=home_build /opt/conda/hq /usr/local/bin/
COPY --from=qe_conda_env ${QE_DIR} ${QE_DIR}

USER root

# XXX: move me to docker-stack
RUN apt-get update --yes && \
apt-get install --yes --no-install-recommends bc && \
apt-get clean && rm -rf /var/lib/apt/lists/*

COPY ./before-notebook.d/* /usr/local/bin/before-notebook.d/

# Remove content of $HOME
Expand Down
4 changes: 0 additions & 4 deletions before-notebook.d/41_setup-hq-computer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,4 @@ else
--safe-interval 5.0
fi

# Start hq server with a worker
nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &
nohup hq worker start --cpus=${LOCAL_MPI_PROCS} --resource "mem=sum(${LOCAL_MEM})" --no-detect-resources &

verdi daemon start || echo "start fail"
34 changes: 34 additions & 0 deletions before-notebook.d/42_start-hq.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Start a HyperQueue (hq) server plus one worker sized from the container's
# cgroup v2 limits.
#
# Reads:
#   /sys/fs/cgroup/memory.max  - memory limit in bytes, or the literal "max"
#   /sys/fs/cgroup/cpu.max     - "<quota> <period>", quota may be "max"
# Sets:
#   MEMORY_LIMIT  - worker memory in whole MiB
#   CPU_NUMBER    - whole number of CPUs handed to the worker (at least 1)
#
# NOTE(review): files in before-notebook.d may be sourced by the start-up
# hook runner rather than executed - confirm. For that reason this script
# deliberately never calls `exit` and does not enable `set -e`.

set -x

# Succeeds when $1 is a non-empty string made only of decimal digits.
_hq_is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

#######################################
# Print the memory limit in whole MiB.
# Arguments:
#   $1 - path to the cgroup memory.max file
#        (default /sys/fs/cgroup/memory.max)
#   $2 - fallback in MiB used when no limit is readable (default 1024)
# Outputs:
#   the limit (or the fallback) on stdout
# Returns:
#   0 when a numeric limit was found; 1 when the fallback was used
#   ("max", missing file, or malformed content)
#######################################
_hq_memory_limit_mib() {
  local file="${1:-/sys/fs/cgroup/memory.max}"
  local fallback="${2:-1024}"
  local raw=""
  local mib

  if [ -r "$file" ]; then
    read -r raw _ < "$file"
  fi

  if ! _hq_is_uint "$raw"; then
    echo "$fallback"
    return 1
  fi

  # 10# forces base 10 so a leading zero is never parsed as octal.
  mib=$(( 10#$raw / 1048576 ))
  # A limit below 1 MiB would otherwise advertise zero memory.
  if [ "$mib" -lt 1 ]; then
    mib=1
  fi
  echo "$mib"
}

#######################################
# Print the number of whole CPUs available to the container.
# Arguments:
#   $1 - path to the cgroup cpu.max file (default /sys/fs/cgroup/cpu.max)
# Outputs:
#   floor(quota / period), but at least 1, on stdout; the output of `nproc`
#   when no usable quota is present
# Returns:
#   0 when a quota was found; 1 when falling back to `nproc`
#######################################
_hq_cpu_count() {
  local file="${1:-/sys/fs/cgroup/cpu.max}"
  local quota=""
  local period=""
  local count

  if [ -r "$file" ]; then
    read -r quota period _ < "$file"
  fi

  # An unlimited container reports "max <period>", so the quota itself has
  # to be checked, not just the period.
  if ! _hq_is_uint "$quota" || ! _hq_is_uint "$period" \
    || [ "$(( 10#$period ))" -eq 0 ]; then
    nproc
    return 1
  fi

  # Round down to whole CPUs; the fractional remainder is left for system
  # tasks. Never report fewer than one CPU.
  count=$(( 10#$quota / 10#$period ))
  if [ "$count" -lt 1 ]; then
    count=1
  fi
  echo "$count"
}

# Memory available to the worker, taken from the container runtime.
if MEMORY_LIMIT=$(_hq_memory_limit_mib); then
  echo "Memory Limit: ${MEMORY_LIMIT} MiB"
else
  echo "No memory limit set"
fi

# Number of CPUs allocated to the container.
if CPU_NUMBER=$(_hq_cpu_count); then
  echo "Number of CPUs allocated: $CPU_NUMBER"
else
  # No limit (e.g. a local OCI runtime started without a CPU limit): use all.
  echo "No CPU limit set"
fi

# Start the hq server and a worker. Each runs under its own backgrounded
# run-one-constantly supervisor: the hook is not blocked, the worker does not
# wait for the (long-running) server to exit, and a worker that comes up
# before the server is ready is simply restarted.
if command -v run-one-constantly > /dev/null 2>&1 \
  && command -v hq > /dev/null 2>&1; then
  run-one-constantly hq server start \
    1> "$HOME/.hq-stdout" 2> "$HOME/.hq-stderr" &
  run-one-constantly hq worker start \
    --cpus="${CPU_NUMBER}" \
    --resource "mem=sum(${MEMORY_LIMIT})" \
    --no-detect-resources \
    1> "$HOME/.hq-worker-stdout" 2> "$HOME/.hq-worker-stderr" &
else
  echo "hq or run-one-constantly not found; HyperQueue was not started" >&2
fi

0 comments on commit 12784a0

Please sign in to comment.