Skip to content

Commit

Permalink
Add gpu_check (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
phoenixdong authored Jun 13, 2024
1 parent 2ebb6a1 commit bbeffa2
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
11 changes: 10 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ jobs:
steps:
- name: Checkout Code
uses: actions/checkout@v2


- name: Check GPU is Free
run: tests/scripts/gpu_check.sh

- name: Megatron Unit Test
run: tests/scripts/unit_test_megatron.sh ${{github.sha}}

Expand All @@ -51,6 +54,9 @@ jobs:
- name: Checkout Code
uses: actions/checkout@v2

- name: Check GPU is Free
run: tests/scripts/gpu_check.sh

- name: Flagscale Unit Test
run: tests/scripts/unit_test_flagscale.sh ${{github.sha}}

Expand All @@ -74,5 +80,8 @@ jobs:
- name: Checkout Code
uses: actions/checkout@v2

- name: Check GPU is Free
run: tests/scripts/gpu_check.sh

- name: Flagscale Functional Test
run: tests/scripts/functional_test_flagscale.sh
38 changes: 38 additions & 0 deletions tests/scripts/gpu_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
#
# Block until every NVIDIA GPU on this host has enough unused memory, so that
# a following test step does not start while another job still occupies a GPU.
#
# Usage:
#   gpu_check.sh [min_free_mb]
#
# Environment (all optional):
#   GPU_CHECK_MIN_FREE_MB  Unused memory (MB) every GPU must have.
#                          Default: 30000. The positional argument wins.
#   GPU_CHECK_INTERVAL     Argument handed to sleep(1) between polls.
#                          Default: 5m.
#   GPU_CHECK_TIMEOUT      Maximum number of seconds to keep waiting;
#                          0 means wait forever. Default: 0.
#
# Exit status:
#   0  all GPUs are free (or no GPU / no nvidia-smi was found)
#   1  GPU_CHECK_TIMEOUT elapsed while at least one GPU was still busy
#   2  invalid settings, or nvidia-smi output could not be obtained/parsed

#######################################
# Take one snapshot of GPU memory and test it against a threshold.
# Unused memory is computed as memory.total - memory.used, both read from a
# single nvidia-smi call so the two figures belong to the same sample.
# Arguments: $1 - minimum unused memory per GPU, in MB
# Outputs:   diagnostics on stderr when the query fails or is unparsable
# Returns:   0 every GPU has >= $1 MB unused (also when no GPU is listed)
#            1 at least one GPU has less than $1 MB unused
#            2 nvidia-smi failed or printed a non-numeric value
#######################################
gpu_check_all_free() {
  local min_free_mb=$1
  local report used_mb total_mb _rest

  report=$(nvidia-smi --query-gpu=memory.used,memory.total \
    --format=csv,noheader,nounits) || {
    printf 'gpu_check: nvidia-smi query failed\n' >&2
    return 2
  }

  # Each line looks like "1234, 81920"; split on the comma and the space.
  while IFS=', ' read -r used_mb total_mb _rest; do
    # A GPU-less host yields a single empty line from the here-string.
    if [[ -z "$used_mb" && -z "$total_mb" ]]; then
      continue
    fi

    # Reject values such as "[N/A]" up front instead of letting the
    # arithmetic below die with a cryptic syntax error.
    if [[ ! "$used_mb" =~ ^[0-9]+$ || ! "$total_mb" =~ ^[0-9]+$ ]]; then
      printf 'gpu_check: unexpected nvidia-smi output: %s, %s\n' \
        "$used_mb" "$total_mb" >&2
      return 2
    fi

    # 10# forces base 10 so a leading zero is never read as octal.
    if (( 10#$total_mb - 10#$used_mb < 10#$min_free_mb )); then
      return 1
    fi
  done <<< "$report"

  return 0
}

#######################################
# Poll gpu_check_all_free until it succeeds, fails hard, or times out.
# Arguments: $1 - (optional) minimum unused memory per GPU, in MB
# Outputs:   "wait for gpu free" on stdout before every sleep;
#            warnings and errors on stderr
# Returns:   see "Exit status" in the file header
#######################################
gpu_check_main() {
  local min_free_mb=${1:-${GPU_CHECK_MIN_FREE_MB:-30000}}
  local interval=${GPU_CHECK_INTERVAL:-5m}
  local timeout_s=${GPU_CHECK_TIMEOUT:-0}
  local started=$SECONDS
  local rc

  if [[ ! "$min_free_mb" =~ ^[0-9]+$ ]]; then
    printf 'gpu_check: min_free_mb must be a non-negative integer: %s\n' \
      "$min_free_mb" >&2
    return 2
  fi
  if [[ ! "$timeout_s" =~ ^[0-9]+$ ]]; then
    printf 'gpu_check: GPU_CHECK_TIMEOUT must be a non-negative integer: %s\n' \
      "$timeout_s" >&2
    return 2
  fi

  # Without nvidia-smi there is nothing to inspect. The script has always
  # let the job continue in that situation, so keep doing so, but say why.
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    printf 'gpu_check: nvidia-smi not found, skipping GPU check\n' >&2
    return 0
  fi

  while true; do
    gpu_check_all_free "$min_free_mb"
    rc=$?
    case "$rc" in
      0) return 0 ;;
      1) ;; # busy: fall through and wait
      *) return "$rc" ;;
    esac

    if (( 10#$timeout_s > 0 && SECONDS - started >= 10#$timeout_s )); then
      printf 'gpu_check: GPUs still busy after %s seconds, giving up\n' \
        "$timeout_s" >&2
      return 1
    fi

    echo "wait for gpu free"
    sleep "$interval"
  done
}

gpu_check_main "$@" || exit $?

0 comments on commit bbeffa2

Please sign in to comment.