# File: unit_test_gpu.yaml (72 lines, 70 loc, 2.46 KB)
# Repository: forked from pytorch/torchsnapshot
# GPU unit-test workflow: runs the tests under tests/gpu_tests on a GPU runner
# and uploads the coverage report to Codecov.
name: unit test

# Run on pushes to main and on every pull request.
on:
  push:
    branches: [main]
  pull_request:

jobs:
  unit_tests:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [linux.p3.8xlarge.nvidia.gpu]
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # read as the float 3.1).
        python-version: ["3.8"]
        # NOTE(review): cuda-tag is not referenced by any step in this file,
        # while the torch wheel index in "Install dependencies" is hard-coded
        # to cu113 - confirm whether the two are meant to be linked.
        cuda-tag: ["cu11"]
    steps:
      - name: Check out repo
        uses: actions/checkout@v2

      # Creates and activates a conda environment named "test" with the
      # Python version selected by the matrix.
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}

      # Best-effort cleanup: every command is followed by `|| true`, so a
      # missing package never fails the step.
      - name: Clean up previous CUDA driver installations
        shell: bash
        run: |
          set -x
          yum list installed | grep nvidia || true
          yum list installed | grep cuda || true
          sudo yum remove -y cuda || true
          sudo yum remove -y cuda-drivers || true
          sudo yum remove -y "*nvidia*" || true

      # The install script is wrapped in a retry action (pinned to a commit
      # SHA): up to 3 attempts, 10 minutes each.
      # GPU_FLAG is appended to GITHUB_ENV so later steps can read it.
      # NOTE(review): no step in this file reads GPU_FLAG - confirm it is
      # still needed.
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a
        with:
          timeout_minutes: 10
          max_attempts: 3
          command: |
            set -ex
            bash .github/scripts/install_nvidia_utils_linux.sh
            echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"

      # Prints the AMI id, instance id and instance type of the runner to
      # the job log. With `set -e` and `curl -f`, a failed metadata request
      # fails the step.
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"

      # Login shell (`bash -l`): presumably required so that conda is
      # initialised and `conda activate` works - see the setup-miniconda
      # documentation.
      - name: Install dependencies
        shell: bash -l {0}
        run: |
          set -eux
          conda activate test
          pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu113/torch_nightly.html
          pip install -r requirements.txt
          pip install -r dev-requirements.txt
          pip install --no-build-isolation -e ".[dev]"

      # Runs only the GPU test directory and writes an XML coverage report.
      - name: Run unit tests with coverage
        shell: bash -l {0}
        run: |
          set -eux
          conda activate test
          pytest --cov=. --cov-report xml tests/gpu_tests -vv

      # No inputs are passed, so the action's defaults apply.
      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v2