[CI] follow-up fixes (microsoft#3072)
jeffra authored Mar 21, 2023
1 parent 9ea0fdc commit 3667758
Showing 5 changed files with 19 additions and 26 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS

@@ -7,7 +7,7 @@


 # top-level repo folders
-/.github/ @jeffra @mrwyattii
+/.github/ @jeffra @mrwyattii @loadams
 /azure/ @jeffra @awan-10
 /benchmarks/ @jeffra @awan-10 @mrwyattii @molly-smith
 /bin/ @jeffra
15 changes: 2 additions & 13 deletions tests/unit/checkpoint/test_pipeline.py

@@ -3,29 +3,18 @@
 from deepspeed.runtime.checkpoint_engine.torch_checkpoint_engine import TorchCheckpointEngine
 from unit.common import DistributedTest
 from unit.simple_model import *

 from unit.checkpoint.common import checkpoint_correctness_verification
+from unit.util import skip_on_arch

 import pytest
 import deepspeed
-import torch
-
-
-def _skip_on_older_arch(arch=7):
-    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
-        if torch.cuda.get_device_capability()[0] < arch:
-            pytest.skip("needs higher compute capability than 7")
-    else:
-        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
-    return


 class TestPipelineCheckpoint(DistributedTest):
     world_size = 4

     @pytest.mark.parametrize("zero_stage", [0, 1])
     def test_checkpoint_pipe_engine(self, zero_stage, tmpdir):
-        _skip_on_older_arch()
+        skip_on_arch(min_arch=7)

         config_dict = {
             "train_batch_size": 2,
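As context for the `min_arch` checks above: `torch.cuda.get_device_capability()` returns the GPU's compute capability as a `(major, minor)` tuple, and the skip helper compares only the major version. A minimal standalone sketch, not part of this commit:

    import torch

    # Print the compute capability the skip logic keys on.
    # Example values (well-known GPUs): (7, 0) for V100, (8, 0) for A100.
    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability()
        print(f"compute capability: sm_{major}{minor}")
    else:
        print("no CUDA device visible")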
14 changes: 2 additions & 12 deletions tests/unit/runtime/pipe/test_pipe.py

@@ -4,13 +4,12 @@
 import torch.nn as nn
 import pytest

-import torch
 import deepspeed
 import deepspeed.comm as dist
 from deepspeed.runtime.pipe.topology import PipeDataParallelTopology
 from deepspeed.runtime.pipe.module import PipelineModule
 from unit.alexnet_model import AlexNetPipe, train_cifar
 from unit.common import DistributedTest
+from unit.util import skip_on_arch

 PipeTopo = PipeDataParallelTopology

@@ -19,15 +18,6 @@ def rel_diff(A, B):
     return abs(A - B) / abs(A)


-def _skip_on_older_arch(arch=7):
-    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
-        if torch.cuda.get_device_capability()[0] < arch:
-            pytest.skip("needs higher compute capability than 7")
-    else:
-        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
-    return
-
-
 @pytest.mark.parametrize('topo_config',
                          [
                              {

@@ -47,7 +37,7 @@ class TestPipeCifar10(DistributedTest):
     world_size = 4

     def test(self, topo_config):
-        _skip_on_older_arch()
+        skip_on_arch(min_arch=7)

         config_dict = {
             "train_batch_size": 16,
3 changes: 3 additions & 0 deletions

@@ -3,6 +3,7 @@
 import torch
 import deepspeed
 from unit.common import DistributedTest
+from unit.util import skip_on_arch


 class Model(torch.nn.Module):

@@ -49,6 +50,8 @@ class TestSparseAdam(DistributedTest):
     world_size = 2

     def test(self):
+        skip_on_arch(min_arch=7)
+
         config_dict = {
             "train_batch_size": 2,
             "steps_per_print": 1,
11 changes: 11 additions & 0 deletions tests/unit/util.py

@@ -1,9 +1,20 @@
 '''Copyright The Microsoft DeepSpeed Team'''

+import pytest
 import torch
+import deepspeed
 from deepspeed.git_version_info import torch_info


+def skip_on_arch(min_arch=7):
+    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
+        if torch.cuda.get_device_capability()[0] < min_arch:
+            pytest.skip(f"needs higher compute capability than {min_arch}")
+    else:
+        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
+    return
+
+
 def required_torch_version():
     TORCH_MAJOR = int(torch.__version__.split('.')[0])
     TORCH_MINOR = int(torch.__version__.split('.')[1])
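A hypothetical usage sketch of the shared helper added above: any test that needs a minimum CUDA compute capability can now call `skip_on_arch` instead of keeping a local copy of the check. The test below is illustrative only, not part of this commit:

    import pytest
    from unit.util import skip_on_arch  # helper added in tests/unit/util.py


    def test_needs_ampere():
        # Hypothetical test: skipped on CUDA devices below compute capability 8.x;
        # on non-CUDA accelerators the helper asserts the device is 'xpu' and returns.
        skip_on_arch(min_arch=8)
        assert True  # placeholder for real test logic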
