Skip to content

Commit

Permalink
Merge pull request #2051 from devitocodes/add_icx_support
Browse files Browse the repository at this point in the history
compiler: Add ICX support
  • Loading branch information
FabioLuporini authored Jun 6, 2023
2 parents ba4a49f + e355ed8 commit 0678627
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-bases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
dockerfile: './docker/Dockerfile.cpu'
runner: ubuntu-latest

- tag: 'devitocodes/bases:cpu-icc'
- tag: 'devitocodes/bases:cpu-icc, devitocodes/bases:cpu-icx'
arch: 'arch=icc'
version: ''
dockerfile: './docker/Dockerfile.cpu'
Expand Down
14 changes: 11 additions & 3 deletions .github/workflows/pytest-core-nompi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ jobs:
pytest-ubuntu-py39-gcc9-omp,
pytest-osx-py37-clang-omp,
pytest-docker-py37-gcc-omp,
pytest-docker-py37-icc-omp
pytest-docker-py37-icc-omp,
pytest-docker-py38-icx-omp
]
set: [base, adjoint]
include:
Expand Down Expand Up @@ -105,6 +106,13 @@ jobs:
language: "openmp"
sympy: "1.11"

- name: pytest-docker-py38-icx-omp
python-version: '3.8'
os: ubuntu-22.04
arch: "icx"
language: "openmp"
sympy: "1.11"

- set: base
test-set: 'not adjoint'

Expand Down Expand Up @@ -133,13 +141,13 @@ jobs:
- name: Set run prefix
run: |
if [[ "${{ matrix.name }}" =~ "docker" ]]; then
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e DEVITO_ARCH=${{ matrix.arch }} --name testrun devito_img" >> $GITHUB_ENV
else
echo "RUN_CMD=" >> $GITHUB_ENV
fi
id: set-run

- name: Install GCC ${{ matrix.arch }}
- name: Install ${{ matrix.arch }} compiler
if: "runner.os == 'linux' && !contains(matrix.name, 'docker')"
run : |
sudo apt-get install -y ${{ matrix.arch }}
Expand Down
35 changes: 25 additions & 10 deletions devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@

__all__ = ['platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_nvidia_cc',
'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', 'get_m1_llvm_path',
'Platform', 'Cpu64', 'Intel64', 'Amd', 'Arm', 'Power', 'Device',
'NvidiaDevice', 'AmdDevice', 'IntelDevice',
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'KNL', 'KNL7210', # Intel
'AMD', 'ARM', 'M1', 'GRAVITON', # ARM
'POWER8', 'POWER9', # Other loosely supported CPU architectures
'AMDGPUX', 'NVIDIAX', 'INTELGPUX'] # GPUs
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice',
# Intel
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
'SKX', 'KLX', 'CLX', 'CLK',
# ARM
'AMD', 'ARM', 'M1', 'GRAVITON',
# Other loosely supported CPU architectures
'POWER8', 'POWER9',
# GPUs
'AMDGPUX', 'NVIDIAX', 'INTELGPUX']


@memoized_func
Expand Down Expand Up @@ -494,7 +499,7 @@ def get_platform():
if 'phi' in brand:
# Intel Xeon Phi?
return platform_registry['knl']
# Unknown Xeon ? May happen on some virtualizes systems...
# Unknown Xeon ? May happen on some virtualized systems...
return platform_registry['intel64']
elif 'intel' in brand:
# Most likely a desktop i3/i5/i7
Expand Down Expand Up @@ -607,6 +612,14 @@ class Intel64(Cpu64):
known_isas = ('cpp', 'sse', 'avx', 'avx2', 'avx512')


class IntelSkylake(Intel64):
    """
    Intel64 platform type with no behaviour of its own.

    It exists so that compilers can recognise this group of platforms with
    an `isinstance` check (e.g. to opt into 512-bit vector code generation)
    instead of comparing against one specific platform instance.
    """
    pass


class IntelGoldenCode(Intel64):
    """Intel64 platform type with no behaviour of its own."""
    # NOTE(review): the name looks like a typo for "Golden Cove" -- confirm
    # before anything starts depending on it.
    # NOTE(review): unlike `IntelSkylake`, this class does not appear in the
    # module's `__all__` -- confirm whether that is intentional.
    pass


class Arm(Cpu64):

known_isas = ('fp', 'asimd', 'asimdrdm')
Expand Down Expand Up @@ -725,11 +738,12 @@ def march(cls):
IVB = Intel64('ivb')
HSW = Intel64('hsw')
BDW = Intel64('bdw', isa='avx2')
SKX = Intel64('skx')
KLX = Intel64('klx')
CLX = Intel64('clx')
KNL = Intel64('knl')
KNL7210 = Intel64('knl', cores_logical=256, cores_physical=64, isa='avx512')
SKX = IntelSkylake('skx')
KLX = IntelSkylake('klx')
CLX = IntelSkylake('clx')
CLK = IntelSkylake('clk')

ARM = Arm('arm')
GRAVITON = Arm('graviton')
Expand All @@ -756,6 +770,7 @@ def march(cls):
'skx': SKX, # Skylake
'klx': KLX, # Kaby Lake
'clx': CLX, # Coffee Lake
'clk': CLK, # Cascade Lake
'knl': KNL,
'knl7210': KNL7210,
'arm': ARM, # Generic ARM CPU
Expand Down
104 changes: 73 additions & 31 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
from codepy.jit import compile_from_string
from codepy.toolchain import GCCToolchain

from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, SKX, POWER8, POWER9, GRAVITON,
get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path)
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime,
get_m1_llvm_path)
from devito.exceptions import CompilationError
from devito.logger import debug, warning, error
from devito.parameters import configuration
Expand Down Expand Up @@ -375,13 +376,22 @@ class GNUCompiler(Compiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.cflags += ['-march=native', '-Wno-unused-result', '-Wno-unused-variable',
'-Wno-unused-but-set-variable']
platform = kwargs.pop('platform', configuration['platform'])

self.cflags += ['-march=native', '-Wno-unused-result',
'-Wno-unused-variable', '-Wno-unused-but-set-variable']

if configuration['safe-math']:
self.cflags.append('-fno-unsafe-math-optimizations')
else:
self.cflags.append('-ffast-math')

if isinstance(platform, IntelSkylake):
# The default is `=256` because avx512 slows down the CPU frequency;
# however, we empirically found that stencils generally benefit
# from `=512`
self.cflags.append('-mprefer-vector-width=512')

language = kwargs.pop('language', configuration['language'])
try:
if self.version >= Version("4.9.0"):
Expand Down Expand Up @@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs):
class ClangCompiler(Compiler):

def __init__(self, *args, **kwargs):
super(ClangCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
Expand Down Expand Up @@ -481,7 +491,7 @@ class AOMPCompiler(Compiler):
"""AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards."""

def __init__(self, *args, **kwargs):
super(AOMPCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
Expand Down Expand Up @@ -531,7 +541,7 @@ def __lookup_cmds__(self):
class PGICompiler(Compiler):

def __init__(self, *args, **kwargs):
super(PGICompiler, self).__init__(*args, cpp=True, **kwargs)
super().__init__(*args, cpp=True, **kwargs)

self.cflags.remove('-std=c99')
self.cflags.remove('-O3')
Expand Down Expand Up @@ -671,39 +681,30 @@ def __lookup_cmds__(self):
class IntelCompiler(Compiler):

def __init__(self, *args, **kwargs):
super(IntelCompiler, self).__init__(*args, **kwargs)

self.cflags.append("-xhost")
super().__init__(*args, **kwargs)

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])
self.cflags.append("-xHost")

if configuration['safe-math']:
self.cflags.append("-fp-model=strict")
else:
self.cflags.append('-fast')
self.cflags.append('-fp-model=fast')

if platform is SKX:
if isinstance(platform, IntelSkylake):
# Systematically use 512-bit vectors on skylake
self.cflags.append("-qopt-zmm-usage=high")

try:
if self.version >= Version("15.0.0"):
# Append the OpenMP flag regardless of configuration['language'],
# since icc15 and later versions implement OpenMP 4.0, hence
# they support `#pragma omp simd`
self.ldflags.append('-qopenmp')
except (TypeError, ValueError):
if language == 'openmp':
# Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0
self.ldflags.append('-fopenmp')
if language == 'openmp':
self.ldflags.append('-qopenmp')

# Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
if kwargs.get('mpi'):
ver = check_output([self.MPICC, "--version"]).decode("utf-8")
if not ver.startswith("icc"):
warning("The MPI compiler `%s` doesn't use the Intel "
"C/C++ compiler underneath" % self.MPICC)
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))

def __lookup_cmds__(self):
self.CC = 'icc'
Expand All @@ -727,16 +728,55 @@ def __lookup_cmds__(self):
class IntelKNLCompiler(IntelCompiler):

def __init__(self, *args, **kwargs):
super(IntelKNLCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ["-xMIC-AVX512"]
self.cflags.append('-xMIC-AVX512')

language = kwargs.pop('language', configuration['language'])

if language != 'openmp':
warning("Running on Intel KNL without OpenMP is highly discouraged")


class OneapiCompiler(IntelCompiler):

    """
    Compiler driver for the Intel oneAPI toolchain (`icx` / `icpx`).

    It reuses all of the flags set up by `IntelCompiler` and then adjusts
    them for the clang-based oneAPI front-ends, depending on the target
    `platform` and the parallel `language`.
    """

    def __init__(self, *args, **kwargs):
        """
        Parameters
        ----------
        platform : optional (keyword)
            Target platform; defaults to `configuration['platform']`.
        language : optional (keyword)
            Parallel language, e.g. 'openmp' or 'sycl'; defaults to
            `configuration['language']`.

        All other arguments are forwarded unchanged to `IntelCompiler`.
        """
        super().__init__(*args, **kwargs)

        platform = kwargs.pop('platform', configuration['platform'])
        language = kwargs.pop('language', configuration['language'])

        if language == 'openmp':
            # The parent class appends `-qopenmp` for OpenMP; replace it
            # with `-fopenmp`
            self.ldflags.remove('-qopenmp')
            self.ldflags.append('-fopenmp')

        if language == 'sycl':
            self.cflags.append('-fsycl')
            if platform is NVIDIAX:
                self.cflags.append('-fsycl-targets=nvptx64-cuda')
            else:
                self.cflags.append('-fsycl-targets=spir64')

        # Offloading targets, selected by platform only
        if platform is NVIDIAX:
            self.cflags.append('-fopenmp-targets=nvptx64-cuda')
        if platform is INTELGPUX:
            self.cflags.append('-fopenmp-targets=spir64')
            self.cflags.append('-fopenmp-target-simd')

        if platform is INTELGPUX:
            self.cflags.remove('-g')  # -g disables some optimizations in IGC
            self.cflags.append('-gline-tables-only')
            self.cflags.append('-fdebug-info-for-profiling')

    def __lookup_cmds__(self):
        # OneAPI HPC ToolKit comes with icpx, which is clang++,
        # and icx, which is clang
        self.CC = 'icx'
        self.CXX = 'icpx'
        self.MPICC = 'mpicc'
        # `mpicxx` is the C++ counterpart of the `mpicc` wrapper; there is no
        # `mpicx` command
        self.MPICX = 'mpicxx'


class CustomCompiler(Compiler):

"""
Expand Down Expand Up @@ -800,9 +840,11 @@ def __lookup_cmds__(self):
'nvidia': NvidiaCompiler,
'cuda': CudaCompiler,
'osx': ClangCompiler,
'intel': IntelCompiler,
'icpc': IntelCompiler,
'intel': OneapiCompiler,
'icx': OneapiCompiler,
'icpx': OneapiCompiler,
'icc': IntelCompiler,
'icpc': IntelCompiler,
'intel-knl': IntelKNLCompiler,
'knl': IntelKNLCompiler,
'dpcpp': DPCPPCompiler,
Expand Down
7 changes: 5 additions & 2 deletions devito/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,11 @@ class switchconfig(object):
Decorator to temporarily change `configuration` parameters.
"""

def __init__(self, **params):
self.params = {k.replace('_', '-'): v for k, v in params.items()}
def __init__(self, condition=True, **params):
if condition:
self.params = {k.replace('_', '-'): v for k, v in params.items()}
else:
self.params = {}

def __call__(self, func, *args, **kwargs):
@wraps(func)
Expand Down
12 changes: 10 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from devito.checkpointing import NoopRevolver
from devito.finite_differences.differentiable import EvalDerivative
from devito.arch import Cpu64, Device, sniff_mpi_distro, Arm
from devito.arch.compiler import compiler_registry, IntelCompiler, NvidiaCompiler
from devito.arch.compiler import (compiler_registry, IntelCompiler, OneapiCompiler,
NvidiaCompiler)
from devito.ir.iet import (FindNodes, FindSymbols, Iteration, ParallelBlock,
retrieve_iteration_tree)
from devito.tools import as_tuple
Expand All @@ -26,7 +27,8 @@ def skipif(items, whole_module=False):
# Sanity check
accepted = set()
accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc',
'device-aomp', 'cpu64-icc', 'cpu64-nvc', 'cpu64-arm', 'chkpnt'})
'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc', 'cpu64-arm',
'cpu64-icpx', 'chkpnt'})
accepted.update({'nompi', 'nodevice'})
unknown = sorted(set(items) - accepted)
if unknown:
Expand Down Expand Up @@ -70,6 +72,12 @@ def skipif(items, whole_module=False):
isinstance(configuration['platform'], Cpu64):
skipit = "`icc+cpu64` won't work with this test"
break
# Skip if it won't run with OneapiCompiler
if i == 'cpu64-icx' and \
isinstance(configuration['compiler'], OneapiCompiler) and \
isinstance(configuration['platform'], Cpu64):
skipit = "`icx+cpu64` won't work with this test"
break
# Skip if it won't run on Arm
if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm):
skipit = "Arm doesn't support x86-specific instructions"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

from benchmarks.user.benchmark import run
from devito import configuration, switchconfig
from conftest import skipif
from subprocess import check_call


@skipif('cpu64-icx')
@pytest.mark.parametrize('mode, problem, op', [
('run', 'acoustic', 'forward'), ('run', 'acoustic', 'adjoint'),
('run', 'acoustic', 'jacobian'), ('run', 'acoustic', 'jacobian_adjoint'),
Expand Down
1 change: 0 additions & 1 deletion tests/test_buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,6 @@ def test_everything():
assert np.all(u.data == u1.data)


@skipif('cpu64-icc')
@pytest.mark.parametrize('subdomain', ['domain', 'interior'])
def test_stencil_issue_1915(subdomain):
nt = 5
Expand Down
5 changes: 4 additions & 1 deletion tests/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
SparseFunction, SparseTimeFunction, Eq, Operator, Constant,
Dimension, DefaultDimension, SubDimension, switchconfig,
SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
CustomDimension, dimensions)
CustomDimension, dimensions, configuration)
from devito.arch.compiler import IntelCompiler, OneapiCompiler
from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
FindSymbols, retrieve_iteration_tree)
from devito.symbolics import indexify, retrieve_functions, IntDiv
Expand Down Expand Up @@ -1382,6 +1383,8 @@ def test_affiness(self):
iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time]
assert all(i.is_Affine for i in iterations)

@switchconfig(condition=isinstance(configuration['compiler'],
(IntelCompiler, OneapiCompiler)), safe_math=True)
def test_sparse_time_function(self):
nt = 20

Expand Down

0 comments on commit 0678627

Please sign in to comment.