Skip to content

Commit

Permalink
Merge pull request #2396 from devitocodes/moar-elastic-tweaks
Browse files Browse the repository at this point in the history
compiler: Yet another batch of compilation tweaks
  • Loading branch information
mloubout authored Jul 8, 2024
2 parents f67e7af + 2d0f60a commit 156db8e
Show file tree
Hide file tree
Showing 29 changed files with 579 additions and 350 deletions.
15 changes: 14 additions & 1 deletion devito/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,21 @@ def reinit_compiler(val):
# optimisations.
configuration.add('safe-math', 0, [0, 1], preprocessor=bool, callback=reinit_compiler)


# Enable/disable automatic padding for allocated data
configuration.add('autopadding', False, [False, True])
def _preprocess_autopadding(v):
    """
    Normalize a user-provided value for the `autopadding` option.

    The strings '0' and '1' and the shorthands 'fp16', 'fp32' and 'fp64' are
    translated through the lookup table below. Any value that is not a key
    of the table is returned unchanged.
    """
    mapper = {
        '0': False,
        '1': np.float32,
        # NOTE: `1 == True` and both hash alike, so the integer `1` also
        # resolves to this entry
        True: np.float32,
        'fp16': np.float16,
        'fp32': np.float32,
        'fp64': np.float64
    }
    return mapper.get(v, v)

configuration.add('autopadding', False, # noqa: E305
[False, True, 0, 1, np.float16, np.float32, np.float64],
preprocessor=_preprocess_autopadding)

# Select target device
configuration.add('deviceid', -1, preprocessor=int, impacts_jit=False)
Expand Down
6 changes: 3 additions & 3 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,17 @@ def _rcompile_wrapper(cls, **kwargs0):
options0 = kwargs0.pop('options')

def wrapper(expressions, mode='default', options=None, **kwargs1):
options = {**options0, **(options or {})}
kwargs = {**kwargs0, **kwargs1}

if mode == 'host':
par_disabled = options['par-disabled']
options = options or {}
target = {
'platform': 'cpu64',
'language': 'C' if par_disabled else 'openmp',
'language': 'C' if options0['par-disabled'] else 'openmp',
'compiler': 'custom'
}
else:
options = {**options0, **(options or {})}
target = None

return rcompile(expressions, kwargs, options, target=target)
Expand Down
21 changes: 14 additions & 7 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,24 +343,31 @@ def rule(size, e):
# Reconstruct the Clusters
processed = []
for c in clusters:
exprs = c.exprs

sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = [i for i in sub_iterators[d] if i not in subiters]

# Apply substitutions to expressions
# Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
# ...]`, where `u` and `v` are TimeFunction with circular time
# buffers (save=None) *but* different modulo extent. The `t+1`
# indices above are therefore conceptually different, so they will
# be replaced with the proper ModuloDimension through two different
# calls to `xreplace_indices`
exprs = c.exprs
groups = as_mapper(mds, lambda d: d.modulo)
for size, v in groups.items():
subs = {md.origin: md for md in v}
func = partial(xreplace_indices, mapper=subs, key=partial(rule, size))
key = partial(rule, size)
if size == 1:
# Optimization -- avoid useless "% 1" ModuloDimensions
subs = {md.origin: 0 for md in v}
else:
subs = {md.origin: md for md in v}
sub_iterators[d].extend(v)

func = partial(xreplace_indices, mapper=subs, key=key)
exprs = [e.apply(func) for e in exprs]

# Augment IterationSpace
sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = tuple(i for i in sub_iterators[d] + tuple(mds)
if i not in subiters)
ispace = IterationSpace(c.ispace.intervals, sub_iterators,
c.ispace.directions)

Expand Down
4 changes: 0 additions & 4 deletions devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,6 @@ def scope(self):
def functions(self):
return self.scope.functions

@cached_property
def has_increments(self):
return any(e.is_Increment for e in self.exprs)

@cached_property
def grid(self):
grids = set(f.grid for f in self.functions if f.is_AbstractFunction)
Expand Down
16 changes: 10 additions & 6 deletions devito/ir/iet/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,21 +1217,25 @@ def __repr__(self):
class Pragma(Node):

"""
One or more pragmas floating in the IET constructed through a callback.
One or more pragmas floating in the IET.
"""

def __init__(self, callback, arguments=None):
def __init__(self, pragma, arguments=None):
super().__init__()

self.callback = callback
if not isinstance(pragma, str):
raise TypeError("Pragma name must be a string, not %s" % type(pragma))

self.pragma = pragma
self.arguments = as_tuple(arguments)

def __repr__(self):
return '<Pragmas>'
return '<Pragma>'

@cached_property
def pragmas(self):
return as_tuple(self.callback(*self.arguments))
def _generate(self):
# Subclasses may override this property to customize the pragma generation
return self.pragma % self.arguments


class Transfer:
Expand Down
3 changes: 2 additions & 1 deletion devito/ir/iet/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def derive_parameters(iet, drop_locals=False, ordering='default'):
basics = FindSymbols('basics').visit(iet)
candidates.extend(i.function for i in basics)

# Filter off duplicates (e.g., `x_size` is extracted by both calls to FindSymbols)
# Filter off duplicates (e.g., `x_size` is extracted by both calls to
# FindSymbols)
candidates = filter_ordered(candidates)

# Filter off symbols which are defined somewhere within `iet`
Expand Down
15 changes: 7 additions & 8 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def visit_Expression(self, o):
code = c.Assign(lhs, rhs)

if o.pragmas:
code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))

return code

Expand All @@ -489,7 +489,7 @@ def visit_AugmentedExpression(self, o):
c_rhs = ccode(o.expr.rhs, dtype=o.dtype, compiler=self._compiler)
code = c.Statement("%s %s= %s" % (c_lhs, o.op, c_rhs))
if o.pragmas:
code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))
return code

def visit_Call(self, o, nested_call=False):
Expand Down Expand Up @@ -555,15 +555,13 @@ def visit_Iteration(self, o):

# Attach pragmas, if any
if o.pragmas:
handle = c.Module(o.pragmas + (handle,))
pragmas = tuple(self._visit(i) for i in o.pragmas)
handle = c.Module(pragmas + (handle,))

return handle

def visit_Pragma(self, o):
if len(o.pragmas) == 1:
return o.pragmas[0]
else:
return c.Collection(o.pragmas)
return c.Pragma(o._generate)

def visit_While(self, o):
condition = ccode(o.condition)
Expand Down Expand Up @@ -1230,9 +1228,10 @@ def visit_Iteration(self, o):
nodes = self._visit(o.nodes)
dimension = uxreplace(o.dim, self.mapper)
limits = [uxreplace(i, self.mapper) for i in o.limits]
pragmas = self._visit(o.pragmas)
uindices = [uxreplace(i, self.mapper) for i in o.uindices]
return o._rebuild(nodes=nodes, dimension=dimension, limits=limits,
uindices=uindices)
pragmas=pragmas, uindices=uindices)

def visit_Definition(self, o):
try:
Expand Down
137 changes: 94 additions & 43 deletions devito/ir/support/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from functools import cached_property

from sympy import S
import sympy

from devito.ir.support.space import Backward, null_ispace
from devito.ir.support.utils import AccessMode, extrema
Expand Down Expand Up @@ -345,21 +346,48 @@ def distance(self, other):
if not (sit == oit and sai.root is oai.root):
# E.g., `self=R<f,[x + 2]>` and `other=W<f,[i + 1]>`
# E.g., `self=R<f,[x]>`, `other=W<f,[x + 1]>`,
# `self.itintervals=(x<0>,)` and `other.itintervals=(x<1>,)`
ret.append(S.Infinity)
break
# `self.itintervals=(x<0>,)`, `other.itintervals=(x<1>,)`
return vinf(ret)
except AttributeError:
# E.g., `self=R<f,[cy]>` and `self.itintervals=(y,)` => `sai=None`
pass

# In some cases, the distance degenerates because `self` and
# `other` never intersect, which essentially means there's no
# dependence between them. In this case, we set the distance to a
# dummy value (the imaginary unit). Hence, we call these "imaginary
# dependences". This occurs in just a small set of special cases,
# which we attempt to handle here
if any(d and d._defines & sit.dim._defines for d in (sai, oai)):
# Case 1: `sit` is an IterationInterval with statically known
# trip count. E.g. it ranges from 0 to 3; `other` performs a
# constant access at 4
for v in (self[n], other[n]):
try:
if bool(v < sit.symbolic_min or v > sit.symbolic_max):
return Vector(S.ImaginaryUnit)
except TypeError:
pass

# Case 2: `sit` is an IterationInterval over a local SubDimension
# and `other` performs a constant access
for d0, d1 in ((sai, oai), (oai, sai)):
if d0 is None and d1.is_Sub and d1.local:
return Vector(S.ImaginaryUnit)

# Case 3: `self` and `other` have some special form such that
# it's provable that they never intersect
if sai and sit == oit:
if disjoint_test(self[n], other[n], sai, sit):
return Vector(S.ImaginaryUnit)

if self.function._mem_shared:
# Special case: the distance between two regular, thread-shared
# objects fallbacks to zero, as any other value would be nonsensical.
# objects falls back to zero, as any other value would be nonsensical
ret.append(S.Zero)

elif sai and oai and sai._defines & sit.dim._defines:
# E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`
# and `ai=t`
# E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`, `ai=t`
if sit.direction is Backward:
ret.append(other[n] - self[n])
else:
Expand All @@ -373,45 +401,13 @@ def distance(self, other):
break

elif sai in self.ispace and oai in other.ispace:
# E.g., `self=R<f,[x, y]>`, `sai=time`, self.itintervals=(time, x, y)
# with `n=0`
# E.g., `self=R<f,[x, y]>`, `sai=time`,
# `self.itintervals=(time, x, y)`, `n=0`
continue

elif any(d and d._defines & sit.dim._defines for d in (sai, oai)):
# In some cases, the distance degenerates because `self` and
# `other` never intersect, which essentially means there's no
# dependence between them. In this case, we set the distance to
# a dummy value (the imaginary unit). Hence, we call these
# "imaginary dependences". This occurs in just a small set of
# special cases, which we handle here

# Case 1: `sit` is an IterationInterval with statically known
# trip count. E.g. it ranges from 0 to 3; `other` performs a
# constant access at 4
for v in (self[n], other[n]):
try:
if bool(v < sit.symbolic_min or v > sit.symbolic_max):
return Vector(S.ImaginaryUnit)
except TypeError:
pass

# Case 2: `sit` is an IterationInterval over a local SubDimension
# and `other` performs a constant access
for d0, d1 in ((sai, oai), (oai, sai)):
if d0 is None and d1.is_Sub and d1.local:
return Vector(S.ImaginaryUnit)

# Fallback
ret.append(S.Infinity)
break

elif self.findices[n] in sit.dim._defines:
# E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>` and `fi=p_src` (`n=1`)
ret.append(S.Infinity)
break

if S.Infinity in ret:
return Vector(*ret)
else:
# E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>`, `fi=p_src`, `n=1`
return vinf(ret)

n = len(ret)

Expand Down Expand Up @@ -1330,6 +1326,10 @@ def is_regular(self):

# *** Utils

def vinf(entries):
    """
    Build a Vector out of `entries` followed by a trailing `S.Infinity`.

    `entries` may be any iterable (e.g., a list or a tuple); it is not
    modified.
    """
    return Vector(*entries, S.Infinity)


def retrieve_accesses(exprs, **kwargs):
"""
Like retrieve_terminals, but ensure that if a ComponentAccess is found,
Expand All @@ -1345,3 +1345,54 @@ def retrieve_accesses(exprs, **kwargs):
exprs1 = uxreplace(exprs, subs)

return compaccs | retrieve_terminals(exprs1, **kwargs) - set(subs.values())


def disjoint_test(e0, e1, d, it):
    """
    A rudimentary test to check if two accesses `e0` and `e1` along `d` within
    the IterationInterval `it` are independent.

    This is inspired by the Banerjee test, but it's way more simplistic.

    The test is conservative, meaning that if it returns False, then the
    accesses might be independent, but it's not guaranteed. If it returns
    True, then the accesses are definitely independent.

    Our implementation focuses on tiny yet relevant cases, such as when the
    iteration space's bounds are numeric constants, while the index access
    functions reduce to numbers once the iteration variable is substituted
    with one of the possible values in the iteration space.

    Examples
    --------
    * e0 = 12 - zl, e1 = zl + 4, d = zl, it = zl[0,0]
      where zl is a left SubDimension with thickness, say, 4

      The test will return True, as the two index access functions never
      overlap.
    """
    # Identical index functions trivially touch the same points
    if e0 == e1:
        return False

    # Only two kinds of Dimension are handled; anything else is
    # conservatively reported as "not provably disjoint"
    if d.is_Custom:
        subs = {}
    elif d.is_Sub and d.is_left:
        # Presumably `d.thickness.left` is a (symbol, value) pair -- it is
        # fed to `dict` as a single item
        subs = {d.root.symbolic_min: 0, **dict([d.thickness.left])}
    else:
        return False

    # Extrema of the iteration interval after applying the substitutions
    m = it.symbolic_min.subs(subs)
    M = it.symbolic_max.subs(subs)

    # Evaluate both index functions at the two extrema
    p00 = e0._subs(d, m)
    p01 = e0._subs(d, M)

    p10 = e1._subs(d, m)
    p11 = e1._subs(d, M)

    # Bail out unless all four end points reduced to numbers
    if not all(i.is_Number for i in (p00, p01, p10, p11)):
        return False

    i0 = sympy.Interval(min(p00, p01), max(p00, p01))
    i1 = sympy.Interval(min(p10, p11), max(p10, p11))

    # Disjoint if and only if the two intervals have an empty intersection
    return not bool(i0.intersect(i1))
5 changes: 2 additions & 3 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,12 @@ def rcompile(expressions, kwargs, options, target=None):

if target is None:
cls = operator_selector(**kwargs)
kwargs['options'] = options
else:
kwargs = parse_kwargs(**target)
cls = operator_selector(**kwargs)
kwargs = cls._normalize_kwargs(**kwargs)

# Use the customized opt options
kwargs['options'] = options
kwargs['options'].update(options)

# Recursive profiling not supported -- would be a complete mess
kwargs.pop('profiler', None)
Expand Down
10 changes: 1 addition & 9 deletions devito/passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,7 @@ def is_gpu_create(obj, gpu_create):
except AttributeError:
functions = as_tuple(obj)

for i in functions:
try:
f = i.alias or i
except AttributeError:
f = i
if f not in gpu_create:
return False

return True
return all(f in gpu_create for f in functions)


# Import all compiler passes
Expand Down
Loading

0 comments on commit 156db8e

Please sign in to comment.