From ebf4b78ecfbb84fd2a630e77a8fdbc28380f0622 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Wed, 27 Sep 2023 09:38:50 -0400
Subject: [PATCH 1/2] compiler: prevent temporary for local reductions

---
 devito/builtins/utils.py         |  2 +-
 devito/ir/clusters/algorithms.py |  5 +++--
 devito/ir/clusters/cluster.py    | 13 ++++++++++++-
 devito/ir/equations/equation.py  |  5 +++++
 devito/types/sparse.py           |  4 ++--
 tests/test_dle.py                | 26 +++++++++++++++++++++++++-
 tests/test_dse.py                |  9 ++++++++-
 7 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/devito/builtins/utils.py b/devito/builtins/utils.py
index 214aac104e..70f590d5de 100644
--- a/devito/builtins/utils.py
+++ b/devito/builtins/utils.py
@@ -34,7 +34,7 @@ def __init__(self, *functions, op=dv.mpi.MPI.SUM, dtype=None):
         self.op = op
 
     def __enter__(self):
-        i = dv.Dimension(name='i',)
+        i = dv.Dimension(name='mri',)
         self.n = dv.Function(name='n', shape=(1,), dimensions=(i,),
                              grid=self.grid, dtype=self.dtype)
         self.n.data[0] = 0
diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
index dcea8e2f1c..6b246f1f16 100644
--- a/devito/ir/clusters/algorithms.py
+++ b/devito/ir/clusters/algorithms.py
@@ -458,7 +458,7 @@ def normalize_reductions(cluster, sregistry, options):
 
     processed = []
     for e in cluster.exprs:
-        if e.is_Reduction and (e.lhs.is_Indexed or cluster.is_sparse):
+        if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
             # Transform `e` such that we reduce into a scalar (ultimately via
             # atomic ops, though this part is carried out by a much later pass)
             # For example, given `i = m[p_src]` (i.e., indirection array), turn:
@@ -471,7 +471,8 @@ def normalize_reductions(cluster, sregistry, options):
             processed.extend([e.func(v, e.rhs, operation=None),
                               e.func(e.lhs, v)])
 
-        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
+        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce \
+                and not cluster.is_sparse:
             # Transform `e` into what is in essence an explicit map-reduce
             # For example, turn:
             # `s += f(u[x], v[x], ...)`
diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index 0dc3200b4f..ef839e2222 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -230,7 +230,18 @@ def is_dense(self):
 
     @property
     def is_sparse(self):
-        return not self.is_dense
+        """
+        A cluster is sparse if it represent a sparse operation i.e if both
+
+            * The cluster contains sparse functions
+            * The cluster uses dense functions
+
+        If only the first case is true, the cluster only contains operation on the sparse
+        function itself without indirection and therefore only contains dense operations.
+        """
+        return (any(f.is_SparseFunction for f in self.functions) and
+                len([f for f in self.functions
+                     if (f.is_Function and not f.is_SparseFunction)]) > 0)
 
     @property
     def is_halo_touch(self):
diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py
index a360132c1b..d416c22766 100644
--- a/devito/ir/equations/equation.py
+++ b/devito/ir/equations/equation.py
@@ -73,6 +73,11 @@ def apply(self, func):
         kwargs['conditionals'] = {k: func(v) for k, v in self.conditionals.items()}
         return self.func(*args, **kwargs)
 
+    def __repr__(self):
+        if not self.is_Reduction:
+            return super().__repr__()
+        else:
+            return '%s = %s(%s, %s)' % (self.lhs, self.operation, self.lhs, self.rhs)
     # Pickling support
     __reduce_ex__ = Pickable.__reduce_ex__
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 42ecdc4a9a..90816fd7ad 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -338,11 +338,11 @@ def guard(self, expr=None):
                                   condition=condition, indirect=True)
 
         if expr is None:
-            out = self.indexify().xreplace({self._sparse_dim: cd})
+            out = self.indexify()._subs(self._sparse_dim, cd)
         else:
             functions = {f for f in retrieve_function_carriers(expr)
                          if f.is_SparseFunction}
-            out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions})
+            out = indexify(expr).subs({f._sparse_dim: cd for f in functions})
 
         return out, temps
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 8d58827a61..9c297e0dca 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -11,7 +11,7 @@
                     configuration, dimensions, info, cos)
 from devito.exceptions import InvalidArgument
 from devito.ir.iet import (Iteration, FindNodes, IsPerfectIteration,
-                           retrieve_iteration_tree)
+                           retrieve_iteration_tree, Expression)
 from devito.passes.iet.languages.openmp import Ompizer, OmpRegion
 from devito.tools import as_tuple
 from devito.types import Scalar
@@ -765,6 +765,30 @@ def test_array_sum_reduction(self, so, dim):
 
         assert np.allclose(f.data, 18)
 
+    def test_reduction_local(self):
+        grid = Grid((11, 11))
+        d = Dimension("i")
+        n = Function(name="n", dimensions=(d,), shape=(1,))
+        u = Function(name="u", grid=grid)
+        u.data.fill(1.)
+
+        op = Operator(Inc(n[0], u))
+        op()
+
+        cond = FindNodes(Expression).visit(op)
+        iterations = FindNodes(Iteration).visit(op)
+        # Should not creat any temporary for the reduction
+        assert len(cond) == 1
+        if configuration['language'] == 'C':
+            pass
+        elif Ompizer._support_array_reduction(configuration['compiler']):
+            assert "reduction(+:n[0])" in iterations[0].pragmas[0].value
+        else:
+            # E.g. old GCC's
+            assert "atomic update" in str(iterations[-1])
+
+        assert n.data[0] == 11*11
+
     def test_array_max_reduction(self):
         """
         Test generation of OpenMP sum-reduction clauses involving Function's.
diff --git a/tests/test_dse.py b/tests/test_dse.py
index e1ae16eb69..c111104506 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2669,13 +2669,20 @@ def test_sparse_const(self):
 
         u = TimeFunction(name="u", grid=grid)
         src = PrecomputedSparseTimeFunction(name="src", grid=grid, npoint=1, nt=11,
-                                            r=2, interpolation_coeffs=np.ones((1, 3, 2)))
+                                            r=2, interpolation_coeffs=np.ones((1, 3, 2)),
+                                            gridpoints=[[5, 5, 5]])
+        u.data.fill(1.)
+
         op = Operator(src.interpolate(u))
 
         cond = FindNodes(Conditional).visit(op)
         assert len(cond) == 1
+        assert len(cond[0].args['then_body'][0].exprs) == 1
         assert all(e.is_scalar for e in cond[0].args['then_body'][0].exprs)
 
+        op()
+        assert np.all(src.data == 8)
+
 
 class TestIsoAcoustic(object):
 

From 47bf87c3922287304678ef170760000ad35e30f8 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 28 Sep 2023 08:33:06 -0400
Subject: [PATCH 2/2] compiler: split normalization into sparse and dense

---
 devito/ir/clusters/algorithms.py | 52 ++++++++++++++++++++------------
 devito/ir/clusters/cluster.py    |  2 +-
 devito/ir/equations/equation.py  |  3 ++
 devito/ir/support/properties.py  |  7 +++++
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
index 6b246f1f16..19d31d5173 100644
--- a/devito/ir/clusters/algorithms.py
+++ b/devito/ir/clusters/algorithms.py
@@ -7,7 +7,7 @@
 
 from devito.exceptions import InvalidOperator
 from devito.ir.support import (Any, Backward, Forward, IterationSpace,
-                               PARALLEL_IF_ATOMIC, pull_dims)
+                               pull_dims)
 from devito.ir.clusters.analysis import analyze
 from devito.ir.clusters.cluster import Cluster, ClusterGroup
 from devito.ir.clusters.visitors import Queue, QueueStateful, cluster_pass
@@ -402,7 +402,8 @@ def normalize(clusters, **kwargs):
     sregistry = kwargs['sregistry']
 
     clusters = normalize_nested_indexeds(clusters, sregistry)
-    clusters = normalize_reductions(clusters, sregistry, options)
+    clusters = normalize_reductions_dense(clusters, sregistry, options)
+    clusters = normalize_reductions_sparse(clusters, sregistry, options)
 
     return clusters
 
@@ -444,35 +445,22 @@ def pull_indexeds(expr, subs, mapper, parent=None):
     return cluster.rebuild(processed)
 
 
-@cluster_pass(mode='all')
-def normalize_reductions(cluster, sregistry, options):
+@cluster_pass(mode='dense')
+def normalize_reductions_dense(cluster, sregistry, options):
     """
     Extract the right-hand sides of reduction Eq's in to temporaries.
     """
     opt_mapify_reduce = options['mapify-reduce']
 
-    dims = [d for d, v in cluster.properties.items() if PARALLEL_IF_ATOMIC in v]
+    dims = [d for d in cluster.properties.dimensions
+            if cluster.properties.is_parallel_atomic(d)]
 
     if not dims:
         return cluster
 
     processed = []
     for e in cluster.exprs:
-        if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
-            # Transform `e` such that we reduce into a scalar (ultimately via
-            # atomic ops, though this part is carried out by a much later pass)
-            # For example, given `i = m[p_src]` (i.e., indirection array), turn:
-            # `u[t, i] += f(u[t, i], src, ...)`
-            # into
-            # `s = f(u[t, i], src, ...)`
-            # `u[t, i] += s`
-            name = sregistry.make_name()
-            v = Symbol(name=name, dtype=e.dtype)
-            processed.extend([e.func(v, e.rhs, operation=None),
-                              e.func(e.lhs, v)])
-
-        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce \
-                and not cluster.is_sparse:
+        if e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
             # Transform `e` into what is in essence an explicit map-reduce
             # For example, turn:
             # `s += f(u[x], v[x], ...)`
@@ -485,7 +473,31 @@ def normalize_reductions(cluster, sregistry, options):
             a = Array(name=name, dtype=e.dtype, dimensions=dims)
             processed.extend([Eq(a.indexify(), e.rhs),
                               e.func(e.lhs, a.indexify())])
+        else:
+            processed.append(e)
+
+    return cluster.rebuild(processed)
 
+
+@cluster_pass(mode='sparse')
+def normalize_reductions_sparse(cluster, sregistry, options):
+    """
+    Extract the right-hand sides of reduction Eq's in to temporaries.
+    """
+    processed = []
+    for e in cluster.exprs:
+        if e.is_Reduction and e.lhs.is_Indexed:
+            # Transform `e` such that we reduce into a scalar (ultimately via
+            # atomic ops, though this part is carried out by a much later pass)
+            # For example, given `i = m[p_src]` (i.e., indirection array), turn:
+            # `u[t, i] += f(u[t, i], src, ...)`
+            # into
+            # `s = f(u[t, i], src, ...)`
+            # `u[t, i] += s`
+            name = sregistry.make_name()
+            v = Symbol(name=name, dtype=e.dtype)
+            processed.extend([e.func(v, e.rhs, operation=None),
+                              e.func(e.lhs, v)])
         else:
             processed.append(e)
 
diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index ef839e2222..2143331aba 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -228,7 +228,7 @@ def is_dense(self):
                 not self.is_halo_touch and
                 all(a.is_regular for a in self.scope.accesses))
 
-    @property
+    @cached_property
     def is_sparse(self):
         """
         A cluster is sparse if it represent a sparse operation i.e if both
diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py
index d416c22766..ecef8b82fb 100644
--- a/devito/ir/equations/equation.py
+++ b/devito/ir/equations/equation.py
@@ -76,8 +76,11 @@ def apply(self, func):
     def __repr__(self):
         if not self.is_Reduction:
             return super().__repr__()
+        elif self.operation is OpInc:
+            return '%s += %s' % (self.lhs, self.rhs)
         else:
             return '%s = %s(%s, %s)' % (self.lhs, self.operation, self.lhs, self.rhs)
+
     # Pickling support
     __reduce_ex__ = Pickable.__reduce_ex__
 
diff --git a/devito/ir/support/properties.py b/devito/ir/support/properties.py
index f4ab873575..c3c808356e 100644
--- a/devito/ir/support/properties.py
+++ b/devito/ir/support/properties.py
@@ -139,6 +139,10 @@ class Properties(frozendict):
     A mapper {Dimension -> {properties}}.
     """
 
+    @property
+    def dimensions(self):
+        return tuple(self)
+
     def add(self, dims, properties=None):
         m = dict(self)
         for d in as_tuple(dims):
@@ -205,6 +209,9 @@ def is_parallel(self, dims):
         return any(len(self[d] & {PARALLEL, PARALLEL_INDEP}) > 0
                    for d in as_tuple(dims))
 
+    def is_parallel_atomic(self, dims):
+        return any(len(self[d] & {PARALLEL_IF_ATOMIC}) > 0 for d in as_tuple(dims))
+
     def is_parallel_relaxed(self, dims):
         return any(len(self[d] & PARALLELS) > 0 for d in as_tuple(dims))