docs: Use switchconfig and update FAQ

devitocodes · Apr 18, 2024 · 8e6ab03 · 8e6ab03
1 parent 5735e62
commit 8e6ab03
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 19 deletions.
diff --git a/FAQ.md b/FAQ.md
@@ -596,7 +596,7 @@ By default, Devito compiles the generated code using flags that maximize the run
 
 [top](#Frequently-Asked-Questions)
 
-## Can I control the MPI domain decomposition
+## Can I control the MPI domain decomposition?
 
 Up to and including Devito v3.5, domain decomposition occurred along the fastest axis. In later versions, domain decomposition occurs along the slowest axis, for performance reasons. And yes, it is possible to control the domain decomposition in user code, but this is not well documented. Take a look at `class CustomTopology` in [distributed.py](https://github.com/devitocodes/devito/blob/master/devito/mpi/distributed.py) and `test_custom_topology` in [this file](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks.
 

diff --git a/devito/logger.py b/devito/logger.py
@@ -77,7 +77,7 @@ def set_log_level(level, comm=None):
     """
     from devito import configuration
 
-    if comm is not None:
+    if comm is not None and configuration['mpi']:
         if comm.rank != 0:
             logger.removeHandler(stream_handler)
             logger.addHandler(logging.NullHandler())

diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -6,6 +6,7 @@
 from cached_property import cached_property
 from sympy import sympify
 
+from devito import switchconfig
 from devito.arch import compiler_registry, platform_registry
 from devito.data import default_allocator
 from devito.exceptions import InvalidOperator
@@ -871,8 +872,10 @@ def apply(self, **kwargs):
         # Post-process runtime arguments
         self._postprocess_arguments(args, **kwargs)
 
-        # Output summary of performance achieved
-        return self._emit_apply_profiling(args)
+        # Output summary of performance achieved and
+        # temporarily drop MPI for printing arguments
+        with switchconfig(mpi=False):
+            return self._emit_apply_profiling(args)
 
     # Performance profiling
 
@@ -910,10 +913,8 @@ def _emit_timings(timings, indent=''):
     def _emit_apply_profiling(self, args):
         """Produce a performance summary of the profiled sections."""
 
-        # In case 'MPI0' is selected for logging, restrict result printing to one rank
-        temp = configuration['log-level']
-        if configuration['mpi']:
-            set_log_level(configuration['log-level'], comm=args.comm)
+        # In case MPI is used restrict result logging to one rank
+        set_log_level('DEBUG', comm=args.comm)
 
         # Rounder to 2 decimal places
         fround = lambda i: ceil(i * 100) / 100
@@ -1008,16 +1009,7 @@ def lower_perfentry(v):
                     if a in args:
                         perf_args[a] = args[a]
                         break
-
-        if configuration['mpi']:
-            perf("Performance[mode=%s, mpi=%s] arguments: %s, " %
-                 (self._mode, configuration['mpi'], perf_args))
-        else:
-            perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args))
-
-        # Restore logging configuration to all ranks
-        if configuration['mpi']:
-            set_log_level(temp, comm=None)
+        perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args))
 
         return summary
 

diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py
@@ -473,7 +473,7 @@ def add_glb_vanilla(self, key, time):
         ops = sum(v.ops for v in self.input.values())
         traffic = sum(v.traffic for v in self.input.values())
 
-        if np.isnan(traffic):
+        if np.isnan(traffic) or traffic == 0:
             return
 
         gflops = float(ops)/10**9