Skip to content

Commit

Permalink
Merge branch 'master' into merge_candidate
Browse files Browse the repository at this point in the history
  • Loading branch information
mcjamieson authored Jul 24, 2024
2 parents b70ad70 + e901e22 commit 5927fae
Show file tree
Hide file tree
Showing 12 changed files with 480 additions and 265 deletions.
3 changes: 3 additions & 0 deletions changelog
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@
possible side-effects. Also now removes the parent Loop if the hoist means
it no longer has a body.

63) PR #2660 for #1370. Adds 'collapse' and 'ignore_dependencies_for'
options to ParallelLoopTrans.

release 2.5.0 14th of February 2024

1) PR #2199 for #2189. Fix bugs with missing maps in enter data
Expand Down
5 changes: 4 additions & 1 deletion examples/nemo/scripts/omp_gpu_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ def trans(psyir):
# Skip if an outer loop is already parallelised
if loop.ancestor(Directive):
continue
omp_loop_trans.apply(loop, options={"force": True})
try:
omp_loop_trans.apply(loop, options={"force": True})
except TransformationError:
continue
omp_target_trans.apply(loop.parent.parent)
assigns = loop.walk(Assignment)
if len(assigns) == 1 and assigns[0].lhs.symbol.name == "zmax":
Expand Down
106 changes: 23 additions & 83 deletions examples/nemo/scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,33 +245,33 @@ def insert_explicit_loop_parallelism(
:param schedule: the PSyIR Schedule to transform.
:type schedule: :py:class:`psyclone.psyir.nodes.node`
:param region_directive_trans: PSyclone transformation to insert the \
:param region_directive_trans: PSyclone transformation that inserts the
region directive.
:type region_directive_trans: \
:py:class:`psyclone.transformation.Transformation`
:param loop_directive_trans: PSyclone transformation to use to insert the \
loop directive.
:param loop_directive_trans: PSyclone transformation that inserts the
loop parallelisation directive.
:type loop_directive_trans: \
:py:class:`psyclone.transformation.Transformation`
:param collapse: whether to attempt to insert the collapse clause to as \
:param collapse: whether to attempt to insert the collapse clause to as
many nested loops as possible.
:param collapse: whether to insert directive on loops with Calls or \
CodeBlocks in their loop body.
'''
# Add the parallel directives in each loop
for loop in schedule.walk(Loop):
if loop.ancestor(Directive):
continue # Skip if an outer loop is already parallelised

opts = {}
opts = {"collapse": collapse, "verbose": True}

routine_name = loop.ancestor(Routine).name

if ('dyn_spg' in routine_name and len(loop.walk(Loop)) > 2):
print("Loop not parallelised because its in 'dyn_spg' and "
"its not the inner loop")
loop.append_preceding_comment(
"PSyclone: Loop not parallelised because it is in 'dyn_spg' "
"and is not the inner loop")
continue

# Skip if it is an array operation loop on an ice routine if along the
# third dim or higher or if the loop nests a loop over ice points
# (npti) or if the loop and array dims do not match.
Expand All @@ -287,93 +287,33 @@ def insert_explicit_loop_parallelism(
for ref in lp.stop_expr.walk(Reference))
or (str(len(loop.walk(Loop))) !=
loop.stop_expr.arguments[1].value))):
print("ICE Loop not parallelised for performance reasons")
loop.append_preceding_comment(
"PSyclone: ICE Loop not parallelised for performance reasons")
continue

# Skip if looping over ice categories, ice or snow layers
# as these have only 5, 4, and 1 iterations, respectively
if (any(ref.symbol.name in ('jpl', 'nlay_i', 'nlay_s')
for ref in loop.stop_expr.walk(Reference))):
print("Loop not parallelised because stops at 'jpl', 'nlay_i' "
"or 'nlay_s'.")
loop.append_preceding_comment(
"PSyclone: Loop not parallelised because stops at 'jpl',"
" 'nlay_i' or 'nlay_s'.")
continue

def skip_for_correctness(loop):
    ''' Report whether the given loop must be left unparallelised
    because of what its body contains. A message explaining the reason
    is printed whenever the answer is True.

    :param loop: the loop whose contents are inspected.

    :returns: True if the loop contains a Call that is not an \
        IntrinsicCall, an IntrinsicCall for which \
        is_available_on_device() is False, or a CodeBlock. False \
        otherwise.
    :rtype: bool

    '''
    for invocation in loop.walk(Call):
        if isinstance(invocation, IntrinsicCall):
            if invocation.is_available_on_device():
                # This intrinsic is fine, inspect the next call
                continue
            print(f"Loop not parallelised because it has a "
                  f"{invocation.intrinsic.name} not available on GPUs.")
        else:
            print(f"Loop not parallelised because it has a call to "
                  f"{invocation.routine.name}")
        # The first offending call is enough to reject the loop
        return True

    contains_codeblock = bool(loop.walk(CodeBlock))
    if contains_codeblock:
        print("Loop not parallelised because it has a CodeBlock")
    return contains_codeblock

# If we see one such ice linearised loop, we assume
# calls/codeblocks are not a problem (they are not)
if not any(ref.symbol.name in ('npti',)
for ref in loop.stop_expr.walk(Reference)):
if skip_for_correctness(loop):
continue

# pnd_lev requires manual privatisation of ztmp
if any(name in routine_name for name in ('tab_', 'pnd_')):
opts = {"force": True}

try:
# First check that the region_directive is feasible for this region
if region_directive_trans:
region_directive_trans.validate(loop, options=opts)

# If it is, apply the parallelisation directive
loop_directive_trans.apply(loop, options=opts)
# Only add the region directive if the loop was successfully
# parallelised.

# And if successful, the region directive on top.
if region_directive_trans:
region_directive_trans.apply(loop.parent.parent)
except TransformationError as err:
# This loop can not be transformed, proceed to next loop
print("Loop not parallelised because:", str(err))
continue

if collapse:

# Count the number of perfectly nested loops that can be collapsed
num_nested_loops = 0
next_loop = loop
previous_variables = []
while isinstance(next_loop, Loop):
previous_variables.append(next_loop.variable)
num_nested_loops += 1

# If it has more than one child, the next loop will not be
# perfectly nested, so stop searching. If there is no child,
# we have an empty loop (which would cause a crash when
# accessing the child next)
if len(next_loop.loop_body.children) != 1:
break

next_loop = next_loop.loop_body.children[0]
if not isinstance(next_loop, Loop):
break

# If it is a dependent (e.g. triangular) loop, it can not be
# collapsed
dependent_of_previous_variable = False
for bound in (next_loop.start_expr, next_loop.stop_expr,
next_loop.step_expr):
for ref in bound.walk(Reference):
if ref.symbol in previous_variables:
dependent_of_previous_variable = True
break
if dependent_of_previous_variable:
break

# Check that the next loop has no loop-carried dependencies
if not next_loop.independent_iterations():
break

# Add collapse clause to the parent directive
if num_nested_loops > 1:
loop.parent.parent.collapse = num_nested_loops
loop.append_preceding_comment(f"PSyclone: {err.value}")


def add_profiling(children):
Expand Down
Binary file modified psyclone.pdf
Binary file not shown.
17 changes: 10 additions & 7 deletions src/psyclone/psyir/tools/dependency_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,8 +767,9 @@ def can_loop_be_parallelised(self, loop,
loop_vars = [loop.variable.name for loop in loop.walk(Loop)]

result = True
symbol_table = loop.scope.symbol_table
# Now check all variables used in the loop
for signature in var_accesses.all_signatures:
for signature, var_info in var_accesses.items():
# This string contains derived type information, e.g.
# "a%b"
var_string = str(signature)
Expand All @@ -779,12 +780,14 @@ def can_loop_be_parallelised(self, loop,
if signature in signatures_to_ignore:
continue

# This returns the first component of the signature,
# i.e. in case of "a%b" it will only return "a"
var_name = signature.var_name
var_info = var_accesses[signature]
symbol_table = loop.scope.symbol_table
symbol = symbol_table.lookup(var_name)
# Access the symbol by inspecting the first access reference
try:
symbol = var_info.all_accesses[0].node.symbol
except AttributeError:
# If it's a node without a symbol, look it up
var_name = signature.var_name
symbol = symbol_table.lookup(var_name)

# TODO #1270 - the is_array_access function might be moved
is_array = symbol.is_array_access(access_info=var_info)
if is_array:
Expand Down
32 changes: 29 additions & 3 deletions src/psyclone/psyir/transformations/omp_target_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@

''' This module provides the OMPTargetTrans PSyIR transformation '''

from psyclone.psyir.nodes import CodeBlock, OMPTargetDirective
from psyclone.psyir.nodes import CodeBlock, OMPTargetDirective, Call
from psyclone.psyir.transformations.region_trans import RegionTrans
from psyclone.psyir.transformations import TransformationError


class OMPTargetTrans(RegionTrans):
Expand Down Expand Up @@ -86,12 +87,37 @@ class OMPTargetTrans(RegionTrans):
'''
excluded_node_types = (CodeBlock, )

def validate(self, node, options=None):
    # pylint: disable=signature-differs
    '''
    Check that the supplied node or list of nodes can be enclosed
    within an OMPTargetDirective.

    :param node: the PSyIR node or nodes to enclose in the OpenMP
        target region.
    :type node: List[:py:class:`psyclone.psyir.nodes.Node`]
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if any Call found inside the supplied
        nodes reports (through is_available_on_device()) that it is
        not available on the accelerator device.

    '''
    # Normalise the input first, then let the parent class run its own
    # checks on the original argument.
    node_list = self.get_node_list(node)
    super().validate(node, options)

    # Lazily visit every Call below each of the nodes, in order.
    enclosed_calls = (call
                      for target in node_list
                      for call in target.walk(Call))
    for call in enclosed_calls:
        if call.is_available_on_device():
            continue
        raise TransformationError(
            f"'{call.routine.name}' is not available on the "
            f"accelerator device, and therefore it cannot "
            f"be called from within an OMP Target region.")

def apply(self, node, options=None):
''' Insert an OMPTargetDirective before the provided node or list
of nodes.
:param node: the PSyIR node or nodes to enclose in the OpenMP \
target region.
:param node: the PSyIR node or nodes to enclose in the OpenMP
target region.
:type node: List[:py:class:`psyclone.psyir.nodes.Node`]
:param options: a dictionary with options for transformations.
:type options: Optional[Dict[str,Any]]
Expand Down
Loading

0 comments on commit 5927fae

Please sign in to comment.