diff --git a/changelog b/changelog index ade0dd8d09..d790537af3 100644 --- a/changelog +++ b/changelog @@ -181,6 +181,9 @@ possible side-effects. Also now removes the parent Loop if the hoist means it no longer has a body. + 63) PR #2660 for #1370. Adds 'collapse' and 'ignore_dependencies_for' + options to ParallelLoopTrans. + release 2.5.0 14th of February 2024 1) PR #2199 for #2189. Fix bugs with missing maps in enter data diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index 7dabede56a..c890c8fe30 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -120,7 +120,10 @@ def trans(psyir): # Skip if an outer loop is already parallelised if loop.ancestor(Directive): continue - omp_loop_trans.apply(loop, options={"force": True}) + try: + omp_loop_trans.apply(loop, options={"force": True}) + except TransformationError: + continue omp_target_trans.apply(loop.parent.parent) assigns = loop.walk(Assignment) if len(assigns) == 1 and assigns[0].lhs.symbol.name == "zmax": diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index eb29c5e7fe..6ba331af5a 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -245,18 +245,16 @@ def insert_explicit_loop_parallelism( :param schedule: the PSyIR Schedule to transform. :type schedule: :py:class:`psyclone.psyir.nodes.node` - :param region_directive_trans: PSyclone transformation to insert the \ + :param region_directive_trans: PSyclone transformation that inserts the region directive. :type region_directive_trans: \ :py:class:`psyclone.transformation.Transformation` - :param loop_directive_trans: PSyclone transformation to use to insert the \ - loop directive. + :param loop_directive_trans: PSyclone transformation that inserts the + loop parallelisation directive. :type loop_directive_trans: \ :py:class:`psyclone.transformation.Transformation` - :param collapse: whether to attempt to insert the collapse clause to as \ + :param collapse: whether to attempt to insert the collapse clause to as many nested loops as possible. - :param collapse: whether to insert directive on loops with Calls or \ - CodeBlocks in their loop body. ''' # Add the parallel directives in each loop @@ -264,14 +262,16 @@ def insert_explicit_loop_parallelism( if loop.ancestor(Directive): continue # Skip if an outer loop is already parallelised - opts = {} + opts = {"collapse": collapse, "verbose": True} routine_name = loop.ancestor(Routine).name if ('dyn_spg' in routine_name and len(loop.walk(Loop)) > 2): - print("Loop not parallelised because its in 'dyn_spg' and " - "its not the inner loop") + loop.append_preceding_comment( + "PSyclone: Loop not parallelised because it is in 'dyn_spg' " + "and is not the inner loop") continue + # Skip if it is an array operation loop on an ice routine if along the # third dim or higher or if the loop nests a loop over ice points # (npti) or if the loop and array dims do not match. @@ -287,93 +287,33 @@ def insert_explicit_loop_parallelism( for ref in lp.stop_expr.walk(Reference)) or (str(len(loop.walk(Loop))) != loop.stop_expr.arguments[1].value))): - print("ICE Loop not parallelised for performance reasons") + loop.append_preceding_comment( + "PSyclone: ICE Loop not parallelised for performance reasons") continue + # Skip if looping over ice categories, ice or snow layers # as these have only 5, 4, and 1 iterations, respectively if (any(ref.symbol.name in ('jpl', 'nlay_i', 'nlay_s') for ref in loop.stop_expr.walk(Reference))): - print("Loop not parallelised because stops at 'jpl', 'nlay_i' " - "or 'nlay_s'.") + loop.append_preceding_comment( + "PSyclone: Loop not parallelised because stops at 'jpl'," + " 'nlay_i' or 'nlay_s'.") continue - def skip_for_correctness(loop): - for call in loop.walk(Call): - if not isinstance(call, IntrinsicCall): - print(f"Loop not parallelised because it has a call to " - f"{call.routine.name}") - return True - if not call.is_available_on_device(): - print(f"Loop not parallelised because it has a " - f"{call.intrinsic.name} not available on GPUs.") - return True - if loop.walk(CodeBlock): - print("Loop not parallelised because it has a CodeBlock") - return True - return False - - # If we see one such ice linearised loop, we assume - # calls/codeblocks are not a problem (they are not) - if not any(ref.symbol.name in ('npti',) - for ref in loop.stop_expr.walk(Reference)): - if skip_for_correctness(loop): - continue - - # pnd_lev requires manual privatisation of ztmp - if any(name in routine_name for name in ('tab_', 'pnd_')): - opts = {"force": True} - try: + # First check that the region_directive is feasible for this region + if region_directive_trans: + region_directive_trans.validate(loop, options=opts) + + # If it is, apply the parallelisation directive loop_directive_trans.apply(loop, options=opts) - # Only add the region directive if the loop was successfully - # parallelised. + + # And if successful, the region directive on top. if region_directive_trans: region_directive_trans.apply(loop.parent.parent) except TransformationError as err: # This loop can not be transformed, proceed to next loop - print("Loop not parallelised because:", str(err)) - continue - - if collapse: - - # Count the number of perfectly nested loops that can be collapsed - num_nested_loops = 0 - next_loop = loop - previous_variables = [] - while isinstance(next_loop, Loop): - previous_variables.append(next_loop.variable) - num_nested_loops += 1 - - # If it has more than one children, the next loop will not be - # perfectly nested, so stop searching. If there is no child, - # we have an empty loop (which would cause a crash when - # accessing the child next) - if len(next_loop.loop_body.children) != 1: - break - - next_loop = next_loop.loop_body.children[0] - if not isinstance(next_loop, Loop): - break - - # If it is a dependent (e.g. triangular) loop, it can not be - # collapsed - dependent_of_previous_variable = False - for bound in (next_loop.start_expr, next_loop.stop_expr, - next_loop.step_expr): - for ref in bound.walk(Reference): - if ref.symbol in previous_variables: - dependent_of_previous_variable = True - break - if dependent_of_previous_variable: - break - - # Check that the next loop has no loop-carried dependencies - if not next_loop.independent_iterations(): - break - - # Add collapse clause to the parent directive - if num_nested_loops > 1: - loop.parent.parent.collapse = num_nested_loops + loop.append_preceding_comment(f"PSyclone: {err.value}") def add_profiling(children): diff --git a/psyclone.pdf b/psyclone.pdf index ebe3889a6e..6001b37294 100644 Binary files a/psyclone.pdf and b/psyclone.pdf differ diff --git a/src/psyclone/psyir/tools/dependency_tools.py b/src/psyclone/psyir/tools/dependency_tools.py index af101023c8..99bbaec812 100644 --- a/src/psyclone/psyir/tools/dependency_tools.py +++ b/src/psyclone/psyir/tools/dependency_tools.py @@ -767,8 +767,9 @@ def can_loop_be_parallelised(self, loop, loop_vars = [loop.variable.name for loop in loop.walk(Loop)] result = True + symbol_table = loop.scope.symbol_table # Now check all variables used in the loop - for signature in var_accesses.all_signatures: + for signature, var_info in var_accesses.items(): # This string contains derived type information, e.g. # "a%b" var_string = str(signature) @@ -779,12 +780,14 @@ def can_loop_be_parallelised(self, loop, if signature in signatures_to_ignore: continue - # This returns the first component of the signature, - # i.e. in case of "a%b" it will only return "a" - var_name = signature.var_name - var_info = var_accesses[signature] - symbol_table = loop.scope.symbol_table - symbol = symbol_table.lookup(var_name) + # Access the symbol by inspecting the first access reference + try: + symbol = var_info.all_accesses[0].node.symbol + except AttributeError: + # If its a node without a symbol, look it up + var_name = signature.var_name + symbol = symbol_table.lookup(var_name) + # TODO #1270 - the is_array_access function might be moved is_array = symbol.is_array_access(access_info=var_info) if is_array: diff --git a/src/psyclone/psyir/transformations/omp_target_trans.py b/src/psyclone/psyir/transformations/omp_target_trans.py index 975020125e..61578c562d 100644 --- a/src/psyclone/psyir/transformations/omp_target_trans.py +++ b/src/psyclone/psyir/transformations/omp_target_trans.py @@ -38,8 +38,9 @@ ''' This module provides the OMPTargetTrans PSyIR transformation ''' -from psyclone.psyir.nodes import CodeBlock, OMPTargetDirective +from psyclone.psyir.nodes import CodeBlock, OMPTargetDirective, Call from psyclone.psyir.transformations.region_trans import RegionTrans +from psyclone.psyir.transformations import TransformationError class OMPTargetTrans(RegionTrans): @@ -86,12 +87,37 @@ class OMPTargetTrans(RegionTrans): ''' excluded_node_types = (CodeBlock, ) + def validate(self, node, options=None): + # pylint: disable=signature-differs + ''' + Check that we can safely enclose the supplied node or list of nodes + within an OpenMPTargetDirective. + + :param node: the PSyIR node or nodes to enclose in the OpenMP + target region. + :type node: List[:py:class:`psyclone.psyir.nodes.Node`] + :param options: a dictionary with options for transformations. + :type options: Optional[Dict[str, Any]] + + :raises TransformationError: if it contains calls to routines that + are not available in the accelerator device. + ''' + node_list = self.get_node_list(node) + super().validate(node, options) + for node in node_list: + for call in node.walk(Call): + if not call.is_available_on_device(): + raise TransformationError( + f"'{call.routine.name}' is not available on the " + f"accelerator device, and therefore it cannot " + f"be called from within an OMP Target region.") + def apply(self, node, options=None): ''' Insert an OMPTargetDirective before the provided node or list of nodes. - :param node: the PSyIR node or nodes to enclose in the OpenMP \ - target region. + :param node: the PSyIR node or nodes to enclose in the OpenMP + target region. :type node: List[:py:class:`psyclone.psyir.nodes.Node`] :param options: a dictionary with options for transformations. :type options: Optional[Dict[str,Any]] diff --git a/src/psyclone/psyir/transformations/parallel_loop_trans.py b/src/psyclone/psyir/transformations/parallel_loop_trans.py index d77960a033..b2b737543d 100644 --- a/src/psyclone/psyir/transformations/parallel_loop_trans.py +++ b/src/psyclone/psyir/transformations/parallel_loop_trans.py @@ -39,11 +39,13 @@ ''' This module provides the ParallelLoopTrans transformation.''' import abc +from collections.abc import Iterable from psyclone import psyGen +from psyclone.core import Signature from psyclone.domain.common.psylayer import PSyLoop from psyclone.psyir import nodes -from psyclone.psyir.nodes import Loop +from psyclone.psyir.nodes import Loop, Reference from psyclone.psyir.tools import DependencyTools, DTCode from psyclone.psyir.transformations.loop_trans import LoopTrans from psyclone.psyir.transformations.transformation_error import \ @@ -70,8 +72,8 @@ def _directive(self, children, collapse=None): :param children: list of nodes that will be children of this Directive. :type children: list of :py:class:`psyclone.psyir.nodes.Node` - :param int collapse: the number of tightly-nested loops to which \ - this directive applies or None. + :param int collapse: the number of tightly-nested loops to which + this directive applies or None. :returns: the new Directive node. :rtype: sub-class of :py:class:`psyclone.psyir.nodes.Directive`. @@ -83,24 +85,29 @@ def validate(self, node, options=None): :param node: the node we are checking. :type node: :py:class:`psyclone.psyir.nodes.Node` - :param options: a dictionary with options for transformations.\ - This transform supports "collapse", which is the\ - number of nested loops to collapse. + :param options: a dictionary with options for transformations. :type options: Optional[Dict[str, Any]] - :param int options["collapse"]: number of nested loops to collapse - or None. + :param bool|int options["collapse"]: if it's a bool and is False + (default), it won't collapse. If it's a bool and is True, it will + collapse as much as possible. If it's an integer, it will attempt + to collapse until the specified number of loops (if they exist and + are safe to collapse). The options 'ignore_dependencies_for' + and 'force' also affect the collapse applicability. :param bool options["force"]: whether to force parallelisation of the - target loop (i.e. ignore any dependence analysis). + target loop (i.e. ignore any dependence analysis). + :param list[str] options["ignore_dependencies_for"]: whether to ignore + some symbol names from the dependence analysis checks. :param bool options["sequential"]: whether this is a sequential loop. + :param bool options["verbose"]: whether to report the reasons the + validate and collapse steps have failed. - :raises TransformationError: if the \ - :py:class:`psyclone.psyir.nodes.Loop` loop iterates over \ - colours. - :raises TransformationError: if 'collapse' is supplied with an \ - invalid number of loops. - :raises TransformationError: if there is a data dependency that \ - prevents the parallelisation of the loop unless \ - `options["force"]` is True. + :raises TypeError: if 'collapse' is not an int or a bool. + :raises TypeError: if 'ignore_dependencies_for' is not a list of str. + :raises TransformationError: if the given loop iterates over a + colours (LFRic domain) iteration space. + :raises TransformationError: if there is a data dependency that + prevents the parallelisation of the loop and the provided + options don't disregard them. ''' # Check that the supplied node is a Loop and does not contain any @@ -109,8 +116,10 @@ def validate(self, node, options=None): if not options: options = {} - collapse = options.get("collapse", None) + verbose = options.get("verbose", False) + collapse = options.get("collapse", False) ignore_dep_analysis = options.get("force", False) + ignore_dependencies_for = options.get("ignore_dependencies_for", []) sequential = options.get("sequential", False) # Check we are not a sequential loop @@ -123,45 +132,47 @@ def validate(self, node, options=None): # If 'collapse' is specified, check that it is an int and that the # loop nest has at least that number of loops in it if collapse: - if not isinstance(collapse, int): - raise TransformationError( - f"The 'collapse' argument must be an integer but got an " - f"object of type {type(collapse)}") - if collapse < 2: - raise TransformationError( - f"It only makes sense to collapse 2 or more loops " - f"but got a value of {collapse}") - # Count the number of loops in the loop nest - loop_count = 0 - cnode = node - while isinstance(cnode, Loop): - loop_count += 1 - # Loops must be tightly nested (no intervening statements) - cnode = cnode.loop_body[0] - if collapse > loop_count: - raise TransformationError( - f"Cannot apply COLLAPSE({collapse}) clause to a loop nest " - f"containing only {loop_count} loops") + if not isinstance(collapse, (int, bool)): + raise TypeError( + f"The 'collapse' argument must be an integer or a bool but" + f" got an object of type {type(collapse)}") # Check that there are no loop-carried dependencies if sequential or ignore_dep_analysis: return + if ignore_dependencies_for: + if (not isinstance(ignore_dependencies_for, Iterable) or + isinstance(ignore_dependencies_for, str) or not all( + isinstance(v, str) for v in ignore_dependencies_for)): + raise TypeError( + f"The 'ignore_dependencies_for' option must be an Iterable" + f" object containing str representing the " + f"symbols to ignore, but got '{ignore_dependencies_for}'.") + dep_tools = DependencyTools() - if not node.independent_iterations(dep_tools=dep_tools, - test_all_variables=True): + signatures = [Signature(name) for name in ignore_dependencies_for] + + if not node.independent_iterations( + dep_tools=dep_tools, + test_all_variables=True, + signatures_to_ignore=signatures): # The DependencyTools also returns False for things that are # not an issue, so we ignore specific messages. for message in dep_tools.get_all_messages(): if message.code == DTCode.WARN_SCALAR_WRITTEN_ONCE: continue - all_msg_str = [str(message) for message in - dep_tools.get_all_messages()] - messages = "\n".join(all_msg_str) - raise TransformationError( - f"Dependency analysis failed with the following " - f"messages:\n{messages}") + all_msg_str = "\n".join([str(m) for m in + dep_tools.get_all_messages()]) + messages = (f"Loop cannot be parallelised because the " + f"dependency analysis reported:\n{all_msg_str}\n" + f"Consider using the \"ignore_dependencies_for\"" + f" transformation option if this is a false " + f"dependency.") + if verbose: + node.append_preceding_comment(f"PSyclone: {messages}") + raise TransformationError(messages) def apply(self, node, options=None): ''' @@ -181,30 +192,107 @@ def apply(self, node, options=None): At code-generation time (when gen_code()` is called), this node must be within (i.e. a child of) a PARALLEL region. - :param node: the supplied node to which we will apply the \ - Loop transformation. + :param node: the supplied node to which we will apply the + loop parallelisation transformation. :type node: :py:class:`psyclone.psyir.nodes.Node` - :param options: a dictionary with options for transformations. \ + :param options: a dictionary with options for transformations. :type options: Optional[Dict[str, Any]] - :param int options["collapse"]: the number of loops to collapse into \ - single iteration space or None. + :param bool|int options["collapse"]: if it's a bool and is False + (default), it won't collapse. If it's a bool and is True, it will + collapse as much as possible. If it's an integer, it will attempt + to collapse until the specified number of loops (if they exist and + are safe to collapse them). The options 'ignore_dependencies_for' + and 'force' also affect the collapse applicabilty analysis. + :param bool options["force"]: whether to force parallelisation of the + target loop (i.e. ignore any dependence analysis). + :param list[str] options["ignore_dependencies_for"]: whether to ignore + some symbol names from the dependence analysis checks. + :param bool options["sequential"]: whether this is a sequential loop. + :param bool options["verbose"]: whether to report the reasons the + validate and collapse steps have failed. ''' if not options: options = {} self.validate(node, options=options) - collapse = options.get("collapse", None) + verbose = options.get("verbose", False) + collapse = options.get("collapse", False) + ignore_dep_analysis = options.get("force", False) + list_of_names = options.get("ignore_dependencies_for", []) + list_of_signatures = [Signature(name) for name in list_of_names] # keep a reference to the node's original parent and its index as these # are required and will change when we change the node's location node_parent = node.parent node_position = node.position + # If 'collapse' is specified, check that it is an int and that the + # loop nest has at least that number of loops in it + if collapse: + # Count the number of perfectly nested loops that can be collapsed + num_collapsable_loops = 0 + next_loop = node + previous_iteration_variables = [] + while isinstance(next_loop, Loop): + previous_iteration_variables.append(next_loop.variable) + num_collapsable_loops += 1 + if not isinstance(collapse, bool): + if num_collapsable_loops >= collapse: + break + + # If it has more than one child, the next loop will not be + # perfectly nested, so stop searching. If there is no child, + # we have an empty loop and we also stop here. + if len(next_loop.loop_body.children) != 1: + if (next_loop.loop_body.children and + isinstance(next_loop.loop_body[0], Loop)): + next_loop.loop_body[0].append_preceding_comment( + "Loop cannot be collapsed because it has siblings") + break + + next_loop = next_loop.loop_body[0] + if not isinstance(next_loop, Loop): + break + + # If it is a loop dependent on a previous iteration variable + # (e.g. a triangular iteration space), it can not be collapsed + dependent_on_previous_variable = False + for bound in (next_loop.start_expr, next_loop.stop_expr, + next_loop.step_expr): + for ref in bound.walk(Reference): + if ref.symbol in previous_iteration_variables: + dependent_on_previous_variable = ref.symbol + break + if dependent_on_previous_variable: + if verbose: + next_loop.append_preceding_comment( + f"Loop cannot be collapsed because one of the " + f"bounds depends on the previous iteration variab" + f"le '{dependent_on_previous_variable.name}'") + break + + # Check that the next loop has no loop-carried dependencies + dtools = DependencyTools() + if not ignore_dep_analysis: + if not next_loop.independent_iterations( + dep_tools=dtools, + signatures_to_ignore=list_of_signatures): + if verbose: + msgs = dtools.get_all_messages() + next_loop.preceding_comment = ( + "\n".join([str(m) for m in msgs]) + + " Consider using the \"ignore_dependencies_" + "for\" transformation option if this is a " + "false dependency.") + break + else: + num_collapsable_loops = None + # Add our orphan loop directive setting its parent to the node's # parent and its children to the node. This calls down to the sub-class # to get the type of directive we require. - directive = self._directive([node.detach()], collapse) + directive = self._directive([node.detach()], num_collapsable_loops) # Add the loop directive as a child of the node's parent node_parent.addchild(directive, index=node_position) diff --git a/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py b/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py index 30af8aed15..52e31dc334 100644 --- a/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py +++ b/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py @@ -41,7 +41,7 @@ import pytest from psyclone.psyGen import TransInfo -from psyclone.psyir.transformations import TransformationError, ACCKernelsTrans +from psyclone.psyir.transformations import ACCKernelsTrans from psyclone.psyir.nodes import Loop from psyclone.errors import GenerationError @@ -196,16 +196,3 @@ def test_collapse(fortran_reader, fortran_writer): " !$acc loop independent collapse(2)\n" " do jj = 1, jpj, 1\n" " do ji = 1, jpi, 1\n" in code) - - -def test_collapse_err(fortran_reader): - ''' Check that attempting to apply the loop transformation with a - 'collapse' depth creater than the number of nested loops raises an - error. ''' - psyir = fortran_reader.psyir_from_source(DOUBLE_LOOP) - schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') - with pytest.raises(TransformationError) as err: - acc_trans.apply(schedule.children[0], {"collapse": 3}) - assert ("Cannot apply COLLAPSE(3) clause to a loop nest containing " - "only 2 loops" in str(err.value)) diff --git a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py index a1ed555c93..1c8ece7f1b 100644 --- a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py @@ -36,8 +36,7 @@ ''' Tests for the OMPTargetTrans transformation. ''' import pytest -from psyclone.psyir.nodes import Loop, Schedule, OMPTargetDirective, Routine, \ - CodeBlock +from psyclone.psyir.nodes import Loop, Schedule, OMPTargetDirective, Routine from psyclone.psyir.transformations import OMPTargetTrans, TransformationError @@ -102,20 +101,43 @@ def test_omptargettrans(sample_psyir): assert loops[0].parent.parent is loops[1].parent.parent -def test_omptargettrans_validate(sample_psyir): +def test_omptargettrans_validate(fortran_reader): ''' Test that OMPTargetTrans validation fails if it contains non-allowed constructs. ''' omptargettrans = OMPTargetTrans() - tree = sample_psyir.copy() - loops = tree.walk(Loop, stop_type=Loop) - # Valid loop - omptargettrans.validate(loops[0]) + code = ''' + function myfunc(a) + integer :: a + integer :: myfunc + end function + subroutine my_subroutine() + integer, dimension(10, 10) :: A + integer :: i + integer :: j + do i = 1, 10 + do j = 1, 10 + A(i, j) = myfunc(3) + end do + end do + do i = 1, 10 + do j = 1, 10 + char = 'a' // 'b' + end do + end do + end subroutine + ''' + psyir = fortran_reader.psyir_from_source(code) + loops = psyir.walk(Loop, stop_type=Loop) - # With a CodeBlock it should fail - loops[0].loop_body.addchild(CodeBlock([], CodeBlock.Structure.STATEMENT)) with pytest.raises(TransformationError) as err: omptargettrans.validate(loops[0]) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a OMPTargetTrans " - "transformation" in str(err.value)) + assert ("'myfunc' is not available on the accelerator device, and " + "therefore it cannot be called from within an OMP Target region." + in str(err.value)) + + with pytest.raises(TransformationError) as err: + omptargettrans.validate(loops[1]) + assert ("Nodes of type 'CodeBlock' cannot be enclosed by a OMPTarget" + "Trans transformation" in str(err.value)) diff --git a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py index d1f071159c..22144f56b5 100644 --- a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py @@ -54,7 +54,7 @@ def _directive(self, children, collapse=None): ''' Creates an OMP Parallel Do directive for the purposes of testing. ''' - return OMPParallelDoDirective(children=children) + return OMPParallelDoDirective(children=children, collapse=collapse) CODE = ''' @@ -82,11 +82,187 @@ def test_paralooptrans_validate_force(fortran_reader): trans = ParaTrans() with pytest.raises(TransformationError) as err: trans.validate(loop) - assert "Dependency analysis failed with the following" in str(err.value) + assert "because the dependency analysis reported" in str(err.value) # Set the 'force' option to True - no exception should be raised. trans.validate(loop, {"force": True}) +def test_paralooptrans_validate_ignore_dependencies_for(fortran_reader, + fortran_writer): + ''' + Test that the 'ignore_dependencies_for' option allows the validate check to + succeed even when the dependency analysis finds a possible loop-carried + dependency, but the user guarantees that it's a false dependency. + + ''' + psyir = fortran_reader.psyir_from_source(CODE) + loop = psyir.walk(Loop)[0] + trans = ParaTrans() + with pytest.raises(TransformationError) as err: + trans.validate(loop, options={"verbose": True}) + assert ("Loop cannot be parallelised because the dependency analysis " + "reported:\nWarning: Variable 'sum' is read first, which indicates" + " a reduction. Variable: 'sum'.") in str(err.value) + # With the verbose option, the dependency issue will be log as a comment + assert ("! PSyclone: Loop cannot be parallelised because the dependency" + " analysis reported:" in fortran_writer(psyir)) + with pytest.raises(TypeError) as err: + trans.validate(loop, {"ignore_dependencies_for": "sum"}) + assert ("The 'ignore_dependencies_for' option must be an Iterable object " + "containing str representing the symbols to ignore, but" + " got 'sum'.") in str(err.value) + # Set the ignore_dependencies_for option to ignore "sum" + trans.validate(loop, {"ignore_dependencies_for": ["sum"]}) + + +def test_paralooptrans_apply_collapse(fortran_reader, fortran_writer): + ''' Test the 'collapse' option with valid bool and integer values. ''' + trans = ParaTrans() + psyir = fortran_reader.psyir_from_source(''' + subroutine my_sub() + integer :: i, j, k + real :: var(10,10,10) = 1 + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + var(i,j,k) = var(i, j, k) + 1 + end do + end do + end do + end subroutine my_sub''') + + # When 'collapse' is an integer, it won't collapse more than the requested + # number of loops. But it won't fail if it can't + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": 2}) + assert ("!$omp parallel do collapse(2)" + in fortran_writer(test_loop.parent.parent)) + + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": 4}) + assert ("!$omp parallel do collapse(3)" + in fortran_writer(test_loop.parent.parent)) + + # When collapse is a bool, it will collapse all possible loops when True + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": True}) + assert ("!$omp parallel do collapse(3)" + in fortran_writer(test_loop.parent.parent)) + + +def test_paralooptrans_collapse_options(fortran_reader, fortran_writer): + ''' + Test the 'collapse' option, also in combination with the 'force' and + 'ignore_dependencies_for' options. + ''' + psyir = fortran_reader.psyir_from_source(''' + subroutine my_sub() + integer :: i, j, k + real :: var(10,10,10) = 1 + integer, dimension(10) :: map + + do i = 1, 10 ! This loop is iteration independent + do j = 1, 10 ! This loop has a loop-carried dependency in var + do k = 1, j ! This loop bound depends on previous indices + var(i,j,k) = var(i, map(j), k) + end do + end do + end do + end subroutine my_sub''') + loop = psyir.walk(Loop)[0] + trans = ParaTrans() + # The validation does not see any problem because the outer loop can be + # parallel + trans.validate(loop) + + # By default it stops at collapse 1, because loop 2 has a dependency on, + # var. With verbose it adds the stopping reason as a comment. + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": True, "verbose": True}) + assert '''\ +!$omp parallel do collapse(1) default(shared), private(i,j,k) +do i = 1, 10, 1 + ! Error: The write access to 'var(i,j,k)' and the read access to 'var(i,\ +map(j),k)' are dependent and cannot be parallelised. Variable: 'var'. \ +Consider using the "ignore_dependencies_for" transformation option if this \ +is a false dependency. + do j = 1, 10, 1 + do k = 1, j, 1 + var(i,j,k) = var(i,map(j),k) + enddo + enddo +enddo +''' in fortran_writer(test_loop.parent.parent) + + # Check that the collapse logic can uses the "ignore_dependencies_for" and + # "force" logic to skip dependencies + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": True, "verbose": True, + "ignore_dependencies_for": ["var"]}) + assert '''\ +!$omp parallel do collapse(2) default(shared), private(i,j,k) +do i = 1, 10, 1 + do j = 1, 10, 1 + ! Loop cannot be collapsed because one of the bounds depends on the \ +previous iteration variable 'j' + do k = 1, j, 1 + var(i,j,k) = var(i,map(j),k) + enddo + enddo +enddo +''' in fortran_writer(test_loop.parent.parent) + + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": True, "verbose": True, "force": True}) + assert '''\ +!$omp parallel do collapse(2) default(shared), private(i,j,k) +do i = 1, 10, 1 + do j = 1, 10, 1 + ! Loop cannot be collapsed because one of the bounds depends on the \ +previous iteration variable 'j' + do k = 1, j, 1 + var(i,j,k) = var(i,map(j),k) + enddo + enddo +enddo +''' in fortran_writer(test_loop.parent.parent) + + # Also it won't collapse if the loop inside is not perfectly nested, + # regardless of the force option. + psyir = fortran_reader.psyir_from_source(''' + subroutine my_sub() + integer :: i, j, k + real :: var(10,10,10) = 1 + integer, dimension(10) :: map + + do i = 1, 10 ! This loop is iteration independent + do j = 1, 10 ! This loop has a loop-carried dependency in var1 + do k = 1, 10 + var(i,j,k) = var(i, map(j), k) + end do + var(i,j,5) = 0 + end do + end do + end subroutine my_sub''') + loop = psyir.walk(Loop)[0] + trans = ParaTrans() + test_loop = psyir.copy().walk(Loop, stop_type=Loop)[0] + trans.apply(test_loop, {"collapse": True, "verbose": True, "force": True}) + assert '''\ +!$omp parallel do collapse(2) default(shared), private(i,j,k) +do i = 1, 10, 1 + do j = 1, 10, 1 + ! Loop cannot be collapsed because it has siblings + do k = 1, 10, 1 + var(i,j,k) = var(i,map(j),k) + enddo + var(i,j,5) = 0 + enddo +enddo +''' in fortran_writer(test_loop.parent.parent) + + def test_paralooptrans_validate_sequential(fortran_reader): ''' Test that the 'sequential' option allows the validate check to succeed even @@ -98,7 +274,7 @@ def test_paralooptrans_validate_sequential(fortran_reader): trans = ParaTrans() with pytest.raises(TransformationError) as err: trans.validate(loop) - assert "Dependency analysis failed with the following" in str(err.value) + assert "because the dependency analysis reported" in str(err.value) # Set the 'sequential' option to True - no exception should be raised. trans.validate(loop, {"sequential": True}) @@ -112,22 +288,10 @@ def test_paralooptrans_validate_collapse(fortran_reader): loop = psyir.walk(Loop)[0] trans = ParaTrans() # Check that we reject non-integer collapse arguments - with pytest.raises(TransformationError) as err: + with pytest.raises(TypeError) as err: trans.validate(loop, {"collapse": loop}) - assert ("The 'collapse' argument must be an integer but got an object " - "of type" in str(err.value)) - - # Check that we reject invalid depths - with pytest.raises(TransformationError) as err: - trans.validate(loop, {"collapse": 1}) - assert ("It only makes sense to collapse 2 or more loops but got a " - "value of 1" in str(err.value)) - - # Check that we reject attempts to collapse more loops than we have - with pytest.raises(TransformationError) as err: - trans.validate(loop, {"collapse": 3}) - assert ("Cannot apply COLLAPSE(3) clause to a loop nest containing " - "only 2 loops" in str(err.value)) + assert ("The 'collapse' argument must be an integer or a bool but got an" + " object of type" in str(err.value)) def test_paralooptrans_validate_colours(monkeypatch): diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index de2256e4c6..b5686d26c5 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -48,7 +48,6 @@ OMPDoDirective, OMPLoopDirective, Routine from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE, BOOLEAN_TYPE, \ ImportInterface, ContainerSymbol -from psyclone.psyir.tools import DependencyTools from psyclone.psyir.transformations import ProfileTrans, RegionTrans, \ TransformationError from psyclone.tests.utilities import get_invoke, Compile @@ -113,6 +112,48 @@ def test_accparallel(): "but found '3'." in str(err.value)) +def test_accparalleltrans_validate(fortran_reader): + ''' Test that ACCParallelTrans validation fails if it contains non-allowed + constructs. ''' + + omptargettrans = ACCParallelTrans() + + code = ''' + function myfunc(a) + integer :: a + integer :: myfunc + end function + subroutine my_subroutine() + integer, dimension(10, 10) :: A + integer :: i + integer :: j + do i = 1, 10 + do j = 1, 10 + A(i, j) = myfunc(3) + end do + end do + do i = 1, 10 + do j = 1, 10 + char = 'a' // 'b' + end do + end do + end subroutine + ''' + psyir = fortran_reader.psyir_from_source(code) + loops = psyir.walk(Loop, stop_type=Loop) + + with pytest.raises(TransformationError) as err: + omptargettrans.validate(loops[0]) + assert ("'myfunc' is not available on the accelerator device, and " + "therefore it cannot be called from within an ACC parallel region." + in str(err.value)) + + with pytest.raises(TransformationError) as err: + omptargettrans.validate(loops[1]) + assert ("Nodes of type 'CodeBlock' cannot be enclosed by a ACCParallel" + "Trans transformation" in str(err.value)) + + def test_accenterdata(): ''' Generic tests for the ACCEnterDataTrans class ''' acct = ACCEnterDataTrans() @@ -355,77 +396,6 @@ def test_omplooptrans_properties(): in str(err.value)) -def test_parallellooptrans_validate_dependencies(fortran_reader): - ''' Test that the parallellooptrans validation checks for loop carried - dependencies. ''' - - def create_loops(body): - psyir = fortran_reader.psyir_from_source(f''' - subroutine my_subroutine() - integer :: ji, jj, jk, jpkm1, jpjm1, jpim1 - real, dimension(10, 10, 10) :: zwt, zwd, zwi, zws - real :: total - {body} - end subroutine''') - return psyir.walk(Loop) - - # Use OMPLoopTrans as a concrete class of ParallelLoopTrans - omplooptrans = OMPLoopTrans() - # Example with a loop carried dependency in jk dimension - loops = create_loops(''' - do jk = 2, jpkm1, 1 - do jj = 2, jpjm1, 1 - do ji = 2, jpim1, 1 - zwt(ji,jj,jk) = zwd(ji,jj,jk) - zwi(ji,jj,jk) * & - zws(ji,jj,jk - 1) / zwt(ji,jj,jk - 1) - enddo - enddo - enddo''') - - # Check that the loop can not be parallelised due to the loop-carried - # dependency. - with pytest.raises(TransformationError) as err: - omplooptrans.validate(loops[0]) - assert ("Transformation Error: Dependency analysis failed with the " - "following messages:\nError: The write access to 'zwt(ji,jj,jk)' " - "and the read access to 'zwt(ji,jj,jk - 1)' are dependent and " - "cannot be parallelised. Variable: 'zwt'." in str(err.value)) - - # However, the inner loop can be parallelised because the dependency is - # just with 'jk' and it is not modified in the inner loops - omplooptrans.validate(loops[1]) - - # Reductions also indicate a data dependency that needs to be handled, so - # we don't permit the parallelisation of the loop (until we support - # reduction clauses) - loops = create_loops(''' - do jk = 2, jpkm1, 1 - do jj = 2, jpjm1, 1 - do ji = 2, jpim1, 1 - total = total + zwt(ji,jj,jk) - enddo - enddo - enddo''') - with pytest.raises(TransformationError) as err: - omplooptrans.validate(loops[0]) - assert ("Transformation Error: Dependency analysis failed with the " - "following messages:\nWarning: Variable 'total' is read first, " - "which indicates a reduction." in str(err.value)) - - # Shared scalars are race conditions but these are accepted because it - # can be manage with the appropriate clause - loops = create_loops(''' - do jk = 2, jpkm1, 1 - do jj = 2, jpjm1, 1 - do ji = 2, jpim1, 1 - total = zwt(ji,jj,jk) - enddo - enddo - enddo''') - assert not DependencyTools().can_loop_be_parallelised(loops[0]) - omplooptrans.validate(loops[0]) - - def test_omplooptrans_apply_firstprivate(fortran_reader, fortran_writer, tmpdir): ''' Test applying the OMPLoopTrans in cases where a firstprivate diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 19ae91fc3e..456ce623b9 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -284,21 +284,21 @@ def _directive(self, children, collapse=None): Creates the type of directive needed for this sub-class of transformation. - :param children: list of Nodes that will be the children of \ + :param children: list of Nodes that will be the children of the created directive. :type children: list of :py:class:`psyclone.psyir.nodes.Node` - :param int collapse: currently un-used but required to keep \ + :param int collapse: currently un-used but required to keep interface the same as in base class. :returns: the new node representing the directive in the AST. :rtype: :py:class:`psyclone.psyir.nodes.OMPTaskloopDirective` :raises NotImplementedError: if a collapse argument is supplied ''' - # TODO 1370: OpenMP loop functions don't support collapse + # TODO 2672: OpenMP loop functions don't support collapse if collapse: raise NotImplementedError( "The COLLAPSE clause is not yet supported for " - "'!$omp taskloop' directives.") + "'!$omp taskloop' directives (#2672).") _directive = OMPTaskloopDirective(children=children, grainsize=self.omp_grainsize, num_tasks=self.omp_num_tasks, @@ -1217,6 +1217,7 @@ def validate(self, node_list, options=None): children of the same parent (siblings). ''' + node_list = self.get_node_list(node_list) if isinstance(node_list[0], InvokeSchedule): raise TransformationError( f"A {self.name} transformation cannot be applied to an " @@ -1612,6 +1613,7 @@ def validate(self, node_list, options=None): inserted directive should include the default_present clause. ''' + node_list = self.get_node_list(node_list) super().validate(node_list, options) if options is not None and "default_present" in options: if not isinstance(options["default_present"], bool): @@ -1619,6 +1621,13 @@ def validate(self, node_list, options=None): f"The provided 'default_present' option must be a " f"boolean, but found '{options['default_present']}'." ) + for node in node_list: + for call in node.walk(Call): + if not call.is_available_on_device(): + raise TransformationError( + f"'{call.routine.name}' is not available on the " + f"accelerator device, and therefore it cannot " + f"be called from within an ACC parallel region.") def apply(self, target_nodes, options=None): '''