From 696d92bbc1c3e1aa6e8308b74c3d02b6f9357aa8 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 26 Nov 2024 12:49:48 +0000 Subject: [PATCH] [MLIR][Flang][OpenMP] Host evaluation of trip count for Generic-SPMD This patch enables the initialization of the loop trip count for kernels representing `target teams distribute` or equivalent constructs. This involves updates to Flang lowering to make sure loop bounds are lowered in advance, omp.target verifier changes and tweaks to MLIR to LLVM IR translation. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 20 +- .../test/Lower/OpenMP/eval-outside-target.f90 | 103 ++++++++++ .../test/Lower/OpenMP/target-generic-spmd.f90 | 191 ++++++++++++++++++ flang/test/Lower/OpenMP/target-spmd.f90 | 8 +- mlir/docs/Dialects/OpenMPDialect/_index.md | 7 +- .../mlir/Dialect/OpenMP/OpenMPDialect.h | 1 + mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 6 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 79 +++++--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 +- mlir/test/Dialect/OpenMP/invalid.mlir | 2 +- mlir/test/Dialect/OpenMP/ops.mlir | 18 +- 11 files changed, 395 insertions(+), 52 deletions(-) create mode 100644 flang/test/Lower/OpenMP/target-generic-spmd.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index f2ecfc49d5a05c..daa2d0dcea25b1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -578,7 +578,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, HostEvalInfo &hostInfo = hostEvalInfo.back(); switch (extractOmpDirective(*ompEval)) { - // Cases where 'teams' and target SPMD clauses might be present. case OMPD_teams_distribute_parallel_do: case OMPD_teams_distribute_parallel_do_simd: cp.processThreadLimit(stmtCtx, hostInfo.ops); @@ -589,39 +588,38 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute_parallel_do: case OMPD_distribute_parallel_do_simd: - cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumThreads(stmtCtx, hostInfo.ops); + [[fallthrough]]; + case OMPD_distribute: + case OMPD_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); break; - // Cases where 'teams' clauses might be present, and target SPMD is - // possible by looking at nested evaluations. case OMPD_teams: cp.processThreadLimit(stmtCtx, hostInfo.ops); [[fallthrough]]; case OMPD_target_teams: cp.processNumTeams(stmtCtx, hostInfo.ops); - processSingleNestedIf([](Directive nestedDir) { - return nestedDir == OMPD_distribute_parallel_do || - nestedDir == OMPD_distribute_parallel_do_simd; - }); + processSingleNestedIf( + [](Directive nestedDir) { return topDistributeSet.test(nestedDir); }); break; - // Cases where only 'teams' host-evaluated clauses might be present. case OMPD_teams_distribute: case OMPD_teams_distribute_simd: cp.processThreadLimit(stmtCtx, hostInfo.ops); [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumTeams(stmtCtx, hostInfo.ops); break; // Standalone 'target' case. 
- case OMPD_target: { + case OMPD_target: processSingleNestedIf( [](Directive nestedDir) { return topTeamsSet.test(nestedDir); }); break; - } + default: break; } diff --git a/flang/test/Lower/OpenMP/eval-outside-target.f90 b/flang/test/Lower/OpenMP/eval-outside-target.f90 index 32c52462b86a76..65258c91e5daf3 100644 --- a/flang/test/Lower/OpenMP/eval-outside-target.f90 +++ b/flang/test/Lower/OpenMP/eval-outside-target.f90 @@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd() !$omp end distribute parallel do simd !$omp end teams end subroutine distribute_parallel_do_simd + +! BOTH-LABEL: func.func @_QPdistribute +subroutine distribute() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end teams +end subroutine distribute + +! BOTH-LABEL: func.func @_QPdistribute_simd +subroutine distribute_simd() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end teams +end subroutine distribute_simd diff --git a/flang/test/Lower/OpenMP/target-generic-spmd.f90 b/flang/test/Lower/OpenMP/target-generic-spmd.f90 new file mode 100644 index 00000000000000..d6cd8ae229b3a6 --- /dev/null +++ b/flang/test/Lower/OpenMP/target-generic-spmd.f90 @@ -0,0 +1,191 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +! CHECK-LABEL: func.func @_QPdistribute_generic() { +subroutine distribute_generic() + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! CHECK-SAME: { + !$omp target + !$omp teams + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + call bar() !< Prevents this from being Generic-SPMD. + !$omp end teams + !$omp end target + + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! 
CHECK-SAME: { + !$omp target teams + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + call bar() !< Prevents this from being Generic-SPMD. + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! CHECK-SAME: { + !$omp target teams + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + !$omp end target teams +end subroutine distribute_generic + +! CHECK-LABEL: func.func @_QPdistribute_spmd() { +subroutine distribute_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target + !$omp teams + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + !$omp end teams + !$omp end target + + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target teams + !$omp distribute + do i = 1, 10 + call foo(i) + end do + !$omp end distribute + !$omp end target teams +end subroutine distribute_spmd + +! CHECK-LABEL: func.func @_QPdistribute_simd_generic() { +subroutine distribute_simd_generic() + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! CHECK-SAME: { + !$omp target + !$omp teams + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + call bar() !< Prevents this from being Generic-SPMD. + !$omp end teams + !$omp end target + + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! CHECK-SAME: { + !$omp target teams + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + call bar() !< Prevents this from being Generic-SPMD. + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-NOT: host_eval({{.*}}) + ! CHECK-SAME: { + !$omp target teams + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + !$omp end target teams +end subroutine distribute_simd_generic + +! CHECK-LABEL: func.func @_QPdistribute_simd_spmd() { +subroutine distribute_simd_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target + !$omp teams + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + !$omp end teams + !$omp end target + + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target teams + !$omp distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end distribute simd + !$omp end target teams +end subroutine distribute_simd_spmd + +! CHECK-LABEL: func.func @_QPteams_distribute_spmd() { +subroutine teams_distribute_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target + !$omp teams distribute + do i = 1, 10 + call foo(i) + end do + !$omp end teams distribute + !$omp end target +end subroutine teams_distribute_spmd + +! CHECK-LABEL: func.func @_QPteams_distribute_simd_spmd() { +subroutine teams_distribute_simd_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target + !$omp teams distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end teams distribute simd + !$omp end target +end subroutine teams_distribute_simd_spmd + +! CHECK-LABEL: func.func @_QPtarget_teams_distribute_spmd() { +subroutine target_teams_distribute_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target teams distribute + do i = 1, 10 + call foo(i) + end do + !$omp end target teams distribute +end subroutine target_teams_distribute_spmd + +! 
CHECK-LABEL: func.func @_QPtarget_teams_distribute_simd_spmd() { +subroutine target_teams_distribute_simd_spmd() + ! CHECK: omp.target + ! CHECK-SAME: host_eval({{.*}}) + !$omp target teams distribute simd + do i = 1, 10 + call foo(i) + end do + !$omp end target teams distribute simd +end subroutine target_teams_distribute_simd_spmd diff --git a/flang/test/Lower/OpenMP/target-spmd.f90 b/flang/test/Lower/OpenMP/target-spmd.f90 index bb90e5b3fc4857..43613819ccc8e9 100644 --- a/flang/test/Lower/OpenMP/target-spmd.f90 +++ b/flang/test/Lower/OpenMP/target-spmd.f90 @@ -12,7 +12,7 @@ subroutine distribute_parallel_do_generic() call foo(i) end do !$omp end distribute parallel do - call bar() + call bar() !< Prevents this from being SPMD. !$omp end teams !$omp end target @@ -25,7 +25,7 @@ subroutine distribute_parallel_do_generic() call foo(i) end do !$omp end distribute parallel do - call bar() + call bar() !< Prevents this from being SPMD. !$omp end target teams ! CHECK: omp.target @@ -83,7 +83,7 @@ subroutine distribute_parallel_do_simd_generic() call foo(i) end do !$omp end distribute parallel do simd - call bar() + call bar() !< Prevents this from being SPMD. !$omp end teams !$omp end target @@ -96,7 +96,7 @@ subroutine distribute_parallel_do_simd_generic() call foo(i) end do !$omp end distribute parallel do simd - call bar() + call bar() !< Prevents this from being SPMD. !$omp end target teams ! CHECK: omp.target diff --git a/mlir/docs/Dialects/OpenMPDialect/_index.md b/mlir/docs/Dialects/OpenMPDialect/_index.md index e0dd3f598e84b6..3a885f96835800 100644 --- a/mlir/docs/Dialects/OpenMPDialect/_index.md +++ b/mlir/docs/Dialects/OpenMPDialect/_index.md @@ -543,9 +543,10 @@ combined with a `target` construct. Additionally, the runtime library targeted by the MLIR to LLVM IR translation of the OpenMP dialect supports the optimized launch of SPMD kernels (i.e. `target teams distribute parallel {do,for}` in OpenMP), which requires -specifying in advance what the total trip count of the loop is. Consequently, it -is also beneficial to evaluate the trip count on the host device prior to the -kernel launch. +specifying in advance what the total trip count of the loop is. This is also the +case of Generic-SPMD kernels (i.e. `target teams distribute`). Consequently, it +is also necessary to evaluate the trip count on the host device prior to the +kernel launch in these cases. These host-evaluated values in MLIR would need to be placed outside of the `omp.target` region and also attached to the corresponding nested operations, diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h index bee21432196e42..248ac2eb72c61a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h @@ -22,6 +22,7 @@ #include "mlir/IR/SymbolTable.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" #define GET_TYPEDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.h.inc" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index f2a3d5082ecab4..8205c91c058e4e 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1262,9 +1262,9 @@ def TargetOp : OpenMP_Op<"target", traits = [ /// operations, the top level one will be the one captured. 
    Operation *getInnermostCapturedOmpOp();
-    /// Tells whether this target region represents a single worksharing loop
-    /// wrapped by omp.teams omp.distribute and omp.parallel constructs.
-    bool isTargetSPMDLoop();
+    /// Infers the kernel type (Generic, SPMD or Generic-SPMD) based on the
+    /// contents of the target region.
+    llvm::omp::OMPTgtExecModeFlags getKernelExecFlags();
  }] # clausesExtraClassDeclaration;
  let assemblyFormat = clausesAssemblyFormat # [{
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index c3157f531d725a..d32e68eaf2a81d 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
 #include
 #include
 #include
@@ -1720,7 +1721,7 @@ LogicalResult TargetOp::verify() {
     return emitError("target containing multiple teams constructs");
   // Check that host_eval values are only used in legal ways.
-  bool isTargetSPMD = isTargetSPMDLoop();
+  llvm::omp::OMPTgtExecModeFlags execFlags = getKernelExecFlags();
   for (Value hostEvalArg :
        cast<BlockArgOpenMPOpInterface>(getOperation()).getHostEvalBlockArgs()) {
     for (Operation *user : hostEvalArg.getUsers()) {
@@ -1735,7 +1736,8 @@ LogicalResult TargetOp::verify() {
                                 "and 'thread_limit' in 'omp.teams'";
       }
       if (auto parallelOp = dyn_cast<ParallelOp>(user)) {
-        if (isTargetSPMD && hostEvalArg == parallelOp.getNumThreads())
+        if (execFlags == llvm::omp::OMP_TGT_EXEC_MODE_SPMD &&
+            hostEvalArg == parallelOp.getNumThreads())
           continue;
         return emitOpError()
@@ -1743,15 +1745,15 @@ LogicalResult TargetOp::verify() {
                   "'omp.parallel' when representing target SPMD";
       }
       if (auto loopNestOp = dyn_cast<LoopNestOp>(user)) {
-        if (isTargetSPMD &&
+        if (execFlags != llvm::omp::OMP_TGT_EXEC_MODE_GENERIC &&
             (llvm::is_contained(loopNestOp.getLoopLowerBounds(), hostEvalArg) ||
              llvm::is_contained(loopNestOp.getLoopUpperBounds(), hostEvalArg) ||
              llvm::is_contained(loopNestOp.getLoopSteps(), hostEvalArg)))
           continue;
-        return emitOpError()
-               << "host_eval argument only legal as loop bounds and steps in "
-                  "'omp.loop_nest' when representing target SPMD";
+        return emitOpError() << "host_eval argument only legal as loop bounds "
+                                "and steps in 'omp.loop_nest' when "
+                                "representing target SPMD or Generic-SPMD";
       }
       return emitOpError() << "host_eval argument illegal use in '"
@@ -1801,32 +1803,61 @@ Operation *TargetOp::getInnermostCapturedOmpOp() {
   return capturedOp;
 }
-bool TargetOp::isTargetSPMDLoop() {
+llvm::omp::OMPTgtExecModeFlags TargetOp::getKernelExecFlags() {
+  using namespace llvm::omp;
+
+  // Make sure this region is capturing a loop. Otherwise, it's a generic
+  // kernel.
   Operation *capturedOp = getInnermostCapturedOmpOp();
   if (!isa_and_present<LoopNestOp>(capturedOp))
-    return false;
+    return OMP_TGT_EXEC_MODE_GENERIC;
-  // Accept optional SIMD leaf construct.
-  Operation *workshareOp = capturedOp->getParentOp();
-  if (isa_and_present<SimdOp>(workshareOp))
-    workshareOp = workshareOp->getParentOp();
+  SmallVector<LoopWrapperInterface> wrappers;
+  cast<LoopNestOp>(capturedOp).gatherWrappers(wrappers);
+  assert(!wrappers.empty());
-  if (!isa_and_present<WsloopOp>(workshareOp))
-    return false;
+  // Ignore optional SIMD leaf construct.
+  auto *innermostWrapper = wrappers.begin();
+  if (isa<SimdOp>(innermostWrapper))
+    innermostWrapper = std::next(innermostWrapper);
-  Operation *distributeOp = workshareOp->getParentOp();
-  if (!isa_and_present<DistributeOp>(distributeOp))
-    return false;
+  long numWrappers = std::distance(innermostWrapper, wrappers.end());
-  Operation *parallelOp = distributeOp->getParentOp();
-  if (!isa_and_present<ParallelOp>(parallelOp))
-    return false;
+  // Detect Generic-SPMD: target-teams-distribute[-simd].
+  if (numWrappers == 1) {
+    if (!isa<DistributeOp>(innermostWrapper))
+      return OMP_TGT_EXEC_MODE_GENERIC;
-  Operation *teamsOp = parallelOp->getParentOp();
-  if (!isa_and_present<TeamsOp>(teamsOp))
-    return false;
+    Operation *teamsOp = (*innermostWrapper)->getParentOp();
+    if (!isa_and_present<TeamsOp>(teamsOp))
+      return OMP_TGT_EXEC_MODE_GENERIC;
+
+    if (teamsOp->getParentOp() == *this)
+      return OMP_TGT_EXEC_MODE_GENERIC_SPMD;
+  }
+
+  // Detect SPMD: target-teams-distribute-parallel-wsloop[-simd].
+  if (numWrappers == 2) {
+    if (!isa<WsloopOp>(innermostWrapper))
+      return OMP_TGT_EXEC_MODE_GENERIC;
+
+    innermostWrapper = std::next(innermostWrapper);
+    if (!isa<DistributeOp>(innermostWrapper))
+      return OMP_TGT_EXEC_MODE_GENERIC;
+
+    Operation *parallelOp = (*innermostWrapper)->getParentOp();
+    if (!isa_and_present<ParallelOp>(parallelOp))
+      return OMP_TGT_EXEC_MODE_GENERIC;
+
+    Operation *teamsOp = parallelOp->getParentOp();
+    if (!isa_and_present<TeamsOp>(teamsOp))
+      return OMP_TGT_EXEC_MODE_GENERIC;
+
+    if (teamsOp->getParentOp() == *this)
+      return OMP_TGT_EXEC_MODE_SPMD;
+  }
-  return teamsOp->getParentOp() == (*this);
+  return OMP_TGT_EXEC_MODE_GENERIC;
 }
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 3a61861f455d86..84767120aaa8dc 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/IRBuilder.h"
@@ -4335,7 +4336,7 @@ static void initTargetRuntimeBounds(
   if (numThreads)
     bounds.MaxThreads = moduleTranslation.lookupValue(numThreads);
-  if (targetOp.isTargetSPMDLoop()) {
+  if (targetOp.getKernelExecFlags() != llvm::omp::OMP_TGT_EXEC_MODE_GENERIC) {
     llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
     bounds.LoopTripCount = nullptr;
@@ -4549,10 +4550,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTarget(
-          ompLoc, targetOp.isTargetSPMDLoop(), isOffloadEntry, ifCond, allocaIP,
-          builder.saveIP(), entryInfo, defaultBounds, runtimeBounds,
-          kernelInput, genMapInfoCB, bodyCB, argAccessorCB, dds,
-          targetOp.getNowait());
+          ompLoc,
+          targetOp.getKernelExecFlags() == llvm::omp::OMP_TGT_EXEC_MODE_SPMD,
+          isOffloadEntry, ifCond, allocaIP, builder.saveIP(), entryInfo,
+          defaultBounds, runtimeBounds, kernelInput, genMapInfoCB, bodyCB,
+          argAccessorCB, dds, targetOp.getNowait());
   if (failed(handleError(afterIP, opInst)))
     return failure();
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 00bffb6a19a129..fc79ae99d39bc2 100644
---
a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2186,7 +2186,7 @@ func.func @omp_target_host_eval3(%x : i32) { // ----- func.func @omp_target_host_eval3(%x : i32) { - // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when representing target SPMD}} + // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when representing target SPMD or Generic-SPMD}} omp.target host_eval(%x -> %arg0 : i32) { omp.wsloop { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 10dd4bb3b994c6..224d69dda08f2e 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -2762,7 +2762,7 @@ func.func @omp_target_host_eval(%x : i32) { } // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { - // CHECK: omp.teams + // CHECK: omp.teams { // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { // CHECK: omp.distribute { // CHECK: omp.wsloop { @@ -2783,6 +2783,22 @@ func.func @omp_target_host_eval(%x : i32) { } omp.terminator } + + // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.teams { + // CHECK: omp.distribute { + // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { + omp.target host_eval(%x -> %arg0 : i32) { + omp.teams { + omp.distribute { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + } + omp.terminator + } + omp.terminator + } return }
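
For reference, below is a minimal sketch (not part of the patch; the function name and SSA value names are illustrative) of the Generic-SPMD shape that `TargetOp::getKernelExecFlags()` now recognizes: an `omp.loop_nest` wrapped only by `omp.distribute` inside `omp.teams`, directly nested in `omp.target`, with the loop bounds forwarded through `host_eval` so the host can compute the trip count before launching the kernel. It mirrors the structure exercised by the new ops.mlir and Flang lowering tests.

```mlir
// Illustrative only: host-evaluated bounds enter the region as block
// arguments of host_eval and are used solely as omp.loop_nest bounds/steps,
// which the verifier permits for SPMD and Generic-SPMD kernels.
func.func @generic_spmd_sketch(%lb : i32, %ub : i32, %step : i32) {
  omp.target host_eval(%lb -> %arg0, %ub -> %arg1, %step -> %arg2 : i32, i32, i32) {
    omp.teams {
      omp.distribute {
        omp.loop_nest (%iv) : i32 = (%arg0) to (%arg1) inclusive step (%arg2) {
          omp.yield
        }
      }
      omp.terminator
    }
    omp.terminator
  }
  return
}
```

Because the bounds stay outside the `omp.target` region on the host pass, `initTargetRuntimeBounds` can fill in the kernel's `LoopTripCount` before the launch, which is the point of extending host evaluation to Generic-SPMD kernels.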