NVIDIA · schweitzpgi · Jul 12, 2024 · Jul 5, 2024 · Jul 10, 2024 · Jul 11, 2024
diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td
@@ -90,6 +90,8 @@ def cc_ScopeOp : CCOp<"scope",
   let extraClassDeclaration = [{
     using BodyBuilderFn =
         llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>;
+
+    bool hasAllocation(bool quantumAllocs = true);
   }];
 }
 

diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td
@@ -47,17 +47,6 @@ def ApplySpecialization : Pass<"apply-op-specialization", "mlir::ModuleOp"> {
   ];
 }
 
-def PySynthCallableBlockArgs : Pass<"py-synth-callable-block-args", "mlir::func::FuncOp"> {
-  let summary = "Synthesize / Inline cc.callable_func on function block arguments.";
-  let description = [{
-    This pass is leveraged by the Python bindings to synthesize any 
-    cc.callable block arguments. By synthesis we mean replace all uses of the 
-    callable block argument with a specific in-Module function call (func.call) 
-    retrieved at runtime (the name of the function passed to the kernel at the 
-    cc.callable block argument index).
-  }];
-}
-
 def BasisConversionPass: Pass<"basis-conversion", "mlir::ModuleOp"> {
   let summary = "Converts kernels to a set of basis operations.";
   let description = [{
@@ -572,6 +561,18 @@ def PruneCtrlRelations : Pass<"pruned-ctrl-form", "mlir::func::FuncOp"> {
   }];
 }
 
+def PySynthCallableBlockArgs :
+    Pass<"py-synth-callable-block-args", "mlir::func::FuncOp"> {
+  let summary = "Synthesize / Inline cc.callable_func on function block arguments.";
+  let description = [{
+    This pass is leveraged by the Python bindings to synthesize any 
+    cc.callable block arguments. By synthesis we mean replace all uses of the 
+    callable block argument with a specific in-Module function call (func.call) 
+    retrieved at runtime (the name of the function passed to the kernel at the 
+    cc.callable block argument index).
+  }];
+}
+
 def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> {
   let summary =
     "Synthesize concrete quantum program from Quake code plus runtime values.";

diff --git a/lib/Optimizer/Dialect/CC/CCOps.cpp b/lib/Optimizer/Dialect/CC/CCOps.cpp
@@ -9,7 +9,7 @@
 #include "cudaq/Optimizer/Dialect/CC/CCOps.h"
 #include "cudaq/Optimizer/Builder/Factory.h"
 #include "cudaq/Optimizer/Dialect/CC/CCDialect.h"
-#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h"
+#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
@@ -1437,6 +1437,29 @@ void cudaq::cc::ScopeOp::getSuccessorRegions(
   regions.push_back(RegionSuccessor(getResults()));
 }
 
+// Returns true iff some op nested at any depth in `region` has an Allocate
+// effect; when !quantumAllocs, quake.alloca ops are not counted. File-local.
+template <bool quantumAllocs>
+static bool hasAllocation(Region &region) {
+  for (auto &block : region)
+    for (auto &op : block) {
+      if (auto mem = dyn_cast<MemoryEffectOpInterface>(op))
+        if (mem.hasEffect<MemoryEffects::Allocate>())
+          if (quantumAllocs || !isa<quake::AllocaOp>(op))
+            return true;
+      for (auto &opReg : op.getRegions())
+        if (hasAllocation<quantumAllocs>(opReg))
+          return true;
+    }
+  return false;
+}
+
+bool cudaq::cc::ScopeOp::hasAllocation(bool quantumAllocs) {
+  // Map the runtime flag onto the matching compile-time specialization.
+  return quantumAllocs ? ::hasAllocation<true>(getRegion())
+                       : ::hasAllocation<false>(getRegion());
+}
+
 namespace {
 // If there are no allocations in the scope, then the scope is not needed as
 // there is nothing to deallocate. This transformation does the following
@@ -1464,9 +1487,8 @@ struct EraseScopeWhenNotNeeded : public OpRewritePattern<cudaq::cc::ScopeOp> {
 
   LogicalResult matchAndRewrite(cudaq::cc::ScopeOp scope,
                                 PatternRewriter &rewriter) const override {
-    for (auto &reg : scope->getRegions())
-      if (hasAllocation(reg))
-        return success();
+    if (scope.hasAllocation())
+      return success();
 
     // scope does not allocate, so the region can be inlined into the parent.
     auto loc = scope.getLoc();
@@ -1501,19 +1523,6 @@ struct EraseScopeWhenNotNeeded : public OpRewritePattern<cudaq::cc::ScopeOp> {
     rewriter.replaceOp(scope, succBlock->getArguments());
     return success();
   }
-
-  static bool hasAllocation(Region &region) {
-    for (auto &block : region)
-      for (auto &op : block) {
-        if (auto mem = dyn_cast<MemoryEffectOpInterface>(op))
-          if (mem.hasEffect<MemoryEffects::Allocate>())
-            return true;
-        for (auto &opReg : op.getRegions())
-          if (hasAllocation(opReg))
-            return true;
-      }
-    return false;
-  }
 };
 } // namespace
 

diff --git a/lib/Optimizer/Transforms/LowerToCFG.cpp b/lib/Optimizer/Transforms/LowerToCFG.cpp
@@ -7,6 +7,7 @@
  ******************************************************************************/
 
 #include "PassDetails.h"
+#include "cudaq/Optimizer/Builder/Factory.h"
 #include "cudaq/Optimizer/Dialect/CC/CCOps.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "cudaq/Optimizer/Transforms/Passes.h"
@@ -46,6 +47,17 @@ class RewriteScope : public OpRewritePattern<cudaq::cc::ScopeOp> {
                                 PatternRewriter &rewriter) const override {
     auto loc = scopeOp.getLoc();
     auto *initBlock = rewriter.getInsertionBlock();
+    Value stacksave;
+    auto module = scopeOp.getOperation()->getParentOfType<ModuleOp>();
+    auto ptrTy = cudaq::cc::PointerType::get(rewriter.getI8Type());
+    if (scopeOp.hasAllocation(/*quantumAllocs=*/false)) {
+      auto fun = cudaq::opt::factory::createFunction(
+          "llvm.stacksave", ArrayRef<Type>{ptrTy}, {}, module);
+      fun.setPrivate();
+      auto call = rewriter.create<func::CallOp>(
+          loc, ptrTy, fun.getSymNameAttr(), ArrayRef<Value>{});
+      stacksave = call.getResult(0);
+    }
     auto initPos = rewriter.getInsertionPoint();
     auto *endBlock = rewriter.splitBlock(initBlock, initPos);
     ValueRange scopeResults;
@@ -70,6 +82,14 @@ class RewriteScope : public OpRewritePattern<cudaq::cc::ScopeOp> {
     rewriter.setInsertionPointToEnd(initBlock);
     rewriter.create<cf::BranchOp>(loc, entryBlock, ValueRange{});
     rewriter.inlineRegionBefore(scopeOp.getInitRegion(), endBlock);
+    if (stacksave) {
+      rewriter.setInsertionPointToStart(endBlock);
+      auto fun = cudaq::opt::factory::createFunction(
+          "llvm.stackrestore", {}, ArrayRef<Type>{ptrTy}, module);
+      fun.setPrivate();
+      rewriter.create<func::CallOp>(loc, ArrayRef<Type>{}, fun.getSymNameAttr(),
+                                    ArrayRef<Value>{stacksave});
+    }
     rewriter.replaceOp(scopeOp, scopeResults);
     return success();
   }

diff --git a/test/Quake/stack.qke b/test/Quake/stack.qke
@@ -0,0 +1,32 @@
+// ========================================================================== //
+// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                 //
+// All rights reserved.                                                       //
+//                                                                            //
+// This source code and the accompanying materials are made available under   //
+// the terms of the Apache License 2.0 which accompanies this distribution.   //
+// ========================================================================== //
+
+// RUN: cudaq-opt -lower-to-cfg %s | cudaq-opt | FileCheck %s
+
+func.func private @foo(%0: !cc.ptr<i8>)
+// The cc.alloca below is what should trigger stacksave/stackrestore emission.
+func.func @stacksaveandrestore() {
+  cc.scope {
+    %0 = cc.alloca i8
+    func.call @foo(%0) : (!cc.ptr<i8>) -> ()
+    cc.continue
+  }
+  return
+}
+
+// CHECK-LABEL:   func.func @stacksaveandrestore() {
+// CHECK:           %[[VAL_0:.*]] = call @llvm.stacksave() : () -> !cc.ptr<i8>
+// CHECK:           cf.br ^bb1
+// CHECK:         ^bb1:
+// CHECK:           %[[VAL_1:.*]] = cc.alloca i8
+// CHECK:           call @foo(%[[VAL_1]]) : (!cc.ptr<i8>) -> ()
+// CHECK:           cf.br ^bb2
+// CHECK:         ^bb2:
+// CHECK:           call @llvm.stackrestore(%[[VAL_0]]) : (!cc.ptr<i8>) -> ()
+// CHECK:           return
+// CHECK:         }