NVIDIA · annagrin · Oct 11, 2024 · Sep 17, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h
@@ -55,6 +55,10 @@ static constexpr const char createCudaqStateFromDataFP32[] =
 // Delete a state created by the runtime functions above.
 static constexpr const char deleteCudaqState[] = "__nvqpp_cudaq_state_delete";
 
+// Get state of a kernel (placeholder function, calls are always replaced in
+// opts)
+static constexpr const char getCudaqState[] = "__nvqpp_cudaq_state_get";
+
 /// Builder for lowering the clang AST to an IR for CUDA-Q. Lowering includes
 /// the transformation of both quantum and classical computation. Different
 /// features of the CUDA-Q programming model are lowered into different dialects

diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td
@@ -779,6 +779,44 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> {
   }];
 }
 
+def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> {
+  let summary =
+    "Replace `quake.init_state` instructions with call to the kernel generating the state";
+  let description = [{
+    For quantum devices, argument synthesis replaces each state pointer argument
+    with a state obtained from the `__nvqpp_cudaq_state_get` intrinsic, which
+    takes the name of the synthesized kernel that generated the state.
+
+    This pass completes the replacement of `quake.init_state` instructions by:
+
+    - Replacing `quake.init_state` with a call to the kernel that the `get_state` call names.
+    - Removing the instructions that are no longer needed.
+
+    For example:
+
+    Before StateInitialization (state-initialization):
+    ```
+    func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+      %0 = cc.string_literal "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr<!cc.array<i8 x 45>>
+      %1 = cc.cast %0 : (!cc.ptr<!cc.array<i8 x 45>>) -> !cc.ptr<i8>
+      %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr<i8>) -> !cc.ptr<!cc.state>
+      %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr<!cc.state>) -> i64
+      %4 = quake.alloca !quake.veq<?>[%3 : i64]
+      %5 = quake.init_state %4, %2 : (!quake.veq<?>, !cc.ptr<!cc.state>) -> !quake.veq<?>
+      return
+    }
+    ```
+
+    After StateInitialization (state-initialization):
+    ```
+    func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+      %5 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq<?>
+      return
+    }
+    ```
+  }];
+}
+
 def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> {
   let summary =
     "Convert state vector data into gates";
@@ -828,6 +866,17 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> {
   ];
 }
 
+def StateValidation : Pass<"state-validation", "mlir::ModuleOp"> {
+  let summary =
+    "Make sure MLIR is valid after synthesis for quantum devices";
+  let description = [{
+    Argument synthesis should replace all `quake.init_state` instructions that
+    take a state operand, as well as all calls to state-related runtime functions.
+    This pass verifies that none of them are left and removes the body-less
+    declarations of the state-related runtime functions.
+  }];
+}
+
 def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> {
   let summary = "Promote single qubit allocations.";
   let description = [{

diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp
@@ -269,6 +269,10 @@ static constexpr IntrinsicCode intrinsicTable[] = {
 
     {cudaq::deleteCudaqState, {}, R"#(
   func.func private @__nvqpp_cudaq_state_delete(%p : !cc.ptr<!cc.state>) -> ()
+  )#"},
+
+    {cudaq::getCudaqState, {}, R"#(
+  func.func private @__nvqpp_cudaq_state_get(%p : !cc.ptr<i8>) -> !cc.ptr<!cc.state>
   )#"},
 
     {cudaq::getNumQubitsFromCudaqState, {}, R"#(

diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp
@@ -49,7 +49,8 @@ struct VerifyNVQIRCallOpsPass
           cudaq::getNumQubitsFromCudaqState,
           cudaq::createCudaqStateFromDataFP32,
           cudaq::createCudaqStateFromDataFP64,
-          cudaq::deleteCudaqState};
+          cudaq::deleteCudaqState,
+          cudaq::getCudaqState};
       // It must be either NVQIR extension functions or in the allowed list.
       return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) !=
                  NVQIR_FUNCS.end() ||

diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt
@@ -50,7 +50,9 @@ add_cudaq_library(OptTransforms
   QuakeSynthesizer.cpp
   RefToVeqAlloc.cpp
   RegToMem.cpp
+  StateInitialization.cpp
   StatePreparation.cpp
+  StateValidation.cpp
   UnitarySynthesis.cpp
   WiresToWiresets.cpp
 

diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp
@@ -0,0 +1,141 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under    *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+#include "PassDetails.h"
+#include "cudaq/Optimizer/Builder/Intrinsics.h"
+#include "cudaq/Optimizer/Dialect/CC/CCOps.h"
+#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
+#include "cudaq/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/Complex/IR/Complex.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"
+#include <span>
+
+namespace cudaq::opt {
+#define GEN_PASS_DEF_STATEINITIALIZATION
+#include "cudaq/Optimizer/Transforms/Passes.h.inc"
+} // namespace cudaq::opt
+
+#define DEBUG_TYPE "state-initialization"
+
+using namespace mlir;
+
+namespace {
+
+/// True when `op` is a `func.call` whose callee symbol is one of `names`.
+/// A null `op` or a call without a callee attribute yields false.
+static bool isCall(Operation *op, std::vector<const char *> &&names) {
+  auto callOp = dyn_cast_or_null<func::CallOp>(op);
+  if (!callOp)
+    return false;
+  auto calleeAttr = callOp.getCalleeAttr();
+  if (!calleeAttr)
+    return false;
+  const std::string funcName = calleeAttr.getValue().str();
+  return std::find(names.begin(), names.end(), funcName) != names.end();
+}
+
+static bool isGetStateCall(Operation *op) {
+  return isCall(op, {cudaq::getCudaqState}); // `__nvqpp_cudaq_state_get`
+}
+
+static bool isNumberOfQubitsCall(Operation *op) {
+  return isCall(op, {cudaq::getNumQubitsFromCudaqState}); // null op -> false
+}
+
+// clang-format off
+/// Replace `quake.init_state` by a call to a (modified) kernel that produced the state.
+/// ```
+///  %0 = cc.string_literal "callee.modified_0" : !cc.ptr<!cc.array<i8 x 27>>
+///  %1 = cc.cast %0 : (!cc.ptr<!cc.array<i8 x 27>>) -> !cc.ptr<i8>
+///  %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr<i8>) -> !cc.ptr<!cc.state>
+///  %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr<!cc.state>) -> i64
+///  %4 = quake.alloca !quake.veq<?>[%3 : i64]
+///  %5 = quake.init_state %4, %2 : (!quake.veq<?>, !cc.ptr<!cc.state>) -> !quake.veq<?>
+/// ───────────────────────────────────────────
+/// ...
+///  %5 = call @callee.modified_0() : () -> !quake.veq<?>
+/// ```
+// clang-format on
+/// Rewrites `quake.init_state %veq, %state` into a direct call to the kernel
+/// named by the `__nvqpp_cudaq_state_get` call that produced `%state`, then
+/// erases the ops of the old chain that are left without users.
+class StateInitPattern : public OpRewritePattern<quake::InitializeStateOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(quake::InitializeStateOp initState,
+                                PatternRewriter &rewriter) const override {
+    // The state operand is normally `!cc.ptr<!cc.state>`, so look through the
+    // pointer; a bare `!cc.state` is still accepted.
+    Value state = initState.getOperand(1);
+    Type stateTy = state.getType();
+    if (auto ptrTy = dyn_cast<cudaq::cc::PointerType>(stateTy))
+      stateTy = ptrTy.getElementType();
+    if (!isa<cudaq::cc::StateType>(stateTy))
+      return failure();
+
+    // Match get_state(cast(string_literal)); any link may be a block argument.
+    Operation *getStateOp = state.getDefiningOp();
+    if (!isGetStateCall(getStateOp) || getStateOp->getNumOperands() == 0)
+      return failure();
+    auto castOp = getStateOp->getOperand(0).getDefiningOp<cudaq::cc::CastOp>();
+    if (!castOp)
+      return failure();
+    auto literalOp =
+        castOp.getOperand().getDefiningOp<cudaq::cc::CreateStringLiteralOp>();
+    if (!literalOp)
+      return failure();
+
+    // Collect the cleanup candidates before the IR is modified.
+    Operation *allocaOp = initState.getOperand(0).getDefiningOp();
+    Operation *numOfQubits = nullptr;
+    if (allocaOp && allocaOp->getNumOperands() > 0)
+      numOfQubits = allocaOp->getOperand(0).getDefiningOp();
+    if (!isNumberOfQubitsCall(numOfQubits))
+      numOfQubits = nullptr;
+
+    rewriter.replaceOpWithNewOp<func::CallOp>(initState, initState.getType(),
+                                              literalOp.getStringLiteral(),
+                                              mlir::ValueRange{});
+
+    // Erase users before producers, and only ops nothing else refers to, so
+    // that no remaining operation is left with a dangling operand.
+    for (Operation *op : {allocaOp, numOfQubits, getStateOp,
+                          castOp.getOperation(), literalOp.getOperation()})
+      if (op && op->use_empty())
+        rewriter.eraseOp(op);
+    return success();
+  }
+};
+
+/// Function pass that greedily applies `StateInitPattern` to one `func.func`
+/// and signals pass failure if the rewrite driver reports failure.
+class StateInitializationPass
+    : public cudaq::opt::impl::StateInitializationBase<
+          StateInitializationPass> {
+public:
+  using StateInitializationBase::StateInitializationBase;
+
+  void runOnOperation() override {
+    auto fn = getOperation();
+    LLVM_DEBUG(llvm::dbgs() << "Before state initialization: " << fn << '\n');
+
+    RewritePatternSet rewrites(&getContext());
+    rewrites.add<StateInitPattern>(rewrites.getContext());
+    if (failed(applyPatternsAndFoldGreedily(fn.getOperation(),
+                                            std::move(rewrites))))
+      signalPassFailure();
+
+    LLVM_DEBUG(llvm::dbgs() << "After state initialization: " << fn << '\n');
+  }
+};
+} // namespace
diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp
@@ -0,0 +1,127 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under    *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+#include "PassDetails.h"
+#include "cudaq/Optimizer/Builder/Intrinsics.h"
+#include "cudaq/Optimizer/Dialect/CC/CCOps.h"
+#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
+#include "cudaq/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/Complex/IR/Complex.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"
+
+namespace cudaq::opt {
+#define GEN_PASS_DEF_STATEVALIDATION
+#include "cudaq/Optimizer/Transforms/Passes.h.inc"
+} // namespace cudaq::opt
+
+#define DEBUG_TYPE "state-validation"
+
+using namespace mlir;
+
+/// Validate that quantum code does not contain runtime calls and remove runtime
+/// function definitions.
+namespace {
+
+/// True when `funcName` names one of the state-related runtime functions.
+static bool isRuntimeStateCallName(llvm::StringRef funcName) {
+  static std::vector<const char *> runtimeFns = {
+      cudaq::getCudaqState, cudaq::createCudaqStateFromDataFP32,
+      cudaq::createCudaqStateFromDataFP64, cudaq::deleteCudaqState,
+      cudaq::getNumQubitsFromCudaqState};
+  return std::any_of(runtimeFns.begin(), runtimeFns.end(),
+                     [&](const char *fn) { return fn == funcName; });
+}
+
+/// True when `callOp` is a `func.call` to a state-related runtime function.
+/// Null operations and calls without a callee attribute yield false.
+static bool isRuntimeStateCall(Operation *callOp) {
+  auto call = dyn_cast_or_null<func::CallOp>(callOp);
+  if (!call)
+    return false;
+  auto calleeAttr = call.getCalleeAttr();
+  if (!calleeAttr)
+    return false;
+  const std::string funcName = calleeAttr.getValue().str();
+  return isRuntimeStateCallName(funcName);
+}
+
+class ValidateStateCallPattern : public OpRewritePattern<func::CallOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(func::CallOp callOp,
+                                PatternRewriter &rewriter) const override {
+    if (isRuntimeStateCall(callOp)) {
+      auto name = callOp.getCalleeAttr().getValue();
+      callOp.emitError(
+          "Synthesis did not remove func call for quantum platform: " + name);
+    }
+    return failure();
+  }
+};
+
+/// Errors on `quake.init_state` fed by a `!cc.state` or a `!cc.ptr` to one.
+class ValidateStateInitPattern
+    : public OpRewritePattern<quake::InitializeStateOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(quake::InitializeStateOp initState,
+                                PatternRewriter &rewriter) const override {
+    Type stateTy = initState.getOperand(1).getType();
+    auto ptrTy = dyn_cast<cudaq::cc::PointerType>(stateTy); // look through ptr
+    if (isa<cudaq::cc::StateType>(ptrTy ? ptrTy.getElementType() : stateTy))
+      initState.emitError("Synthesis did not remove `quake.init_state <veq> "
+                          "<state>` instruction");
+    return failure();
+  }
+};
+
+/// Module pass: runs the validation patterns on every `func.func`, fails if
+/// they reported an error, and erases state runtime function declarations.
+class StateValidationPass
+    : public cudaq::opt::impl::StateValidationBase<StateValidationPass> {
+public:
+  using StateValidationBase::StateValidationBase;
+
+  mlir::ModuleOp getModule() { return getOperation(); }
+
+  void runOnOperation() override final {
+    auto *ctx = &getContext();
+    auto module = getModule();
+    SmallVector<Operation *> toErase;
+    // The patterns only emit diagnostics and return failure, which the greedy
+    // driver does not treat as an error, so watch the diagnostics instead.
+    bool sawError = false;
+    ScopedDiagnosticHandler watcher(ctx, [&](Diagnostic &diag) {
+      sawError |= diag.getSeverity() == DiagnosticSeverity::Error;
+      return failure(); // Not consumed: the regular handlers still report it.
+    });
+    RewritePatternSet patterns(ctx);
+    patterns.insert<ValidateStateCallPattern, ValidateStateInitPattern>(ctx);
+    FrozenRewritePatternSet frozen(std::move(patterns));
+    for (auto func : module.getOps<func::FuncOp>()) {
+      LLVM_DEBUG(llvm::dbgs() << "Before state validation: " << func << '\n');
+      if (failed(applyPatternsAndFoldGreedily(func.getOperation(), frozen)))
+        signalPassFailure();
+      // Delete runtime function definitions.
+      if (func.getBody().empty() && isRuntimeStateCallName(func.getName()))
+        toErase.push_back(func);
+      LLVM_DEBUG(llvm::dbgs() << "After state validation: " << func << '\n');
+    }
+    if (sawError)
+      signalPassFailure();
+    for (auto *op : toErase)
+      op->erase();
+  }
+};
+
+} // namespace
diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp
@@ -96,8 +96,9 @@ class PyRemoteSimulationState : public RemoteSimulationState {
     }
   }
 
-  std::pair<std::string, std::vector<void *>> getKernelInfo() const override {
-    return {kernelName, argsData->getArgs()};
+  std::optional<std::pair<std::string, std::vector<void *>>>
+  getKernelInfo() const override {
+    return std::make_pair(kernelName, argsData->getArgs());
   }
 
   std::complex<double> overlap(const cudaq::SimulationState &other) override {

diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
@@ -523,7 +523,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module,
   auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated();
   auto isSimulator = isLocalSimulator || isRemoteSimulator;
 
-  cudaq::opt::ArgumentConverter argCon(name, unwrap(module), isSimulator);
+  cudaq::opt::ArgumentConverter argCon(name, unwrap(module));
   argCon.gen(runtimeArgs.getArgs());
   std::string kernName = cudaq::runtime::cudaqGenPrefixName + name;
   SmallVector<StringRef> kernels = {kernName};