Skip to content

Commit

Permalink
Merge branch 'main' into tnguyen/dynamics-mgmn
Browse files Browse the repository at this point in the history
  • Loading branch information
1tnguyen authored Jan 15, 2025
2 parents 82d7a79 + 742a31d commit c5927c8
Show file tree
Hide file tree
Showing 14 changed files with 353 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/config/gitlab_commits.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nvidia-mgpu-repo: cuda-quantum/cuquantum-mgpu.git
nvidia-mgpu-commit: dadce3edc10564e94cd260590344d5840880087a
nvidia-mgpu-commit: 806e7fe5c459f52296ae0d3bd8bc57c3ea806152
6 changes: 3 additions & 3 deletions .github/workflows/docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
run: |
if [ -n "$(echo ${{ inputs.platforms }} | grep ',')" ]; then
# multi-platform builds get no platform tag
echo "runner=linux-amd64-cpu8" >> $GITHUB_OUTPUT
echo "runner=linux-amd64-cpu16" >> $GITHUB_OUTPUT
echo "build_docs=${{ inputs.build_docs != 'false' }}" >> $GITHUB_OUTPUT
is_versioned=${{ github.ref_type == 'tag' || startsWith(github.ref_name, 'releases/') || startsWith(github.ref_name, 'staging/') }}
has_continuous_deployment=${{ startsWith(github.ref_name, 'experimental/') || github.ref_name == 'main' }}
Expand All @@ -71,12 +71,12 @@ jobs:
elif [ -n "$(echo ${{ inputs.platforms }} | grep -i arm)" ]; then
platform_tag=`echo ${{ inputs.platforms }} | sed 's/linux\///g' | tr -d ' '`
echo "platform_tag=$platform_tag" >> $GITHUB_OUTPUT
echo "runner=linux-arm64-cpu8" >> $GITHUB_OUTPUT
echo "runner=linux-arm64-cpu16" >> $GITHUB_OUTPUT
echo "build_docs=${{ inputs.build_docs == 'true' }}" >> $GITHUB_OUTPUT
else
platform_tag=`echo ${{ inputs.platforms }} | sed 's/linux\///g' | tr -d ' '`
echo "platform_tag=$platform_tag" >> $GITHUB_OUTPUT
echo "runner=linux-amd64-cpu8" >> $GITHUB_OUTPUT
echo "runner=linux-amd64-cpu16" >> $GITHUB_OUTPUT
echo "build_docs=${{ inputs.build_docs != 'false' }}" >> $GITHUB_OUTPUT
fi
Expand Down
40 changes: 40 additions & 0 deletions docs/sphinx/snippets/cpp/using/backends/trajectory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*******************************************************************************
* Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/

// [Begin Documentation]
#include <cudaq.h>

// Kernel that allocates a register of `qubit_count` qubits, applies X to the
// register, and then measures it.
struct xOp {
  void operator()(int qubit_count) __qpu__ {
    cudaq::qvector qubits(qubit_count);
    // Flip the register, then read it out.
    x(qubits);
    mz(qubits);
  }
};

int main() {
// Add a simple bit-flip noise channel to X gate
const double error_probability = 0.1;

cudaq::bit_flip_channel bit_flip(error_probability);
// Add noise channels to our noise model.
cudaq::noise_model noise_model;
// Apply the bitflip channel to any X-gate on any qubits
noise_model.add_all_qubit_channel<cudaq::types::x>(bit_flip);

const int qubit_count = 2;
// Due to the impact of noise, our measurements will no longer be uniformly in
// the |11> state.
auto counts =
cudaq::sample({.shots = 1000, .noise = noise_model}, xOp{}, qubit_count);

// The probability that we get the perfect result (11) should be ~ 0.9 * 0.9 =
// 0.81
counts.dump();
return 0;
}
45 changes: 45 additions & 0 deletions docs/sphinx/snippets/cpp/using/backends/trajectory_observe.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*******************************************************************************
* Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/

#include <iostream>

// [Begin Documentation]
#include <cudaq.h>

// Kernel that allocates a single qubit and applies one X gate to it. No
// measurement is performed inside the kernel.
struct xOp {
  void operator()() __qpu__ {
    cudaq::qubit target;
    x(target);
  }
};

int main() {
// Add a simple bit-flip noise channel to X gate
const double error_probability = 0.1;

cudaq::bit_flip_channel bit_flip(error_probability);
// Add noise channels to our noise model.
cudaq::noise_model noise_model;
// Apply the bitflip channel to any X-gate on any qubits
noise_model.add_all_qubit_channel<cudaq::types::x>(bit_flip);

double noisy_exp_val =
cudaq::observe({.noise = noise_model, .num_trajectories = 1024}, xOp{},
cudaq::spin::z(0));

// True expectation: 0.1 - 0.9 = -0.8 (|1> has <Z> of -1 and |1> has <Z> of
// +1)
std::cout << "Noisy <Z> with 1024 trajectories = " << noisy_exp_val << "\n";

// Rerun with a higher number of trajectories
noisy_exp_val =
cudaq::observe({.noise = noise_model, .num_trajectories = 8192}, xOp{},
cudaq::spin::z(0));
std::cout << "Noisy <Z> with 8192 trajectories = " << noisy_exp_val << "\n";
return 0;
}
43 changes: 43 additions & 0 deletions docs/sphinx/snippets/python/using/backends/trajectory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# ============================================================================ #
# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. #
# All rights reserved. #
# #
# This source code and the accompanying materials are made available under #
# the terms of the Apache License 2.0 which accompanies this distribution. #
# ============================================================================ #

#[Begin Docs]
import cudaq

# Select the `nvidia` target for this run.
cudaq.set_target("nvidia")


# A simple kernel that we will add noise to: allocate the register,
# apply X to it, then measure it.
@cudaq.kernel
def kernel(qubit_count: int):
    qubits = cudaq.qvector(qubit_count)
    x(qubits)
    mz(qubits)


# Number of qubits handed to the kernel when sampling.
qubit_count = 2

# Simple bit-flip noise channel, parameterized by its error probability.
error_probability = 0.1
flip_channel = cudaq.BitFlipChannel(error_probability)

# Collect the noise channels in a noise model; the bit-flip channel is
# attached to every X gate, whichever qubit it acts on.
noise_model = cudaq.NoiseModel()
noise_model.add_all_qubit_channel("x", flip_channel)

# Sample 1000 shots with the noise model applied. Because of the noise, the
# measurements will no longer be uniformly in the |11> state.
noisy_counts = cudaq.sample(kernel,
                            qubit_count,
                            noise_model=noise_model,
                            shots_count=1000)

# The perfect result (11) should show up with probability ~ 0.9 * 0.9 = 0.81
noisy_counts.dump()
44 changes: 44 additions & 0 deletions docs/sphinx/snippets/python/using/backends/trajectory_observe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# ============================================================================ #
# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. #
# All rights reserved. #
# #
# This source code and the accompanying materials are made available under #
# the terms of the Apache License 2.0 which accompanies this distribution. #
# ============================================================================ #

#[Begin Docs]
import cudaq
from cudaq import spin

# Select the `nvidia` target for this run.
cudaq.set_target("nvidia")


# Kernel that allocates a single qubit and applies one X gate to it.
@cudaq.kernel
def kernel():
    target = cudaq.qubit()
    x(target)


# Simple bit-flip noise channel, parameterized by its error probability.
error_probability = 0.1
flip_channel = cudaq.BitFlipChannel(error_probability)

# Collect the noise channels in a noise model; the bit-flip channel is
# attached to every X gate, whichever qubit it acts on.
noise_model = cudaq.NoiseModel()
noise_model.add_all_qubit_channel("x", flip_channel)

# Estimate <Z> on qubit 0 using 1024 trajectories.
# True expectation: 0.1 - 0.9 = -0.8 (|1> has <Z> of -1 and |0> has <Z> of +1)
exp_val_1024 = cudaq.observe(kernel,
                             spin.z(0),
                             noise_model=noise_model,
                             num_trajectories=1024).expectation()
print("Noisy <Z> with 1024 trajectories =", exp_val_1024)

# Rerun with a higher number of trajectories
exp_val_8192 = cudaq.observe(kernel,
                             spin.z(0),
                             noise_model=noise_model,
                             num_trajectories=8192).expectation()
print("Noisy <Z> with 8192 trajectories =", exp_val_8192)
108 changes: 100 additions & 8 deletions docs/sphinx/using/backends/simulators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ and multi-QPU (`mqpu` :ref:`platform <mqpu-platform>`) distribution whereby each
Host CPU memory can be leveraged in addition to GPU memory to accommodate the state vector
(i.e., maximizing the number of qubits to be simulated).

* Trajectory simulation for noisy quantum circuits

The :code:`nvidia` target supports noisy quantum circuit simulations using the quantum trajectory method across all configurations: single GPU, multi-node multi-GPU, and with host memory.
When simulating many trajectories with small state vectors, the simulation is batched for optimal performance.

.. _cuQuantum single-GPU:


Expand Down Expand Up @@ -266,6 +271,100 @@ environment variable to another integer value as shown below.
nvq++ --target nvidia --target-option mgpu,fp64 program.cpp [...] -o program.x
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 ./program.x
Trajectory Noisy Simulation
++++++++++++++++++++++++++++++++++

When a :code:`noise_model` is provided to CUDA-Q, the :code:`nvidia` target will incorporate quantum noise into the quantum circuit simulation according to the noise model specified.


.. tab:: Python

.. literalinclude:: ../../snippets/python/using/backends/trajectory.py
:language: python
:start-after: [Begin Docs]

.. code:: bash
python3 program.py
{ 00:15 01:92 10:81 11:812 }
.. tab:: C++
.. literalinclude:: ../../snippets/cpp/using/backends/trajectory.cpp
:language: cpp
:start-after: [Begin Documentation]
.. code:: bash
nvq++ --target nvidia program.cpp [...] -o program.x
./program.x
{ 00:15 01:92 10:81 11:812 }
In the case of bit-string measurement sampling as in the above example, each measurement 'shot' is executed as a trajectory, whereby Kraus operators specified in the noise model are sampled.
For observable expectation value estimation, the statistical error scales asymptotically as :math:`1/\sqrt{N_{trajectories}}`, where :math:`N_{trajectories}` is the number of trajectories.
Hence, depending on the required level of accuracy, the number of trajectories can be specified accordingly.
.. tab:: Python
.. literalinclude:: ../../snippets/python/using/backends/trajectory_observe.py
:language: python
:start-after: [Begin Docs]
.. code:: bash
python3 program.py
Noisy <Z> with 1024 trajectories = -0.810546875
Noisy <Z> with 8192 trajectories = -0.800048828125
.. tab:: C++
.. literalinclude:: ../../snippets/cpp/using/backends/trajectory_observe.cpp
:language: cpp
:start-after: [Begin Documentation]
.. code:: bash
nvq++ --target nvidia program.cpp [...] -o program.x
./program.x
Noisy <Z> with 1024 trajectories = -0.810547
Noisy <Z> with 8192 trajectories = -0.800049
The following environment variable options are applicable to the :code:`nvidia` target for trajectory noisy simulation. Any environment variables must be set
prior to setting the target.
.. list-table:: **Additional environment variable options for trajectory simulation**
:widths: 20 30 50
* - Option
- Value
- Description
* - ``CUDAQ_OBSERVE_NUM_TRAJECTORIES``
- positive integer
- The default number of trajectories for observe simulation if none was provided in the `observe` call. The default value is 1000.
* - ``CUDAQ_BATCH_SIZE``
- positive integer or `NONE`
- The number of state vectors in the batched mode. If `NONE`, the batch size will be calculated based on the available device memory. Default is `NONE`.
* - ``CUDAQ_BATCHED_SIM_MAX_BRANCHES``
- positive integer
- The number of trajectory branches to be tracked simultaneously in the gate fusion. Default is 16.
* - ``CUDAQ_BATCHED_SIM_MAX_QUBITS``
- positive integer
- The max number of qubits for batching. If the qubit count in the circuit is more than this value, batched trajectory simulation will be disabled. The default value is 20.
* - ``CUDAQ_BATCHED_SIM_MIN_BATCH_SIZE``
- positive integer
- The minimum number of trajectories for batching. If the number of trajectories is less than this value, batched trajectory simulation will be disabled. Default value is 4.
.. note::
Batched trajectory simulation is only available in the single-GPU execution mode of the :code:`nvidia` target.
If batched trajectory simulation is not activated, e.g., due to problem size, number of trajectories, or the nature of the circuit (dynamic circuits with mid-circuit measurements and conditional branching), the required number of trajectories will be executed sequentially.
.. _OpenMP CPU-only:
OpenMP CPU-only
Expand Down Expand Up @@ -382,17 +481,14 @@ Specific aspects of the simulation can be configured by setting the following of
* **`CUDA_VISIBLE_DEVICES=X`**: Makes the process only see GPU X on multi-GPU nodes. Each MPI process must only see its own dedicated GPU. For example, if you run 8 MPI processes on a DGX system with 8 GPUs, each MPI process should be assigned its own dedicated GPU via `CUDA_VISIBLE_DEVICES` when invoking `mpiexec` (or `mpirun`) commands.
* **`OMP_PLACES=cores`**: Set this environment variable to improve CPU parallelization.
* **`OMP_NUM_THREADS=X`**: To enable CPU parallelization, set X to `NUMBER_OF_CORES_PER_NODE/NUMBER_OF_GPUS_PER_NODE`.
* **`CUDAQ_TENSORNET_CONTROLLED_RANK=X`**: Specify the number of controlled qubits whereby the full tensor body of the controlled gate is expanded. If the number of controlled qubits is greater than this value, the gate is applied as a controlled tensor operator to the tensor network state. Default value is 1.
.. note::
This backend requires an NVIDIA GPU and CUDA runtime libraries.
If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`.
See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.
.. note::

Setting random seed, via :code:`cudaq::set_random_seed`, is not supported for this backend due to a limitation of the :code:`cuTensorNet` library. This will be fixed in future release once this feature becomes available.

Matrix product state
+++++++++++++++++++++++++++++++++++
Expand Down Expand Up @@ -436,10 +532,6 @@ Specific aspects of the simulation can be configured by defining the following e
If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`.
See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.
.. note::

Setting random seed, via :code:`cudaq::set_random_seed`, is not supported for this backend due to a limitation of the :code:`cuTensorNet` library. This will be fixed in future release once this feature becomes available.

.. note::
The parallelism of Jacobi method (the default `CUDAQ_MPS_SVD_ALGO` setting) gives GPU better performance on small and medium size matrices.
If you expect a large number of singular values (e.g., increasing the `CUDAQ_MPS_MAX_BOND` setting), please adjust the `CUDAQ_MPS_SVD_ALGO` setting accordingly.
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ dependencies = [
'numpy >= 1.24',
'scipy >= 1.10.1',
'requests >= 2.31',
'nvidia-cublas-cu12 ~= 12.0; platform_machine == "x86_64"',
'nvidia-cuda-runtime-cu12 ~= 12.0; platform_machine == "x86_64"',
'nvidia-cusolver-cu12 ~= 11.4; platform_machine == "x86_64"',
'nvidia-cuda-nvrtc-cu12 ~= 12.0; platform_machine == "x86_64"'
'nvidia-cublas-cu12 ~= 12.0',
'nvidia-cuda-runtime-cu12 ~= 12.0',
'nvidia-cusolver-cu12 ~= 11.4',
'nvidia-cuda-nvrtc-cu12 ~= 12.0'
]
classifiers = [
'Intended Audience :: Science/Research',
Expand Down
7 changes: 7 additions & 0 deletions python/cudaq/runtime/observe.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def observe(kernel,
*args,
shots_count=0,
noise_model=None,
num_trajectories=None,
execution=None):
"""Compute the expected value of the `spin_operator` with respect to
the `kernel`. If the input `spin_operator` is a list of `SpinOperator` then compute
Expand All @@ -67,6 +68,7 @@ def observe(kernel,
noise_model (Optional[`NoiseModel`]): The optional :class:`NoiseModel` to add
noise to the kernel execution on the simulator. Defaults to an empty
noise model.
`num_trajectories` (Optional[int]): The optional number of trajectories for noisy simulation. Only valid if a noise model is provided. Key-word only.
Returns:
:class:`ObserveResult`:
Expand Down Expand Up @@ -123,6 +125,11 @@ def observe(kernel,
else:
ctx = cudaq_runtime.ExecutionContext('observe', shots_count)
ctx.setSpinOperator(localOp)
if num_trajectories is not None:
if noise_model is None:
raise RuntimeError(
"num_trajectories is provided without a noise_model.")
ctx.numberTrajectories = num_trajectories
cudaq_runtime.setExecutionContext(ctx)
kernel(*args)
res = ctx.result
Expand Down
2 changes: 2 additions & 0 deletions python/runtime/common/py_ExecutionContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ void bindExecutionContext(py::module &mod) {
.def_readwrite("totalIterations",
&cudaq::ExecutionContext::totalIterations)
.def_readwrite("batchIteration", &cudaq::ExecutionContext::batchIteration)
.def_readwrite("numberTrajectories",
&cudaq::ExecutionContext::numberTrajectories)
.def("setSpinOperator", [](cudaq::ExecutionContext &ctx,
cudaq::spin_op &spin) { ctx.spin = &spin; })
.def("getExpectationValue",
Expand Down
Loading

0 comments on commit c5927c8

Please sign in to comment.