From 65b044130f15982971e614bcead79f820e839198 Mon Sep 17 00:00:00 2001
From: Pedro Maciel Xavier <pedroxavier@psr-inc.com>
Date: Thu, 20 Apr 2023 14:04:23 -0300
Subject: [PATCH] Add explanation on encoding error

---
 docs/make.jl                   |  10 ++-
 docs/src/booklet/4-encoding.md |  29 ++++++++-
 src/compiler/constraints.jl    |  11 ----
 src/model/virtual.jl           | 113 +++++++++++++++++++++------------
 4 files changed, 108 insertions(+), 55 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 4a289630..101429f1 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,14 +11,20 @@ makedocs(;
     doctest = true,
     clean   = true,
     format  = Documenter.HTML(
+        sidebar_sitename = false,
+        mathengine       = Documenter.KaTeX(
+            Dict(
+                :macros => Dict(
+                    raw"\set" => raw"\left\lbrace{#1}\right\rbrace"
+                )
+            )
+        ),
         assets           = [
             "assets/extra_styles.css",
             "assets/favicon.ico",
             asset("https://tikzjax.com/v1/fonts.css"; class = :css),
             asset("https://tikzjax.com/v1/tikzjax.js"; class = :js),
         ],
-        mathengine       = Documenter.KaTeX(),
-        sidebar_sitename = false,
     ), 
     sitename = "ToQUBO.jl",
     authors  = "Pedro Maciel Xavier and Pedro Ripper and Tiago Andrade and Joaquim Dias Garcia and David E. Bernal Neira",
diff --git a/docs/src/booklet/4-encoding.md b/docs/src/booklet/4-encoding.md
index 0bdf08c3..b46d64a8 100644
--- a/docs/src/booklet/4-encoding.md
+++ b/docs/src/booklet/4-encoding.md
@@ -18,11 +18,10 @@ Also, they differ in the magnitude of their coefficients ``\Delta``.
 | Bounded-Coefficient   |    ``O(n)``        |    ``O(n)``      |       -             | ``O(1)``         |
 | Arithmetic Prog       |  ``O(\sqrt{n})``   |  ``O(\sqrt{n})`` |       -             | ``O(\sqrt{n})``  |
 
-
 ### Linear Encoding
 ```@docs
-ToQUBO.Binary
 ToQUBO.Unary
+ToQUBO.Binary
 ToQUBO.Arithmetic
 ToQUBO.OneHot
 ```
@@ -41,6 +40,32 @@ ToQUBO.DomainWall
 ToQUBO.Bounded
 ```
 
+### Encoding Error
+Let ``\set{x_{i}}_{i \in [k]}`` be the collection of ``k`` evenly spaced samples from the discretization of an interval ``[a, b] \subseteq \mathbb{R}``.
+
+The representation error for a given point ``x`` with respect to ``\set{x_{i}}_{i \in [k]}`` is
+
+```math
+e_{k}(x) = \min_{i \in [k]} \left|x - x_{i}\right|
+```
+
+Assuming that ``x`` behaves as a random variable uniformly distributed over ``[a, b]``, the expected absolute encoding error is
+
+```math
+\begin{align*}
+\mathbb{E}\left[{e_{k}(x)}\right] &= \frac{1}{b - a} \int_{a}^{b} e_{k}(x) ~\mathrm{d}x \\
+                              &= \frac{1}{4} \frac{b - a}{k - 1}
+\end{align*}
+```
+
+Thus, for encoding methods that rely on the regular division of an interval, it is possible to define the number of samples ``k`` necessary to limit the expected error according to an upper bound ``\tau``, that is,
+
+```math
+\mathbb{E}\left[{e_{k}(x)}\right] \le \tau \implies k \ge 1 + \frac{b - a}{4 \tau}
+```
+
+This allows the compiler to automatically infer the number of bits to allocate for an encoded variable given the tolerance factor.
+
 ## Constraints
 
 A QUBO model is unconstrained. So when `ToQUBO` is reformulating a problem, it needs to encode all constraints into the objective function losing as little information as possible.
diff --git a/src/compiler/constraints.jl b/src/compiler/constraints.jl
index 16cc4ae5..92bf3540 100644
--- a/src/compiler/constraints.jl
+++ b/src/compiler/constraints.jl
@@ -265,17 +265,6 @@ end
         ::MOI.SOS1{T},
         ::AbstractArchitecture,
     ) where {T}
-
-Turns constraints of the form
-
-```math
-\begin{array}{rl}
-\text{s.t} & \sum{\mathbf{x}} \leq \min \mathbf{x}
-\end{array}
-```
-
-into
-
 """
 function toqubo_constraint(
     model::VirtualModel{T},
diff --git a/src/model/virtual.jl b/src/model/virtual.jl
index d9726205..beaf999e 100644
--- a/src/model/virtual.jl
+++ b/src/model/virtual.jl
@@ -9,7 +9,8 @@ Maps newly created virtual variable `v` within the virtual model structure. It f
  1. Maps `v`'s source to it in the model's `source` mapping.
  2. For every one of `v`'s targets, maps it to itself and adds a binary constraint to it.
  2. Adds `v` to the end of the model's `varvec`.  
-""" function encode! end
+"""
+function encode! end
 
 @doc raw"""
 # Variable Expansion methods:
@@ -157,7 +158,8 @@ Every linear encoding ``\xi`` is of the form
 \xi(\mathbf{y}) = \alpha + \sum_{i = 1}^{n} \gamma_{i} y_{i}
 ```
 
-""" abstract type LinearEncoding <: Encoding end
+"""
+abstract type LinearEncoding <: Encoding end
 
 function VirtualVariable{T}(
     e::LinearEncoding,
@@ -191,7 +193,8 @@ end
     Mirror()
 
 Mirrors binary variable ``x \in \mathbb{B}`` with a twin variable ``y \in \mathbb{B}``.
-""" struct Mirror <: LinearEncoding end
+"""
+struct Mirror <: LinearEncoding end
 
 function encode!(model::VirtualModel{T}, e::Mirror, x::Union{VI,Nothing}) where {T}
     return encode!(model, e, x, ones(T, 1))
@@ -199,7 +202,8 @@ end
 
 @doc raw"""
     Linear()
-""" struct Linear <: LinearEncoding end
+"""
+struct Linear <: LinearEncoding end
 
 function encode!(
     model::VirtualModel{T},
@@ -217,7 +221,7 @@ end
     Unary()
 
 ## Integer
-Let ``x \in [a, b] \subset \mathbb{Z}, n = b - a, \mathbf{y} \in \mathbb{B}^{n}``.
+Let ``x \in [a, b] \subset \mathbb{Z}``, ``n = b - a`` and ``\mathbf{y} \in \mathbb{B}^{n}``.
 
 ```math
 \xi{[a, b]}(\mathbf{y}) = a + \sum_{j = 1}^{b - a} y_{j}
@@ -293,7 +297,7 @@ end
     Binary()
 
 ## Integer
-Let ``x \in [a, b] \subset \mathbb{Z}``, ``n = \left\lceil \log_{2}(b - a) + 1 \right\rceil``, ``\mathbf{y} \in \mathbb{B}^{n}``.
+Let ``x \in [a, b] \subset \mathbb{Z}``, ``n = \left\lceil \log_{2}(b - a) + 1 \right\rceil`` and ``\mathbf{y} \in \mathbb{B}^{n}``.
 
 ```math
 \xi{[a, b]}(\mathbf{y}) = a + \left(b - a - 2^{n - 1} + 1\right) y_{n} + \sum_{j = 1}^{n - 1} 2^{j - 1} y_{j}
@@ -314,7 +318,8 @@ n \ge \log_{2} \left[1 + \frac{b - a}{4 \tau}\right]
 ```
 
 binary variables become necessary.
-""" struct Binary <: LinearEncoding end
+"""
+struct Binary <: LinearEncoding end
 
 function encode!(
     model::VirtualModel{T},
@@ -372,14 +377,29 @@ end
 @doc raw"""
     Arithmetic()
 
-Let ``x \in [a, b] \subset \mathbb{Z}, n = b - a, \mathbf{y} \in \mathbb{B}^{n}``.
+## Integer
+Let ``x \in [a, b] \subset \mathbb{Z}``, ``n = \left\lceil{ \frac{1}{2} {\sqrt{1 + 8 (b - a)}} - \frac{1}{2} }\right\rceil`` and ``\mathbf{y} \in \mathbb{B}^{n}``.
+
+```math
+\xi{[a, b]}(\mathbf{y}) = a + \left( {b - a - \frac{n (n - 1)}{2}} \right) y_{n} + \sum_{j = 1}^{n - 1} j y_{j}
+```
+
+## Real
+Given ``n \in \mathbb{N}`` for ``x \in [a, b] \subset \mathbb{R}``,
+
+```math
+\xi{[a, b]}(\mathbf{y}) = a + \frac{2 (b - a)}{n (n + 1)} \sum_{j = 1}^{n} j y_{j}
+```
+
+### Encoding error
+Given ``\tau > 0``, for the expected encoding error to be less than or equal to ``\tau``, the number of binary variables ``n`` must satisfy
 
 ```math
-x = \xi(\mathbf{y}) = a + \sum_{i = 1}^{N - 1} i\,y_{i} + \left({ n - \frac{N\,(N - 1)}{2}}\right)\,y_{N}
+n \ge \frac{1}{2} \left[ 1 + \sqrt{3 + \frac{(b - a)}{2 \tau}} \right]
 ```
 
-Where ``N = \left\lceil{ \frac{1}{2} {\sqrt{1 + 8 n}} - \frac{1}{2} }\right\rceil`` is the number of bits.
-""" struct Arithmetic <: LinearEncoding end
+"""
+struct Arithmetic <: LinearEncoding end
 
 function encode!(
     model::VirtualModel{T},
@@ -433,21 +453,24 @@ end
 @doc raw"""
     OneHot()
 
-The one-hot encoding is a linear technique used to represent a variable
-``x \in \{ \gamma_{j} \}_{j \in [n]}``.
+The one-hot encoding is a linear technique used to represent a variable ``x \in \set{\gamma_{j}}_{j \in [n]}``.
+
+The associated encoding function is combined with a constraint ensuring that exactly one of the expansion's variables ``y_{j}`` is activated at a time.
+
+```math
+\xi[\set{\gamma_{j}}_{j \in [n]}](\mathbf{y}) = \sum_{j = 1}^{n} \gamma_{j} y_{j} ~\textrm{s.t.}~ \sum_{j = 1}^{n} y_{j} = 1
+```
 
-The encoding function is combined with a constraint assuring that only
-one and exactly one of the expansion's variables ``y_{j}`` is activated
-at a time.
+When a variable is encoded following this approach, a penalty term of the form
 
 ```math
-\begin{array}{rl}
-x = \xi(\mathbf{y}) = &  \sum_{j = 1}^{n} \gamma_{j} y_{j} \\
-        \mathrm{s.t.} & \sum_{j = 1}^{n} y_{j} = 1
-\end{array}
+\rho \left[ \sum_{j = 1}^{n} y_{j} - 1 \right]^{2}
 ```
 
-""" struct OneHot <: LinearEncoding end
+is added to the objective function.
+
+"""
+struct OneHot <: LinearEncoding end
 
 function VirtualVariable{T}(
     e::OneHot,
@@ -516,11 +539,12 @@ end
 A *sequential encoding* is one of the form
 
 ```math
-\xi(\mathbf{y}) = \sum_{i = 1}^{n} \gamma_{i} \left({y_{i + 1} \ast y_{i}}\right)
+\xi[\set{\gamma_{j}}_{j \in [n]}](\mathbf{y}) = \sum_{j = 1}^{n} \gamma_{j} \left({y_{j + 1} \ast y_{j}}\right)
 ```
 
 where ``\mathbf{y} \in \mathbb{B}^{n + 1}`` and ``\ast`` is a binary operator.
-""" abstract type SequentialEncoding <: Encoding end
+"""
+abstract type SequentialEncoding <: Encoding end
 
 function encode!(
     model::VirtualModel{T},
@@ -539,17 +563,18 @@ end
 @doc raw"""
     DomainWall()
 
-The Domain Wall[^Chancellor2019] encoding method is a sequential approach that requires only
-``n - 1`` bits to represent ``n`` distinct values.
+The Domain Wall[^Chancellor2019] encoding method is a sequential approach that requires ``n - 1`` bits to represent ``n`` distinct values.
 
-!!! table "Encoding Analysis"
-    |             | bits      | linear | quadratic | ``\Delta`` |
-    | :-:         | :--:      | :----: | :-------: | :--------: |
-    | Domain Wall | ``n - 1`` | ``n``  |           | ``O(n)``   |
+```math
+\xi{[\set{\gamma_{j}}_{j \in [n]}]}(\mathbf{y}) = \sum_{j = 1}^{n} \gamma_{j} (y_{j} - y_{j + 1}) ~\textrm{s.t.}~ \sum_{j = 1}^{n} y_{j} \oplus y_{j + 1} = 1, y_{1} = 1, y_{n + 1} = 0
+```
+
+where ``\mathbf{y} \in \mathbb{B}^{n + 1}``.
 
 [^Chancellor2019]:
     Nicholas Chancellor, **Domain wall encoding of discrete variables for quantum annealing and QAOA**, *Quantum Science Technology 4*, 2019.
-""" struct DomainWall <: SequentialEncoding end
+"""
+struct DomainWall <: SequentialEncoding end
 
 function VirtualVariable{T}(
     e::DomainWall,
@@ -603,16 +628,20 @@ end
 @doc raw"""
     Bounded{E,T}(μ::T) where {E<:Encoding,T}
 
-The bounded-coefficient encoding method[^Karimi2019] consists in limiting the magnitude of the
-coefficients in the encoding expansion to a parameter ``\mu``.
+The bounded-coefficient encoding method[^Karimi2019] consists in limiting the magnitude of the coefficients in the encoding expansion to a parameter ``\mu``.
 
 [^Karimi2019]:
     Karimi, S. & Ronagh, P. **Practical integer-to-binary mapping for quantum annealers**. *Quantum Inf Process 18, 94* (2019). [{doi}](https://doi.org/10.1007/s11128-019-2213-x)
 
+This can be applied to the [`Unary`](@ref), [`Binary`](@ref) and [`Arithmetic`](@ref) encoding schemes, as discussed below.
+
+    Bounded{Unary,T}(μ::T) where {T}
+
+Given ``\mu > 0``, let ``x \in [a, b] \subset \mathbb{Z}`` and ``n = b - a``.
+
     Bounded{Binary,T}(μ::T) where {T}
 
-## Rationale
-Let ``x \in [a, b] \subset \mathbb{Z}`` and ``n = b - a``.
+Given ``\mu > 0``, let ``x \in [a, b] \subset \mathbb{Z}`` and ``n = b - a``.
 
 First,
 
@@ -641,6 +670,14 @@ and
 \epsilon = n - 2^{k} + 1 - r \times \mu
 ```
 
+Therefore,
+
+```math
+\xi_{\mu}{[a, b]}(\mathbf{y}) = \sum_{j = 1} \gamma_{j} y_{j}
+```
+
+where
+
 ```math
 \gamma_{j} = \left\lbrace\begin{array}{cl}
     2^{j} & \text{if } 1 \le j \le k   \\
@@ -649,12 +686,8 @@ and
 \end{array}\right.
 ```
 
-    Bounded{Unary,T}(μ::T) where {T}
-
-Let ``x \in [a, b] \subset \mathbb{Z}`` and ``n = b - a``.
-
-
-""" struct Bounded{E<:LinearEncoding,T} <: LinearEncoding
+"""
+struct Bounded{E<:LinearEncoding,T} <: LinearEncoding
     μ::T
 
     function Bounded{E,T}(μ::T) where {E,T}