From c0f946acd45fedfa678b785a66a4a030f47c3867 Mon Sep 17 00:00:00 2001 From: Alexander Cai Date: Fri, 6 Sep 2024 12:51:46 -0400 Subject: [PATCH] general refactoring --- Makefile | 16 +- book/_toc.yml | 13 +- book/appendix.md | 6 - book/background.md | 37 ++++ book/bandits.md | 247 ++++++--------------------- book/challenges.md | 76 --------- book/contextual_bandits.md | 168 ++++++++++++++++++ book/exploration.md | 3 +- book/fitted_dp.md | 104 +----------- book/imitation_learning.md | 1 + book/index.md | 176 ++++++++++++++++++- book/intro.md | 153 ----------------- book/mdps.md | 307 +++++++++++++++++---------------- book/pg.md | 149 +++++++++++++--- book/planning.md | 15 ++ book/shared/npg_line.png | Bin 0 -> 32010 bytes book/shared/references.bib | 329 ++++++++++++++++++++---------------- book/shared/trajectory.png | Bin 0 -> 36453 bytes book/supervised_learning.md | 120 +++++++++++++ environment.yml | 2 +- graphs.md | 23 +++ 21 files changed, 1074 insertions(+), 871 deletions(-) delete mode 100644 book/appendix.md create mode 100644 book/background.md delete mode 100644 book/challenges.md create mode 100644 book/contextual_bandits.md delete mode 100644 book/intro.md create mode 100644 book/planning.md create mode 100644 book/shared/npg_line.png create mode 100644 book/shared/trajectory.png create mode 100644 book/supervised_learning.md create mode 100644 graphs.md diff --git a/Makefile b/Makefile index 3b20ae7..c2664a7 100644 --- a/Makefile +++ b/Makefile @@ -2,18 +2,17 @@ ENV_NAME = rlbook RUN = micromamba run -n $(ENV_NAME) -_NOTEBOOKS = $(addprefix book/, intro bandits mdps fitted_dp control pg exploration) - -NOTEBOOKS = $(addsuffix .md, $(_NOTEBOOKS)) - -IPYNBS = $(addsuffix .ipynb, $(_NOTEBOOKS)) +_NOTEBOOKS = $(addprefix book/, bandits contextual_bandits control exploration fitted_dp imitation_learning mdps pg planning supervised_learning) _META = \ - appendix \ + background \ bibliography \ - challenges \ index +NOTEBOOKS = $(addsuffix .md, $(_NOTEBOOKS)) + +IPYNBS = $(addsuffix .ipynb, $(_NOTEBOOKS)) + META = $(addsuffix .md, $(addprefix book/, $(_META))) SOLUTIONS = book/solutions/bandits.py @@ -50,3 +49,6 @@ lab: lint: $(RUN) ruff check --fix $(IPYNBS) + +publish: book/_build/html + $(RUN) ghp-import --cname "rlbook.adzc.ai" --no-jekyll --push --force book/_build/html diff --git a/book/_toc.yml b/book/_toc.yml index 8ecb5f0..018f641 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -6,14 +6,15 @@ root: index.md options: numbered: true chapters: - - file: intro.md - - file: bandits.md - file: mdps.md - - file: fitted_dp.md - file: control.md + - file: bandits.md + - file: supervised_learning.md + - file: fitted_dp.md - file: pg.md - - file: exploration.md - file: imitation_learning.md -# - file: challenges -# - file: appendix + - file: planning.md + - file: exploration.md + - file: contextual_bandits.md - file: bibliography.md + - file: background.md diff --git a/book/appendix.md b/book/appendix.md deleted file mode 100644 index d65e599..0000000 --- a/book/appendix.md +++ /dev/null @@ -1,6 +0,0 @@ -# Derivations - -## Natural policy gradient - -The TRPO objective is -$$\max_\theta \E_{s_0, \dots, s_{H-1} \sim \rho_{\theta^k}}$$ diff --git a/book/background.md b/book/background.md new file mode 100644 index 0000000..5b691d1 --- /dev/null +++ b/book/background.md @@ -0,0 +1,37 @@ +(background)= +# Appendix: Background + +## O notation + +Throughout this chapter and the rest of the book, we will describe the +asymptotic behavior of a function using $O$ notation. 
+ +For two functions $f(t)$ and $g(t)$, we say that $f(t) \le O(g(t))$ if +$f$ is asymptotically upper bounded by $g$. Formally, this means that +there exists some constant $C > 0$ such that $f(t) \le C \cdot g(t)$ for +all $t$ past some point $t_0$. + +We say $f(t) < o(g(t))$ if asymptotically $f$ grows strictly slower than +$g$. Formally, this means that for *any* scalar $C > 0$, there exists +some $t_0$ such that $f(t) \le C \cdot g(t)$ for all $t > t_0$. +Equivalently, we say $f(t) < o(g(t))$ if +$\lim_{t \to \infty} f(t)/g(t) = 0$. + +$f(t) = \Theta(g(t))$ means that $f$ and $g$ grow at the same rate +asymptotically. That is, $f(t) \le O(g(t))$ and $g(t) \le O(f(t))$. + +Finally, we use $f(t) \ge \Omega(g(t))$ to mean that $g(t) \le O(f(t))$, +and $f(t) > \omega(g(t))$ to mean that $g(t) < o(f(t))$. + +We also use the notation $\tilde O(g(t))$ to hide logarithmic factors. +That is, $f(t) = \tilde O(g(t))$ if there exists some constant $C$ such +that $f(t) \le C \cdot g(t) \cdot \log^k(t)$ for some $k$ and all $t$. + +Occasionally, we will also use $O(f(t))$ (or one of the other symbols) +as shorthand to manipulate function classes. For example, we might write +$O(f(t)) + O(g(t)) = O(f(t) + g(t))$ to mean that the sum of two +functions in $O(f(t))$ and $O(g(t))$ is in $O(f(t) + g(t))$. + +## Python + + diff --git a/book/bandits.md b/book/bandits.md index 77910cb..331bf0e 100644 --- a/book/bandits.md +++ b/book/bandits.md @@ -14,7 +14,7 @@ kernelspec: (bandits)= # Multi-Armed Bandits -```{code-cell} +```{code-cell} ipython3 :tags: [hide-input] from jaxtyping import Float, Array @@ -22,7 +22,6 @@ import numpy as np # from bokeh.plotting import figure, show, output_notebook import latexify -from abc import ABC, abstractmethod # "Abstract Base Class" from typing import Callable, Union import matplotlib.pyplot as plt @@ -36,10 +35,12 @@ plt.style.use("fivethirtyeight") def random_argmax(ary: Array) -> int: + """Take an argmax and randomize between ties.""" max_idx = np.flatnonzero(ary == ary.max()) return np.random.choice(max_idx).item() +# used as decorator latex = latexify.algorithmic( prefixes={"mab"}, identifiers={"arm": "a_t", "reward": "r", "means": "mu"}, @@ -82,7 +83,7 @@ The name “multi-armed bandits” comes from slot machines in casinos, which ar Let $K$ denote the number of arms. We’ll label them $0, \dots, K-1$ and use *superscripts* to indicate the arm index; since we seldom need to raise a number to a power, this won’t cause much confusion. In this chapter, we’ll consider the **Bernoulli bandit** setting from the examples above, where arm $k$ either returns reward $1$ with probability $\mu^k$ or $0$ otherwise. The agent gets to pull an arm $T$ times in total. We can formalize the Bernoulli bandit in the following Python code: -```{code-cell} +```{code-cell} ipython3 class MAB: """ The Bernoulli multi-armed bandit environment. @@ -104,14 +105,14 @@ class MAB: return +reward ``` -```{code-cell} +```{code-cell} ipython3 mab = MAB(means=np.array([0.1, 0.8, 0.4]), T=100) ``` In pseudocode, the agent’s interaction with the MAB environment can be described by the following process: -```{code-cell} +```{code-cell} ipython3 @latex def mab_loop(mab: MAB, agent: "Agent") -> int: for t in range(mab.T): @@ -125,8 +126,8 @@ mab_loop The `Agent` class stores the pull history and uses it to decide which arm to pull next. Since we are working with Bernoulli bandits, we can summarize the pull history concisely in a $\mathbb{N}^{K \times 2}$ array. 
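To make this concrete, here is a small sketch of how the counts in such an array can be used to recover the number of pulls and the empirical mean reward of each arm. The column convention used here (column $0$ counts zero-reward pulls, column $1$ counts one-reward pulls) is an assumption made for illustration only.

```{code-cell} ipython3
import numpy as np

# A sketch of the (K, 2) pull history for K = 3 arms.
# Assumed convention: history[k, r] counts how many times arm k returned reward r.
history = np.zeros((3, 2), dtype=int)
history[1, 1] += 3  # arm 1 returned reward 1 three times
history[1, 0] += 1  # arm 1 returned reward 0 once

pulls = history.sum(axis=1)                   # number of pulls of each arm
means = history[:, 1] / np.maximum(pulls, 1)  # empirical mean reward (0 if never pulled)
pulls, means
```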
-```{code-cell} -class Agent(ABC): +```{code-cell} ipython3 +class Agent: def __init__(self, K: int, T: int): """The MAB agent that decides how to choose an arm given the past history.""" self.K = K @@ -135,14 +136,12 @@ class Agent(ABC): self.choices = [] self.history = np.zeros((K, 2), dtype=int) - @abstractmethod def choose_arm(self) -> int: """Choose an arm of the MAB. Algorithm-specific.""" ... - @property def count(self) -> int: - """The number of pulls made.""" + """The number of pulls made. Also the current step index.""" return len(self.rewards) def update_history(self, arm: int, reward: int): @@ -170,7 +169,7 @@ $$ $$ :::: -```{code-cell} +```{code-cell} ipython3 def regret_per_step(mab: MAB, agent: Agent): """Get the difference from the average reward of the optimal arm. The sum of these is the regret.""" return [mab.means[mab.best_arm] - mab.means[arm] for arm in agent.choices] @@ -198,7 +197,7 @@ We’d like to achieve **sublinear regret** in expectation, i.e. $\E[\text{Regre The rest of the chapter comprises a series of increasingly sophisticated MAB algorithms. -```{code-cell} +```{code-cell} ipython3 :tags: [hide-input] def plot_strategy(mab: MAB, agent: Agent): @@ -226,7 +225,7 @@ def plot_strategy(mab: MAB, agent: Agent): A trivial strategy is to always choose arms at random (i.e. "pure exploration"). -```{code-cell} +```{code-cell} ipython3 :label: pure_exploration class PureExploration(Agent): @@ -252,7 +251,7 @@ $$ This scales as $\Theta(T)$, i.e. *linear* in the number of timesteps $T$. There’s no learning here: the agent doesn’t use any information about the environment to improve its strategy. You can see that the distribution over its arm choices always appears "(uniformly) random". -```{code-cell} +```{code-cell} ipython3 agent = PureExploration(mab.K, mab.T) mab_loop(mab, agent) plot_strategy(mab, agent) @@ -264,7 +263,7 @@ How might we improve on pure exploration? Instead, we could try each arm once, and then commit to the one with the highest observed reward. We’ll call this the **pure greedy** strategy. -```{code-cell} +```{code-cell} ipython3 :label: pure_greedy class PureGreedy(Agent): @@ -297,7 +296,7 @@ $$ Which is still $\Theta(T)$, the same as pure exploration! -```{code-cell} +```{code-cell} ipython3 agent = PureGreedy(mab.K, mab.T) mab_loop(mab, agent) plot_strategy(mab, agent) @@ -313,7 +312,7 @@ The cumulative regret is a straight line because the regret only depends on the We can improve the pure greedy algorithm as follows: let’s reduce the variance of the reward estimates by pulling each arm $N_{\text{explore}}> 1$ times before committing. This is called the **explore-then-commit** strategy. Note that the “pure greedy” strategy above is just the special case where $N_{\text{explore}}= 1$. -```{code-cell} +```{code-cell} ipython3 class ExploreThenCommit(Agent): def __init__(self, K: int, T: int, N_explore: int): super().__init__(K, T) @@ -323,7 +322,7 @@ class ExploreThenCommit(Agent): return solutions.etc_choose_arm(self) ``` -```{code-cell} +```{code-cell} ipython3 agent = ExploreThenCommit(mab.K, mab.T, mab.T // 15) mab_loop(mab, agent) plot_strategy(mab, agent) @@ -466,18 +465,23 @@ beforehand – we can instead interleave exploration and exploitation by, at each timestep, choosing a random action with some probability. We call this the **epsilon-greedy** algorithm. 
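Written out, the strategy is the following, where $\hat \mu^k_t$ denotes the sample mean of the rewards observed from arm $k$ up to time $t$, $\epsilon_t$ is the exploration probability at time $t$, and ties in the maximum may be broken arbitrarily:

$$
a_t = \begin{cases}
\text{an arm drawn uniformly at random} & \text{with probability } \epsilon_t, \\
\argmax_{k} \hat \mu^k_t & \text{with probability } 1 - \epsilon_t.
\end{cases}
$$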
-```{code-cell} +```{code-cell} ipython3 class EpsilonGreedy(Agent): - def __init__(self, K: int, T: int, get_epsilon: Callable[[int], float]): + def __init__( + self, + K: int, + T: int, + ε_array: Float[Array, " T"], + ): super().__init__(K, T) - self.get_epsilon = get_epsilon + self.ε_array = ε_array def choose_arm(self): return solutions.epsilon_greedy_choose_arm(self) ``` -```{code-cell} -agent = EpsilonGreedy(mab.K, mab.T, lambda t: 0.1) +```{code-cell} ipython3 +agent = EpsilonGreedy(mab.K, mab.T, np.full(mab.T, 0.1)) mab_loop(mab, agent) plot_strategy(mab, agent) ``` @@ -566,9 +570,14 @@ This bound would then suffice for applying the UCB algorithm! That is, the upper $$M^k_t := \hat \mu^k_t + \sqrt{\frac{\ln(2t/\delta')}{2N^k_t}},$$ -where we can choose $\delta'$ depending on how tight we want the interval to be. A smaller $\delta'$ would give us a larger and higher-confidence interval, and vice versa. We can now use this to define the UCB algorithm. +where we can choose $\delta'$ depending on how tight we want the interval to be. + +- A smaller $\delta'$ would give us a larger and higher-confidence interval, emphasizing the exploration term. +- A larger $\delta'$ would give a tighter and lower-confidence interval, prioritizing the current sample averages. + +We can now use this to define the UCB algorithm. -```{code-cell} +```{code-cell} ipython3 class UCB(Agent): def __init__(self, K: int, T: int, delta: float): super().__init__(K, T) @@ -589,8 +598,8 @@ Intuitively, UCB prioritizes arms where: As desired, this explores in a smarter, *adaptive* way compared to the previous algorithms. Does it achieve lower regret? -```{code-cell} -agent = UCB(mab.K, mab.T, 0.05) +```{code-cell} ipython3 +agent = UCB(mab.K, mab.T, 0.9) mab_loop(mab, agent) plot_strategy(mab, agent) ``` @@ -650,8 +659,8 @@ $$ \end{aligned} $$ -In fact, we can do a more sophisticated analysis to trim off a factor of -$\sqrt{K}$ and show $\text{Regret}_T = \tilde O(\sqrt{TK})$. +In fact, we can do a more sophisticated analysis to trim off a factor of $\sqrt{K}$ +and show $\text{Regret}_T = \tilde O(\sqrt{TK})$. +++ @@ -690,16 +699,18 @@ From this Bayesian perspective, the **Thompson sampling** algorithm follows naturally: just sample from the distribution of the optimal arm, given the observations! -```{code-cell} -class Distribution(ABC): - @abstractmethod - def sample(self) -> Float[Array, " K"]: ... +```{code-cell} ipython3 +class Distribution: + def sample(self) -> Float[Array, " K"]: + """Sample a vector of means for the K arms.""" + ... - @abstractmethod - def update(self, arm: int, reward: float): ... + def update(self, arm: int, reward: float): + """Condition on obtaining `reward` from the given arm.""" + ... ``` -```{code-cell} +```{code-cell} ipython3 class ThompsonSampling(Agent): def __init__(self, K: int, T: int, prior: Distribution): super().__init__(K, T) @@ -753,7 +764,7 @@ distribution upon observing a reward, rather than having to recompute the entire posterior distribution from scratch. ::: -```{code-cell} +```{code-cell} ipython3 class Beta(Distribution): def __init__(self, K: int, alpha: int = 1, beta: int = 1): self.alphas = np.full(K, alpha) @@ -767,7 +778,7 @@ class Beta(Distribution): self.betas[arm] += 1 - reward ``` -```{code-cell} +```{code-cell} ipython3 beta_distribution = Beta(mab.K) agent = ThompsonSampling(mab.K, mab.T, beta_distribution) mab_loop(mab, agent) @@ -792,161 +803,7 @@ the *constant factor* is optimal as well. 
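For reference, the conjugate update implemented by the `Beta` class above can be written out explicitly: if our current belief about arm $k$ is $\text{Beta}(\alpha^k, \beta^k)$ and we observe a reward $r \in \{0, 1\}$ from that arm, the posterior becomes

$$
\text{Beta}\big(\alpha^k + r, \; \beta^k + (1 - r)\big),
$$

which is exactly the pair of increments performed in `update`.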
+++ -## Contextual bandits - -In the above MAB environment, the reward distributions of the arms -remain constant. However, in many real-world settings, we might receive -additional information that affects these distributions. For example, in -the online advertising case where each arm corresponds to an ad we could -show the user, we might receive information about the user's preferences -that changes how likely they are to click on a given ad. We can model -such environments using **contextual bandits**. - -:::{prf:definition} Contextual bandit -:label: contextual_bandit - -At each timestep $t$, a new *context* -$x_t$ is drawn from some distribution $\nu_{\text{x}}$. The learner gets -to observe the context, and choose an action $a_t$ according to some -context-dependent policy $\pi_t(x_t)$. Then, the learner observes the -reward from the chosen arm $r_t \sim \nu^{a_t}(x_t)$. The reward -distribution also depends on the context. -::: - -+++ - -Assuming our context is *discrete*, we can just perform the same -algorithms, treating each context-arm pair as its own arm. This gives us -an enlarged MAB of $K |\mathcal{X}|$ arms. - -:::{attention} -Write down the UCB algorithm for this enlarged MAB. That is, write an -expression for $\pi_t(x_t) = \argmax_a \dots$. -::: - -Recall that running UCB for $T$ timesteps on an MAB with $K$ arms -achieves a regret bound of $\tilde{O}(\sqrt{TK})$. So in this problem, -we would achieve regret $\tilde{O}(\sqrt{TK|\mathcal{X}|})$ in the -contextual MAB, which has a polynomial dependence on $|\mathcal{X}|$. -But in a situation where we have large, or even infinitely many -contexts, e.g. in the case where our context is a continuous value, this -becomes intractable. - -Note that this "enlarged MAB" treats the different contexts as entirely -unrelated to each other, while in practice, often contexts are *related* -to each other in some way: for example, we might want to advertise -similar products to users with similar preferences. How can we -incorporate this structure into our solution? - -+++ - -(lin_ucb)= -### Linear contextual bandits - -We want to model the *mean reward* of arm $k$ as a function of the -context, i.e. $\mu^k(x)$. One simple model is the *linear* one: -$\mu^k(x) = x^\top \theta^k$, where $x \in \mathcal{X} = \mathbb{R}^d$ and -$\theta^k \in \mathbb{R}^d$ describes a *feature direction* for arm $k$. Recall -that **supervised learning** gives us a way to estimate a conditional -expectation from samples: We learn a *least squares* estimator from the -timesteps where arm $k$ was selected: -$$\hat \theta_t^k = \argmin_{\theta \in \mathbb{R}^d} \sum_{\{ i \in [t] : a_i = k \}} (r_i - x_i^\top \theta)^2.$$ -This has the closed-form solution known as the *ordinary least squares* -(OLS) estimator: - -:::{math} -:label: ols_bandit - -\begin{aligned} - \hat \theta_t^k & = (A_t^k)^{-1} \sum_{\{ i \in [t] : a_i = k \}} x_i r_i \\ - \text{where} \quad A_t^k & = \sum_{\{ i \in [t] : a_i = k \}} x_i x_i^\top. -\end{aligned} -::: - -We can now apply the UCB algorithm in this environment in order to -balance *exploration* of new arms and *exploitation* of arms that we -believe to have high reward. But how should we construct the upper -confidence bound? Previously, we treated the pulls of an arm as i.i.d. -samples and used Hoeffding's inequality to bound the distance of the -sample mean, our estimator, from the true mean. However, now our -estimator is not a sample mean, but rather the OLS estimator above {eq}`ols_bandit`. 
Instead, we'll use **Chebyshev's -inequality** to construct an upper confidence bound. - -:::{prf:theorem} Chebyshev's inequality -:label: chebyshev - -For a random variable $Y$ such that -$\E Y = 0$ and $\E Y^2 = \sigma^2$, -$$|Y| \le \beta \sigma \quad \text{with probability} \ge 1 - \frac{1}{\beta^2}$$ -::: - -Since the OLS estimator is known to be unbiased (try proving this -yourself), we can apply Chebyshev's inequality to -$x_t^\top (\hat \theta_t^k - \theta^k)$: - -$$\begin{aligned} - x_t^\top \theta^k \le x_t^\top \hat \theta_t^k + \beta \sqrt{x_t^\top (A_t^k)^{-1} x_t} \quad \text{with probability} \ge 1 - \frac{1}{\beta^2} -\end{aligned}$$ - -:::{attention} -We haven't explained why $x_t^\top (A_t^k)^{-1} x_t$ is the correct -expression for the variance of $x_t^\top \hat \theta_t^k$. This result -follows from some algebra on the definition of the OLS estimator {eq}`ols_bandit`. -::: - -The first term is exactly our predicted reward $\hat \mu^k_t(x_t)$. To -interpret the second term, note that -$$x_t^\top (A_t^k)^{-1} x_t = \frac{1}{N_t^k} x_t^\top (\Sigma_t^k)^{-1} x_t,$$ -where -$$\Sigma_t^k = \frac{1}{N_t^k} \sum_{\{ i \in [t] : a_i = k \}} x_i x_i^\top$$ -is the empirical covariance matrix of the contexts (assuming that the -context has mean zero). That is, the learner is encouraged to choose -arms when $x_t$ is *not aligned* with the data seen so far, or if arm -$k$ has not been explored much and so $N_t^k$ is small. - -We can now substitute these quantities into UCB to get the **LinUCB** -algorithm: - -```{code-cell} -class LinUCBPseudocode(Agent): - def __init__( - self, K: int, T: int, D: int, lam: float, get_c: Callable[[int], float] - ): - super().__init__(K, T) - self.lam = lam - self.get_c = get_c - self.contexts = [None for _ in range(K)] - self.A = np.repeat(lam * np.eye(D)[...], K) - self.targets = np.zeros(K, D) - self.w = np.zeros(K, D) - - def choose_arm(self, context: Float[Array, " D"]): - c = self.get_c(self.count) - scores = self.w @ context + c * np.sqrt( - context.T @ np.linalg.solve(self.A, context) - ) - return random_argmax(scores) - - def update_history(self, context: Float[Array, " D"], arm: int, reward: int): - self.A[arm] += np.outer(context, context) - self.targets[arm] += context * reward - self.w[arm] = np.linalg.solve(self.A[arm], self.targets[arm]) -``` - -:::{attention} -Note that the matrix $A_t^k$ above might not be invertible. When does this occur? One way to address this is to include a $\lambda I$ regularization term to ensure that $A_t^k$ is invertible. This is equivalent to solving a *ridge regression* problem instead of the unregularized least squares problem. Implement this solution. TODO SOLUTION CURRENTLY SHOWN -::: - -+++ - -$c_t$ is similar to the $\log (2t/\delta')$ term of UCB: It controls the -width of the confidence interval. Here, we treat it as a tunable -parameter, though in a theoretical analysis, it would depend on $A_t^k$ -and the probability $\delta$ with which the bound holds. - -Using similar tools for UCB, we can also prove an $\tilde{O}(\sqrt{T})$ -regret bound. The full details of the analysis can be found in Section 3 of {cite}`agarwal_reinforcement_2022`. - -+++ - ## Summary + +In this chapter, +we explored the **multi-armed bandit** setting for analyzing sequential decision-making in an unknown environment. 
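We began with strategies that either explore blindly or commit too early, namely pure exploration and the pure greedy strategy, both of which suffer regret that grows linearly in $T$. Explore-then-commit and the epsilon-greedy algorithm balance exploration and exploitation more deliberately, while the upper confidence bound algorithm and Thompson sampling go further and explore *adaptively*, achieving sublinear regret on the order of $\tilde O(\sqrt{TK})$.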
diff --git a/book/challenges.md b/book/challenges.md deleted file mode 100644 index c8a1af1..0000000 --- a/book/challenges.md +++ /dev/null @@ -1,76 +0,0 @@ -# Guarantees for policy gradient methods - -What kinds of problems are policy gradient methods good at solving? - -For example, consider a very simple MDP in which - -outline - -\# sparse reward setting - -e.g. l/r, move to end; random policy 1/(2\^n) - -no rewards early on, so no gradients - -\## possible solutions - -if simulator: use better starting - -imitation learning (today) - -exploration - ucb-vi - -reward shaping - -\# guarantees for pg - -sl works in many settings - -want to show that some benefits extend to rl - -eg sample efficiency needed for softmax (log linear) policy - -\- eg under npg - -what features do we need for good learning? (approximation error between -ground truth and our function class) - -hopefully samples poly(dim(φ), 1/eps) - -need some coverage over state space - ---- - -but convergence guarantees are hard - -\# imitation learning - -eg how humans learn by imitating experts - -access to expert demonstrations - -use sl to create a policy - -input: senses output: action - -\## setting - -unknown reward function - -assume expert has good policy - -goal is to learn a policy as good as expert - ---- - -\# BC - -e.g. maximum likelihood (stochastic) - -or classification error (deterministic) - -or squared error for continuous actions - ---- - -theorem: il is almost as easy as sl diff --git a/book/contextual_bandits.md b/book/contextual_bandits.md new file mode 100644 index 0000000..1afe425 --- /dev/null +++ b/book/contextual_bandits.md @@ -0,0 +1,168 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.16.2 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(contextual_bandits)= +# Contextual bandits + +In the above MAB environment, the reward distributions of the arms +remain constant. However, in many real-world settings, we might receive +additional information that affects these distributions. For example, in +the online advertising case where each arm corresponds to an ad we could +show the user, we might receive information about the user's preferences +that changes how likely they are to click on a given ad. We can model +such environments using **contextual bandits**. + +:::{prf:definition} Contextual bandit +:label: contextual_bandit + +At each timestep $t$, a new *context* +$x_t$ is drawn from some distribution $\nu_{\text{x}}$. The learner gets +to observe the context, and choose an action $a_t$ according to some +context-dependent policy $\pi_t(x_t)$. Then, the learner observes the +reward from the chosen arm $r_t \sim \nu^{a_t}(x_t)$. The reward +distribution also depends on the context. +::: + ++++ + +Assuming our context is *discrete*, we can just perform the same +algorithms, treating each context-arm pair as its own arm. This gives us +an enlarged MAB of $K |\mathcal{X}|$ arms. + +:::{attention} +Write down the UCB algorithm for this enlarged MAB. That is, write an +expression for $\pi_t(x_t) = \argmax_a \dots$. +::: + +Recall that running UCB for $T$ timesteps on an MAB with $K$ arms +achieves a regret bound of $\tilde{O}(\sqrt{TK})$. So in this problem, +we would achieve regret $\tilde{O}(\sqrt{TK|\mathcal{X}|})$ in the +contextual MAB, which has a polynomial dependence on $|\mathcal{X}|$. +But in a situation where we have large, or even infinitely many +contexts, e.g. 
in the case where our context is a continuous value, this +becomes intractable. + +Note that this "enlarged MAB" treats the different contexts as entirely +unrelated to each other, while in practice, often contexts are *related* +to each other in some way: for example, we might want to advertise +similar products to users with similar preferences. How can we +incorporate this structure into our solution? + ++++ + +(lin_ucb)= +## Linear contextual bandits + +We want to model the *mean reward* of arm $k$ as a function of the +context, i.e. $\mu^k(x)$. One simple model is the *linear* one: +$\mu^k(x) = x^\top \theta^k$, where $x \in \mathcal{X} = \mathbb{R}^d$ and +$\theta^k \in \mathbb{R}^d$ describes a *feature direction* for arm $k$. Recall +that **supervised learning** gives us a way to estimate a conditional +expectation from samples: We learn a *least squares* estimator from the +timesteps where arm $k$ was selected: +$$\hat \theta_t^k = \argmin_{\theta \in \mathbb{R}^d} \sum_{\{ i \in [t] : a_i = k \}} (r_i - x_i^\top \theta)^2.$$ +This has the closed-form solution known as the *ordinary least squares* +(OLS) estimator: + +:::{math} +:label: ols_bandit + +\begin{aligned} + \hat \theta_t^k & = (A_t^k)^{-1} \sum_{\{ i \in [t] : a_i = k \}} x_i r_i \\ + \text{where} \quad A_t^k & = \sum_{\{ i \in [t] : a_i = k \}} x_i x_i^\top. +\end{aligned} +::: + +We can now apply the UCB algorithm in this environment in order to +balance *exploration* of new arms and *exploitation* of arms that we +believe to have high reward. But how should we construct the upper +confidence bound? Previously, we treated the pulls of an arm as i.i.d. +samples and used Hoeffding's inequality to bound the distance of the +sample mean, our estimator, from the true mean. However, now our +estimator is not a sample mean, but rather the OLS estimator above {eq}`ols_bandit`. Instead, we'll use **Chebyshev's +inequality** to construct an upper confidence bound. + +:::{prf:theorem} Chebyshev's inequality +:label: chebyshev + +For a random variable $Y$ such that +$\E Y = 0$ and $\E Y^2 = \sigma^2$, +$$|Y| \le \beta \sigma \quad \text{with probability} \ge 1 - \frac{1}{\beta^2}$$ +::: + +Since the OLS estimator is known to be unbiased (try proving this +yourself), we can apply Chebyshev's inequality to +$x_t^\top (\hat \theta_t^k - \theta^k)$: + +$$\begin{aligned} + x_t^\top \theta^k \le x_t^\top \hat \theta_t^k + \beta \sqrt{x_t^\top (A_t^k)^{-1} x_t} \quad \text{with probability} \ge 1 - \frac{1}{\beta^2} +\end{aligned}$$ + +:::{attention} +We haven't explained why $x_t^\top (A_t^k)^{-1} x_t$ is the correct +expression for the variance of $x_t^\top \hat \theta_t^k$. This result +follows from some algebra on the definition of the OLS estimator {eq}`ols_bandit`. +::: + +The first term is exactly our predicted reward $\hat \mu^k_t(x_t)$. To +interpret the second term, note that +$$x_t^\top (A_t^k)^{-1} x_t = \frac{1}{N_t^k} x_t^\top (\Sigma_t^k)^{-1} x_t,$$ +where +$$\Sigma_t^k = \frac{1}{N_t^k} \sum_{\{ i \in [t] : a_i = k \}} x_i x_i^\top$$ +is the empirical covariance matrix of the contexts (assuming that the +context has mean zero). That is, the learner is encouraged to choose +arms when $x_t$ is *not aligned* with the data seen so far, or if arm +$k$ has not been explored much and so $N_t^k$ is small. 
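Combining the estimate with this width term gives a context-dependent upper confidence bound for each arm, and choosing the maximizing arm yields the selection rule sketched below (with $\beta$ standing in for the width parameter, which the code that follows treats as a tunable constant $c_t$):

$$
\pi_t(x_t) = \argmax_{k} \left( x_t^\top \hat \theta_t^k + \beta \sqrt{x_t^\top (A_t^k)^{-1} x_t} \right).
$$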
+ +We can now substitute these quantities into UCB to get the **LinUCB** +algorithm: + +```{code-cell} +class LinUCBPseudocode(Agent): + def __init__( + self, K: int, T: int, D: int, lam: float, get_c: Callable[[int], float] + ): + super().__init__(K, T) + self.lam = lam + self.get_c = get_c + self.contexts = [None for _ in range(K)] + self.A = np.repeat(lam * np.eye(D)[...], K) + self.targets = np.zeros(K, D) + self.w = np.zeros(K, D) + + def choose_arm(self, context: Float[Array, " D"]): + c = self.get_c(self.count) + scores = self.w @ context + c * np.sqrt( + context.T @ np.linalg.solve(self.A, context) + ) + return random_argmax(scores) + + def update_history(self, context: Float[Array, " D"], arm: int, reward: int): + self.A[arm] += np.outer(context, context) + self.targets[arm] += context * reward + self.w[arm] = np.linalg.solve(self.A[arm], self.targets[arm]) +``` + +:::{attention} +Note that the matrix $A_t^k$ above might not be invertible. When does this occur? One way to address this is to include a $\lambda I$ regularization term to ensure that $A_t^k$ is invertible. This is equivalent to solving a *ridge regression* problem instead of the unregularized least squares problem. Implement this solution. TODO SOLUTION CURRENTLY SHOWN +::: + ++++ + +$c_t$ is similar to the $\log (2t/\delta')$ term of UCB: It controls the +width of the confidence interval. Here, we treat it as a tunable +parameter, though in a theoretical analysis, it would depend on $A_t^k$ +and the probability $\delta$ with which the bound holds. + +Using similar tools for UCB, we can also prove an $\tilde{O}(\sqrt{T})$ +regret bound. The full details of the analysis can be found in Section 3 of {cite}`agarwal_reinforcement_2022`. diff --git a/book/exploration.md b/book/exploration.md index 7e4fd2a..24e4736 100644 --- a/book/exploration.md +++ b/book/exploration.md @@ -11,6 +11,7 @@ kernelspec: name: python3 --- +(exploration)= # Exploration in MDPs ## Introduction @@ -74,7 +75,7 @@ Performance of explore-then-exploitexplore_then_exploit_performance As long as e We also explored the exploration-exploitation tradeoff in the chapter on {ref}`bandits`. Recall tthat in the MAB setting, we have $K$ arms, each of which has an unknown reward distribution, and we want to learn which of the arms is *optimal*, i.e. has the highest mean reward. -One algorithm that struck a good balance between exploration and exploitation was the **upper confidence bound** algorithm {ref}`ucb`: For each arm, we construct a *confidence interval* for its true mean award, and then choose the arm with the highest upper confidence bound. In summary, $$k_{t+1} \gets \argmax_{k \in [K]} \frac{R^{k}_t}{N^{k}_t} + \sqrt{\frac{\ln(2t/\delta)}{2 N^{k}_t}}$$ where $N_t^k$ indicates the number of times arm $k$ has been pulled up until time $t$, $R_t^k$ indicates the total reward obtained by pulling arm $k$ up until time $t$, and $\delta > 0$ controls the width of the confidence interval. How might we extend UCB to the MDP case? +One algorithm that struck a good balance between exploration and exploitation was the **upper confidence bound** algorithm {ref}`ucb`: For each arm, we construct a *confidence interval* for its true mean award, and then choose the arm with the highest upper confidence bound. 
In summary, $$k_{t+1} \gets \arg\max_{k \in [K]} \frac{R^{k}_t}{N^{k}_t} + \sqrt{\frac{\ln(2t/\delta)}{2 N^{k}_t}}$$ where $N_t^k$ indicates the number of times arm $k$ has been pulled up until time $t$, $R_t^k$ indicates the total reward obtained by pulling arm $k$ up until time $t$, and $\delta > 0$ controls the width of the confidence interval. How might we extend UCB to the MDP case? Let us formally describe an unknown MDP as an MAB problem. In an unknown MDP, we want to learn which *policy* is optimal. So if we want to apply MAB techniques to solving an MDP, it makes sense to think of *arms* as *policies*. There are $K = (|\mathcal{A}|^{|\mathcal{S}|})^\hor$ deterministic policies in a finite MDP. Then, "pulling" arm $\pi$ corresponds to using $\pi$ to act through a trajectory in the MDP, and observing the total reward. diff --git a/book/fitted_dp.md b/book/fitted_dp.md index 3f6f601..1d79d1c 100644 --- a/book/fitted_dp.md +++ b/book/fitted_dp.md @@ -366,106 +366,4 @@ def fitted_policy_iteration( return π ``` -(supervised_learning)= -## Supervised learning - -This section will cover the details of implementing the `fit` function above: -That is, how to use a dataset of labelled samples $(x_1, y_1), \dots, (x_N, y_N)$ to find a function $f$ that minimizes the empirical risk. -This requires two ingredients: - -1. A **function class** $\mathcal{F}$ to search over -2. A **fitting method** for minimizing the empirical risk over this class - -The two main function classes we will cover are **linear models** and **neural networks**. -Both of these function classes are *parameterized* by some parameters $\theta$, -and the fitting method will search over these parameters to minimize the empirical risk: - -:::{prf:definition} Parameterized empirical risk minimization -:label: parameterized_empirical_risk_minimization - -Given a dataset of samples $(x_1, y_1), \dots, (x_N, y_N)$ and a class of functions $\mathcal{F}$ parameterized by $\theta$, -we to find a parameter (vector) $\hat \theta$ that minimizes the empirical risk: - -$$ -\hat \theta = \arg\min_{\theta} \frac{1}{N} \sum_{i=1}^N (y_i - f_\theta(x_i))^2 -$$ -::: - -The most common fitting method for parameterized models is **gradient descent**. - -:::{prf:algorithm} Gradient descent -Letting $L(\theta) \in \mathbb{R}$ denote the empirical risk in terms of the parameters, -the gradient descent algorithm updates the parameters according to the rule - -$$ -\theta^{t+1} = \theta^t - \eta \nabla_\theta L(\theta^t) -$$ - -where $\eta > 0$ is the **learning rate**. -::: - -```{code-cell} -Params = Float[Array, " D"] - - -def gradient_descent( - loss: Callable[[Params], float], - θ_init: Params, - η: float, - epochs: int, -): - """ - Run gradient descent to minimize the given loss function - (expressed in terms of the parameters). - """ - θ = θ_init - for _ in range(epochs): - θ = θ - η * grad(loss)(θ) - return θ -``` - -### Linear regression - -In linear regression, we assume that the function $f$ is linear in the parameters: - -$$ -\mathcal{F} = \{ x \mapsto \theta^\top x \mid \theta \in \mathbb{R}^D \} -$$ - -This function class is extremely simple and only contains linear functions. -To expand its expressivity, we can _transform_ the input $x$ using some feature function $\phi$, -i.e. $\widetilde x = \phi(x)$, and then fit a linear model in the transformed space instead. 
- -```{code-cell} -def fit_linear(X: Float[Array, "N D"], y: Float[Array, " N"], φ=lambda x: x): - """Fit a linear model to the given dataset using ordinary least squares.""" - X = vmap(φ)(X) - θ = np.linalg.lstsq(X, y, rcond=None)[0] - return lambda x: np.dot(φ(x), θ) -``` - -### Neural networks - -In neural networks, we assume that the function $f$ is a composition of linear functions (represented by matrices $W_i$) and non-linear activation functions (denoted by $\sigma$): - -$$ -\mathcal{F} = \{ x \mapsto \sigma(W_L \sigma(W_{L-1} \dots \sigma(W_1 x + b_1) \dots + b_{L-1}) + b_L) \} -$$ - -where $W_i \in \mathbb{R}^{D_{i+1} \times D_i}$ and $b_i \in \mathbb{R}^{D_{i+1}}$ are the parameters of the $i$-th layer, and $\sigma$ is the activation function. - -This function class is much more expressive and contains many more parameters. -This makes it more susceptible to overfitting on smaller datasets, -but also allows it to represent more complex functions. -In practice, however, neural networks exhibit interesting phenomena during training, -and are often able to generalize well even with many parameters. - -Another reason for their popularity is the efficient **backpropagation** algorithm -for computing the gradient of the empirical risk with respect to the parameters. -Essentially, the hierarchical structure of the neural network, i.e. computing the -output of the network as a composition of functions, allows us to use the chain rule -to compute the gradient of the output with respect to the parameters of each layer. - -{cite}`nielsen_neural_2015` provides a comprehensive introduction to neural networks and backpropagation. - -## Bias correction for Q-learning +## Summary diff --git a/book/imitation_learning.md b/book/imitation_learning.md index e893360..e0176e6 100644 --- a/book/imitation_learning.md +++ b/book/imitation_learning.md @@ -11,6 +11,7 @@ kernelspec: name: python3 --- +(imitation_learning)= # Imitation Learning Imagine you are tasked with learning how to drive. How do, or did, you go about it? diff --git a/book/index.md b/book/index.md index a2cf697..b712b13 100644 --- a/book/index.md +++ b/book/index.md @@ -1,7 +1,177 @@ # Introduction -This is an undergraduate textbook on reinforcement learning. +Welcome to the study of reinforcement learning! +This textbook accompanies the undergraduate course [CS 1840/STAT 184](http://lucasjanson.fas.harvard.edu/CS_Stat_184_0.html) taught at Harvard. +It is intended to be a friendly yet rigorous introduction to this active subfield of machine learning. -```{tableofcontents} -``` ++++ +## Prerequisites + +This book assumes the same prerequisites as the course: You should be familiar with multivariable calculus, linear algebra, and probability. +For Harvard undergraduates, this is fulfilled by Math 21a, Math 21b, and Stat 110, or their equivalents. +Stat 111 is strongly recommended but not required. +Specifically, we will assume that you know the following topics. The _italicized terms_ have brief re-introductions in the text or in the {ref}`background`: + +- **Linear Algebra:** Vectors and matrices, matrix multiplication, matrix + inversion, eigenvalues and eigenvectors. +- **Multivariable Calculus:** Partial derivatives, the chain rule, Taylor series, _gradients, directional derivatives, Lagrange multipliers._ +- **Probability:** Random variables, probability distributions, + expectation and variance, the law of iterated expectations (Adam's rule), covariance, conditional probability, Bayes's rule, and the law of total probability. 
+ +You should also be comfortable with programming in Python. +See {ref}`programming` for more about this textbook's philosophy regarding programming. + ++++ + +## Reinforcement learning in a nutshell + +Broadly speaking, +RL studies **sequential decision-making** in **dynamic environments.** +An RL algorithm finds a **policy,** or strategy, that maximizes the **reward** it obtains from the environment. + +RL provides a powerful framework for attacking a wide variety of problems, +including robotic control, video games and board games, resource management, language modelling, and more. +It also provides an interdisciplinary paradigm for studying animal and human behavior. +Many of the most stunning results in machine learning, ranging from AlphaGo to ChatGPT, are built using RL algorithms. + +How does RL compare to the other two core machine learning paradigms, +**supervised learning** and **unsupervised learning?** + +- **Supervised learning** (SL) concerns itself with learning a mapping from inputs to outputs. + Typically the data takes the form of _statistically independent_ input-output pairs. + In RL, however, the data is generated by the agent interacting with the environment, + meaning the sequential observations of the state are _not independent_ from each other. + + Conversely, SL is a well-studied field that provides many useful tools for RL. + +- **Unsupervised learning** concerns itself with learning the _structure_ of data without the use of outside feedback or labels. + In RL, though, the agent receives a **reward signal** from the environment, + which can be thought of as a sort of feedback. + + Unsupervised learning is crucial in many real-world applications of RL for dimensionality reduction and other purposes. + ++++ + +## Core tasks in reinforcement learning + +What tasks, exactly, does RL comprise? +An RL algorithm must typically solve two main subtasks: + +- **Policy evaluation (prediction):** + How 'good' is a specific state, or state-action pair (under a given policy)? + That is, how much reward does it lead to in the long run? + +- **Policy optimization (control):** + Suppose we fully understand how the environment behaves. + What is the best action to take in every scenario? + + + + + ++++ + +## Course overview + +The course will progress through the following units: + +{ref}`mdps` introduces **Markov Decision Processes,** +the core mathematical framework for describing a large class of interactive environments. + +{ref}`lqr` is a standalone chapter on the **linear quadratic regulator** (LQR), +an important tool for *continuous control*, +in which the state and action spaces are no longer _finite_ but rather _continuous_. +This has widespread applications in robotics. + +{ref}`bandits` introduces the **multi-armed bandit** (MAB) model for _stateless_ sequential decision-making tasks. +In exploring a number of algorithms, +we will see how each of them strikes a different balance between _exploring_ new options and _exploiting_ known options. +This **exploration-exploitation tradeoff** is a core consideration in RL algorithm design. + +{ref}`supervised_learning` is a standalone crash course on some tools from supervised learning that we will use in later chapters. + +{ref}`fitted_dp` introduces **fitted dynamic programming** (fitted DP) algorithms for solving MDPs. +These algorithms use supervised learning to approximately evaluate policies when they cannot be evaluated exactly. 
+ +{ref}`pg` explores an important class of algorithms based on iteratively improving a policy. +We will also encounter the use of _deep neural networks_ to express more complicated policies and approximate complicated functions. + +{ref}`imitation_learning` attempts to learn a good policy from expert demonstrations. +At its most basic, this is an application of supervised learning to RL tasks. + +{ref}`planning` looks at ways to _explicitly_ plan ahead when the environment's dynamics are known. +We will study the _Monte Carlo Tree Search_ heuristic, +which has been used to great success in the famous AlphaGo algorithm and its successors. + +{ref}`exploration` continues to investigate the exploration-exploitation tradeoff. +We will extend ideas from multi-armed bandits to the MDP setting. + +{ref}`contextual_bandits` extends the multi-armed bandit setting with some observed state. + +{ref}`background` contains an overview of selected background mathematical content and programming content. + + + ++++ + +## Notation + +We will use the following notation throughout the book. +This notation is inspired by {cite}`sutton_reinforcement_2018` and {cite}`agarwal_reinforcement_2022`. +We use $[N]$ as shorthand for the set $\{ 0, 1, \dots, N-1 \}$. + +| Element | Space | Definition (of element) | +|:------------:|:------------------------:|:--------------------------| +| $s$ | $\mathcal{S}$ | A state. | +| $a$ | $\mathcal{A}$ | An action. | +| $r$ | | A reward. | +| $\gamma$ | | A discount factor. | +| $\tau$ | $\mathcal{T}$ | A trajectory. | +| $\pi$ | $\Pi$ | A policy. | +| $V^\pi$ | $\mathcal{S} \to \mathbb{R}$ | The value function of policy $\pi$. | +| $Q^\pi$ | $\mathcal{S} \times \mathcal{A} \to \mathbb{R}$ | The action-value function (a.k.a. Q-function) of policy $\pi$. | +| $A^\pi$ | $\mathcal{S} \times \mathcal{A} \to \mathbb{R}$ | The advantage function of policy $\pi$. | +| | $\triangle(\mathcal{X})$ | A distribution supported on $\mathcal{X}$. | +| $\hi$ | $[\hor]$ | Time horizon index of an MDP (subscript). | +| $k$ | $[K]$ | Arm index of a multi-armed bandit (superscript). | +| $t$ | $[T]$ | Iteration index of an algorithm (subscript). | +| $\theta$ | $\Theta$ | A set of parameters. | + +Note that throughout the text, certain symbols will stand for either random variables or fixed values. +We aim to clarify in ambiguous settings. +Be warned that + ++++ + +(programming)= +## Programming + +Why include code in a textbook? +We believe that implementing an algorithm is a strong test of your understanding of it; +mathematical notation can often abstract away details, +while a computer must be given every single instruction. +We have sought to write readable Python code that is self-contained within each file. +This approach is inspired by {cite}`sussman_functional_2013`. +There are some ways in which the code style differs from typical software projects: + +- We keep use of language features to a minimum, + even if it leads to code that could otherwise be more concisely or idiomatically expressed. +- The variable names used in the code match those used in the main text. + For example, the variable `s` will be used instead of the more explicit `state`. + +We also make extensive use of Python _type annotations_ to explicitly specify variable types, including shapes of vectors and matrices using the [jaxtyping](https://github.com/patrick-kidger/jaxtyping) library. + +This is an interactive book built with [Jupyter Book](https://jupyterbook.org/en/stable/intro.html). 
+It uses [Python 3.11](https://docs.python.org/3.11/contents.html). +It uses the [JAX](https://jax.readthedocs.io/en/latest/index.html) library for numerical computing. +JAX was chosen for the clarity of its functional style and due to its mature RL ecosystem, +sustained in large part by the Google DeepMind research group and a large body of open-source contributors. +We use the standard [Gymnasium](https://gymnasium.farama.org/) library for interfacing with RL environments. diff --git a/book/intro.md b/book/intro.md deleted file mode 100644 index 7a006a6..0000000 --- a/book/intro.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -jupytext: - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.16.2 -kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- - -# Introduction - -Welcome to the study of reinforcement learning! This textbook accompanies the undergraduate course [CS/STAT 184](https://shamulent.github.io/CS_Stat184_Fall23.html) taught at Harvard and is intended to be a friendly yet rigorous introduction to this active subfield of machine learning. Here are some questions you might have before embarking on this journey: - -+++ - -## Prerequisites - -This book assumes familiarity with multivariable calculus, linear algebra, and probability. For Harvard undergraduates, this would be fulfilled by Math 21a, Math 21b, and Stat 110. Stat 111 is strongly recommended but not required. Here is a non-comprehensive list of topics you should be familiar with before starting this book: - -- **Linear Algebra:** Vectors and matrices, matrix multiplication, matrix - inversion, eigenvalues and eigenvectors, and the Gram-Schmidt - process. -- **Multivariable Calculus:** Partial derivatives, gradients, - directional derivatives, the chain rule, Taylor series. -- **Probability:** Random variables, probability distributions, - expectation and variance, the law of iterated expectations (Adam's rule), covariance, conditional probability, Bayes's rule, and the law of total probability. - -+++ - -## Reinforcement learning in a nutshell - -Broadly speaking, RL is a subfield of machine learning that studies how an agent can learn to make sequential decisions in a dynamic environment. It provides a powerful framework for attacking a wide variety of problems, including robotic control, video games and board games, resource management, language modelling, and more. It also provides an interdisciplinary paradigm for studying animal and human behavior. Many of the most stunning results in machine learning, ranging from AlphaGo to ChatGPT, are built on top of RL. - -**How does reinforcement learning differ from other machine learning paradigms?** Machine learning is often broken down into the three subcategories of supervised learning, unsupervised learning, and reinforcement learning. - -- **Supervised learning.** Supervised learning concerns itself with - learning a mapping from inputs to outputs (e.g. image - classification). Typically the data takes the form of input-output - pairs that are assumed to be sampled independently from some - generating distribution. In RL, however, the data is generated by - the agent interacting with the environment, meaning the observations - depend on the agent's behaviour and are not independent from each - other. This requires a more general set of tools. - - Conversely, supervised learning is a well-studied field that - provides many useful tools for RL. 
For example, it may be useful to - use supervised learning to predict how valuable a given state is, or - to predict the probability of transitioning to a given state. - -- **Unsupervised learning.** Unsupervised learning deals with learning the - structure of data without the use of labels. - -### Challenges of reinforcement learning - -The RL task is quite general. It will be helpful to decompose it into a few subtasks that can be tackled separately. This decomposition also provides a useful framework for analyzing the algorithms we will encounter. - -**Policy evaluation (prediction):** How 'good' is a specific state, or state-action pair? That is, how much reward does it lead to in the long run? - -**Policy optimization (control):** Suppose we have a complete, accurate model of how the environment behaves. What is the best action to take in every scenario? - -**Recursion (bootstrapping):** How can we "reuse" our current predictions to generate new information? - -**Exploration-exploitation tradeoff:** Should we try new actions, or capitalize on actions that we currently believe to be good? - -+++ - -## Overview - -{ref}`bandits` discusses **multi-armed bandits,** a simple model for -reinforcement learning. In this setting, there are multiple arms, each with their own reward distribution. The agent must decide which arm to pull at each time step. - -{ref}`mdps` introduces (finite) **Markov Decision Processes**, a mathematical framework for describing interactive environments. Certain states and actions will be rewarded, and the agent's goal is to maximize its total reward. - -{ref}`fitted_dp` introduces **fitted dynamic programming** algorithms for solving MDPs when the state space is too large to be enumerated. These algorithms borrow ideas from supervised learning to approximate the value function (discussed in {ref}`mdps`). - -{ref}`lqr` is a standalone chapter on the **linear quadratic regulator**, -an important tool for *continuous control*, in which the state space is no longer finite (i.e. $|\mathcal{S}| < \infty$) but rather continuous (i.e. $|\mathcal{S}| = \mathbb{R}^{n_s}$). - -+++ - -| Chapter | States | Actions | Rewards (or costs) | -|:-------:|:------:|:-------:|:-------:| -| {ref}`bandits` | N/A | Finite | Stochastic | -| {ref}`mdps` | Finite | Finite | Deterministic | -| {ref}`fitted_dp` | Large or continuous | Finite | Deterministic | -| {ref}`lqr` | Continuous | Continuous | Deterministic | - -+++ - -## Notation - -We will use the following notation throughout the book. This notation is -inspired by {cite}`sutton_reinforcement_2018` and {cite}`agarwal_reinforcement_2022`. - -| Notation | Definition | -|:-------------:|:--------------------------| -| $s$ | A state. | -| $a$ | An action. | -| $r$ | A reward. | -| $p$ | A probability. | -| $\pi$ | A policy. | -| $V$ | A value function. | -| $Q$ | An action-value function. | -| $A$ | An advantage function. | -| $\gamma$ | A discount factor. | -| $\tau$ | A trajectory. | -| $\mathcal{S}$ | A state space. | -| $\mathcal{A}$ | An action space. | - -+++ - -## Programming - -Why include code in a textbook? We believe that implementing an algorithm is a strong test of your understanding of it; mathematical notation can often abstract away details, while a computer must be given every detailed instruction. We have sought to write concise, readable, idiomatic Python code that is self-contained within each file. This approach is inspired by {cite}`sussman_functional_2013`. 
- -This is an interactive book built with [Jupyter Book](https://jupyterbook.org/en/stable/intro.html). It uses [Python 3.11](https://docs.python.org/3.11/contents.html). It uses the [JAX](https://jax.readthedocs.io/en/latest/index.html) library for numerical computing. JAX was chosen for the clarity of its functional style and due to its mature RL ecosystem, sustained in large part by the Google DeepMind research group and a large body of open-source contributors. We use the standard [Gymnasium](https://gymnasium.farama.org/) library for interfacing with RL environments. - -+++ - -## O notation - -Throughout this chapter and the rest of the book, we will describe the -asymptotic behavior of a function using $O$ notation. - -For two functions $f(t)$ and $g(t)$, we say that $f(t) \le O(g(t))$ if -$f$ is asymptotically upper bounded by $g$. Formally, this means that -there exists some constant $C > 0$ such that $f(t) \le C \cdot g(t)$ for -all $t$ past some point $t_0$. - -We say $f(t) < o(g(t))$ if asymptotically $f$ grows strictly slower than -$g$. Formally, this means that for *any* scalar $C > 0$, there exists -some $t_0$ such that $f(t) \le C \cdot g(t)$ for all $t > t_0$. -Equivalently, we say $f(t) < o(g(t))$ if -$\lim_{t \to \infty} f(t)/g(t) = 0$. - -$f(t) = \Theta(g(t))$ means that $f$ and $g$ grow at the same rate -asymptotically. That is, $f(t) \le O(g(t))$ and $g(t) \le O(f(t))$. - -Finally, we use $f(t) \ge \Omega(g(t))$ to mean that $g(t) \le O(f(t))$, -and $f(t) > \omega(g(t))$ to mean that $g(t) < o(f(t))$. - -We also use the notation $\tilde O(g(t))$ to hide logarithmic factors. -That is, $f(t) = \tilde O(g(t))$ if there exists some constant $C$ such -that $f(t) \le C \cdot g(t) \cdot \log^k(t)$ for some $k$ and all $t$. - -Occasionally, we will also use $O(f(t))$ (or one of the other symbols) -as shorthand to manipulate function classes. For example, we might write -$O(f(t)) + O(g(t)) = O(f(t) + g(t))$ to mean that the sum of two -functions in $O(f(t))$ and $O(g(t))$ is in $O(f(t) + g(t))$. diff --git a/book/mdps.md b/book/mdps.md index 482b318..300f407 100644 --- a/book/mdps.md +++ b/book/mdps.md @@ -12,18 +12,16 @@ kernelspec: --- (mdps)= -# Finite Markov Decision Processes +# Markov Decision Processes -```{contents} -:local: -``` +## Introduction ```{code-cell} :tags: [hide-input] from typing import NamedTuple from jaxtyping import Float, Array -import jax.numpy as np +import jax.numpy as jnp from jax import vmap from functools import partial ``` @@ -32,16 +30,18 @@ The field of RL studies how an agent can learn to make sequential decisions in a Let’s consider some examples of sequential decision problems to identify the key common properties we’d like to capture: -- **Board games** like chess or Go, where the player takes turns with - the opponent to make moves on the board. - -- **Video games** like Breakout, where the player - controls a character to reach the goal. - -- **Robotic control**, where the robot can move and interact with the - real-world environment to complete some task. +- **Board games and video games,** where a player takes actions in a virtual environment. +- **Inventory management,** where a company must efficiently move resources from producers to consumers. +- **Robotic control**, where a robot can move and interact with the real world to complete some task. -All of these fit into the RL framework. 
Furthermore, these are environments where the **state transitions**, the “rules” of the environment, only depend on the *most recent* state and action. This is called the **Markov property**. +In these environments and many others, the **state transitions**, +the “rules” of the environment, +only depend on the *most recent* state and action (generally speaking). +For example, if you want to take a break while playing a game of chess, +you could take a picture of the board, +and later on reset the board to that state and continue playing; +the past history of moves doesn't matter (generally speaking). +This is called the **Markov property.** :::{prf:definition} Markov property :label: markov @@ -52,38 +52,33 @@ state and action: $$\P(s_{\hi+1} \mid s_0, a_0, \dots, s_\hi, a_\hi) = P(s_{\hi+1} \mid s_\hi, a_\hi)$$ -where $P : \mathcal{S} \times \mathcal{A} \to \Delta(\mathcal{S})$ describes the state transitions. +where $P : \mathcal{S} \times \mathcal{A} \to \triangle(\mathcal{S})$ describes the state transitions. (We’ll elaborate on this notation later in the chapter.) ::: -We’ll see that this simple assumption leads to a rich set of problems -and algorithms. Environments with the Markov property are called -**Markov decision processes** (MDPs) and will be the focus of this -chapter. +Environments that satisfy the Markov property are called **Markov decision processes** (MDPs). +This chapter will focus on introducing core vocabulary for MDPs that will be useful throughout the book. :::{attention} -What information might be encoded in the state for each of -the above examples? What might the valid set of actions be? Describe the -state transitions heuristically and verify that they satisfy the Markov -property. +What information might be encoded in the _state_ for each of the above examples? +What might the valid set of _actions_ be? +Describe the _state transitions_ heuristically and verify that they satisfy the Markov property. ::: -MDPs are usually classified as **finite-horizon**, where the -interactions end after some finite number of time steps, or -**infinite-horizon**, where the interactions can continue indefinitely. -We’ll begin with the finite-horizon case and discuss the -infinite-horizon case in the second half of the chapter. +MDPs are usually classified as **finite-horizon**, where the interactions end after some finite number of time steps, +or **infinite-horizon**, where the interactions can continue indefinitely. +We’ll begin with the finite-horizon case and discuss the infinite-horizon case in the second half of the chapter. -In each setting, we’ll describe how to evaluate different **policies** -(strategies for choosing actions) and how to compute (or approximate) -the **optimal policy** for a given MDP. We’ll introduce the **Bellman -consistency condition**, which allows us to analyze the whole series of -interactions in terms of individual timesteps. +We’ll describe how to _evaluate_ different strategies, called **policies,** and how to compute (or approximate) +the **optimal policy** for a given MDP. +We’ll introduce the **Bellman consistency condition**, which allows us to analyze the whole sequence of interactions in terms of individual timesteps. ## Finite-horizon MDPs +### Definition + ::::{prf:definition} Finite-horizon Markov decision process -:label: finite_mdp +:label: finite_horizon_mdp The components of a finite-horizon Markov decision process are: @@ -93,10 +88,10 @@ The components of a finite-horizon Markov decision process are: 2. 
The **actions** that the agent can take. We use $\mathcal{A}$ to denote the set of possible actions, called the **action space**. -3. Some **initial state distribution** $\mu \in \Delta(\mathcal{S})$. +3. Some **initial state distribution** $\mu \in \triangle(\mathcal{S})$. 4. The **state transitions** (a.k.a. **dynamics**) - $P : \mathcal{S} \times \mathcal{A} \to \Delta(\mathcal{S})$ that describe what state the agent + $P : \mathcal{S} \times \mathcal{A} \to \triangle(\mathcal{S})$ that describe what state the agent transitions to after taking an action. 5. The **reward** signal. In this course we'll take it to be a @@ -104,13 +99,13 @@ The components of a finite-horizon Markov decision process are: $r : \mathcal{S} \times \mathcal{A} \to \mathbb{R}$, but in general many results will extend to a *stochastic* reward signal. -6. A time horizon $H \in \mathbb{N}$ that specifies the number of +6. A time horizon $\hor \in \mathbb{N}$ that specifies the number of interactions in an **episode**. Combined together, these objects specify a finite-horizon Markov decision process: -$$M = (\mathcal{S}, \mathcal{A}, \mu, P, r, H).$$ +$$M = (\mathcal{S}, \mathcal{A}, \mu, P, r, \hor).$$ When there are **finitely** many states and actions, i.e. $|\mathcal{S}|, |\mathcal{A}| < \infty$, we can express @@ -119,23 +114,24 @@ values): $$ \begin{aligned} - r &\in \mathbb{R}^{|\mathcal{S}| \times |\mathcal{A}|} & + \mu &\in [0, 1]^{|\mathcal{S}|} & P &\in [0, 1]^{(|\mathcal{S} \times \mathcal{A}|) \times |\mathcal{S}|} & - \mu &\in [0, 1]^{|\mathcal{S}|} + r &\in \mathbb{R}^{|\mathcal{S}| \times |\mathcal{A}|} \end{aligned} $$ +:::: :::{attention} -Verify that these types make sense! +Verify that the types and shapes provided above make sense! ::: -:::: ```{code-cell} class MDP(NamedTuple): + """A description of a Markov decision process with finitely many states and actions.""" S: int # number of states A: int # number of actions μ: Float[Array, " S"] - P: Float[Array, "S A S"] + P: Float[Array, "S A S"] # "current" state, "current" action, "next" state r: Float[Array, "S A"] H: int γ: float = 1.0 # discount factor (used later) @@ -144,22 +140,21 @@ class MDP(NamedTuple): :::{prf:example} Tidying MDP :label: tidy_mdp -Let's consider an extremely simple decision problem throughout this -chapter: the task of keeping your room tidy! +Let's consider a simple decision problem throughout this chapter: +the task of keeping your room tidy! Your room has the possible states -$\mathcal{S} = \{ \text{orderly}, \text{messy} \}$. You can take either -of the actions $\mathcal{A} = \{ \text{ignore}, \text{tidy} \}$. The room starts -off orderly. +$\mathcal{S} = \{ \text{orderly}, \text{messy} \}.$ +You can take either of the actions $\mathcal{A} = \{ \text{ignore}, \text{tidy} \}.$ +The room starts off orderly. -The **state transitions** are as follows: if you tidy the room, it becomes -(or remains) orderly; if you ignore the room, it _might_ become messy (see table -below). +The **state transitions** are as follows: +if you tidy the room, it becomes (or remains) orderly; +if you ignore the room, it _might_ become messy (see table below). -The **rewards** are as follows: You get penalized for tidying an orderly -room (a waste of time) or ignoring a messy room, but you get rewarded -for ignoring an orderly room (since you can enjoy). Tidying a messy room -is a chore that gives no reward. 
+The **rewards** are as follows: You get penalized for tidying an orderly room (a waste of time) or ignoring a messy room, +but you get rewarded for ignoring an orderly room (since you can enjoy your additional time). +Tidying a messy room is a chore that gives no reward. These are summarized in the following table: @@ -171,7 +166,7 @@ $$\begin{array}{ccccc} \text{messy} & \text{tidy} & 1 & 0 & 0 \\ \end{array}$$ -Consider a time horizon of $H = 7$ days (one interaction per day). Let +Consider a time horizon of $\hor = 7$ days (one interaction per day). Let $t = 0$ correspond to Monday and $t = 6$ correspond to Sunday. ::: @@ -179,8 +174,8 @@ $t = 0$ correspond to Monday and $t = 6$ correspond to Sunday. tidy_mdp = MDP( S=2, # 0 = orderly, 1 = messy A=2, # 0 = ignore, 1 = tidy - μ=np.array([1.0, 0.0]), # start in orderly state - P=np.array( + μ=jnp.array([1.0, 0.0]), # start in orderly state + P=jnp.array( [ [ [0.7, 0.3], # orderly, ignore @@ -192,15 +187,15 @@ tidy_mdp = MDP( ], ] ), - r=np.array( + r=jnp.array( [ [ - 1.0, # orderly, ignore + 1.0, # orderly, ignore -1.0, # orderly, tidy ], [ -1.0, # messy, ignore - 0.0, # messy, tidy + 0.0, # messy, tidy ], ] ), @@ -213,12 +208,12 @@ tidy_mdp = MDP( :::{prf:definition} Policies :label: policy -A **policy** $\pi$ describes the agent's strategy: which actions it -takes in a given situation. A key goal of RL is to find the **optimal -policy** that maximizes the total reward on average. +A **policy** $\pi$ describes the agent's strategy: +which actions it takes in a given situation. +A key goal of RL is to find the **optimal policy** that maximizes the total reward on average. There are three axes along which policies can vary: their outputs, -inputs, and time-dependence. We'll discuss each of these in turn. +inputs, and time-dependence. 1. **Deterministic or stochastic.** A deterministic policy outputs actions while a stochastic policy outputs *distributions* over @@ -230,31 +225,22 @@ inputs, and time-dependence. We'll discuss each of these in turn. actions, and rewards. We'll only consider state-dependent policies in this course. -3. **Stationary or time-dependent.** A stationary policy remains the - same function at all time steps, while a time-dependent policy - $\pi = \{ \pi_0, \dots, \pi_{H-1} \}$ specifies a different function - $\pi_\hi$ at each time step $\hi$. +3. **Stationary or time-dependent.** A stationary (a.k.a. time-homogeneous) policy + remains the same function at all time steps, while a time-dependent policy can depend on the current timestep. + For consistency with states and actions, we will denote the timestep as a subscript, + i.e. $\pi = \{ \pi_0, \dots, \pi_{\hor-1} \}.$ ::: Note that for finite state and action spaces, we can represent a randomized mapping $\mathcal{S} \to \Delta(\mathcal{A})$ -as a matrix $\pi \in [0, 1]^{\mathcal{S}, \mathcal{A}}$ where each row describes +as a matrix $\pi \in [0, 1]^{\mathcal{S} \times \mathcal{A}}$ where each row describes the policy's distribution over actions for the corresponding state. -```{code-cell} -# In code, we use the `Policy` type to represent a randomized mapping from states to actions. -# In the finite-horizon case, an array of `H` of these, one for at each time step, -# would constitute a time-dependent policy. -Policy = Float[Array, "S A"] -``` - -A fascinating result is that every finite-horizon MDP has an optimal -deterministic time-dependent policy! 
Intuitively, the Markov property -implies that the current state contains all the information we need to -make the optimal decision. We'll prove this result constructively later -in the chapter. +A fascinating result is that every finite-horizon MDP has an optimal deterministic time-dependent policy! +Intuitively, the Markov property implies that the current state contains all the information we need to make the optimal decision. +We'll prove this result constructively later in the chapter. -:::{prf:example} Tidying policies +:::{prf:example} Policies for the tidying MDP :label: tidy_policy Here are some possible policies for the tidying MDP {prf:ref}`tidy_mdp`: @@ -270,9 +256,9 @@ Here are some possible policies for the tidying MDP {prf:ref}`tidy_mdp`: ```{code-cell} # arrays of shape (H, S, A) represent time-dependent policies -tidy_policy_always_tidy = np.zeros((7, 2, 2)).at[:, :, 1].set(1.0) -tidy_policy_weekends = np.zeros((7, 2, 2)).at[5:7, :, 1].set(1.0).at[0:5, :, 0].set(1.0) -tidy_policy_messy_only = np.zeros((7, 2, 2)).at[:, 1, 1].set(1.0).at[:, 0, 0].set(1.0) +tidy_policy_always_tidy = jnp.zeros((7, 2, 2)).at[:, :, 1].set(1.0) +tidy_policy_weekends = jnp.zeros((7, 2, 2)).at[5:7, :, 1].set(1.0).at[0:5, :, 0].set(1.0) +tidy_policy_messy_only = jnp.zeros((7, 2, 2)).at[:, 1, 1].set(1.0).at[:, 0, 0].set(1.0) ``` (trajectories)= @@ -285,84 +271,92 @@ A sequence of states, actions, and rewards is called a **trajectory**: $$\tau = (s_0, a_0, r_0, \dots, s_{H-1}, a_{H-1}, r_{H-1})$$ -where -$r_\hi = r(s_\hi, a_\hi)$. (Note that sources differ as to whether to include -the reward at the final time step. This is a minor detail.) +where $r_\hi = r(s_\hi, a_\hi)$. +(Note that some sources omit the reward at the final time step. This is a minor detail.) ::: ```{code-cell} class Transition(NamedTuple): + """A single state-action-reward interaction with the environment.""" s: int a: int r: float +``` +Once we've chosen a policy, +we can sample trajectories by repeatedly choosing actions according to the policy, +transitioning according to the state transitions, and observing the rewards. -Trajectory = list[Transition] -``` +:::{image} shared/trajectory.png +:width: 240px +:align: center +::: -Once we've chosen a policy, we can sample trajectories by repeatedly -choosing actions according to the policy, transitioning according to the -state transitions, and observing the rewards. That is, a policy induces -a distribution $\rho^{\pi}$ over trajectories. (We assume that $\mu$ and -$P$ are clear from context.) +That is, a policy induces a distribution $\rho^{\pi}$ over trajectories. +(We assume that $\mu$ and $P$ are clear from context.) :::{prf:example} Trajectories in the tidying environment :label: tidy_traj Here is a possible trajectory for the tidying example: -| $t$ | $0$ | $1$ | $2$ | $3$ | $4$ | $5$ | $6$ | -|:---:|:-------:|:-------:|:-------:|:------:|:-----:|:-------:|:-------:| -| $s$ | orderly | orderly | orderly | messy | messy | orderly | orderly | -| $a$ | tidy | ignore | ignore | ignore | tidy | ignore | ignore | -| $r$ | $-1$ | $1$ | $1$ | $-1$ | $0$ | $1$ | $1$ | +| $\hi$ | $0$ | $1$ | $2$ | $3$ | $4$ | $5$ | $6$ | +|:-----:|:-------:|:-------:|:-------:|:------:|:-----:|:-------:|:-------:| +| $s$ | orderly | orderly | orderly | messy | messy | orderly | orderly | +| $a$ | tidy | ignore | ignore | ignore | tidy | ignore | ignore | +| $r$ | $-1$ | $1$ | $1$ | $-1$ | $0$ | $1$ | $1$ | Could any of the policies in {prf:ref}`tidy_policy` have generated this trajectory? 
::: -Note that for a state-dependent policy, using the Markov property {prf:ref}`markov`, we can specify this probability distribution in -an **autoregressive** way (i.e. one timestep at a time): +Note that for a state-dependent policy, using the Markov property {prf:ref}`markov`, +we can write down the likelihood function of this probability distribution in an **autoregressive** way (i.e. one timestep at a time): :::{prf:definition} Autoregressive trajectory distribution :label: autoregressive_trajectories -$$\rho^{\pi}(\tau) := \mu(s_0) \pi_0(a_0 \mid s_0) P(s_1 \mid s_0, a_0) \cdots P(s_{H-1} \mid s_{H-2}, a_{H-2}) \pi_{H-1}(a_{H-1} \mid s_{H-1})$$ +$$\rho^{\pi}(\tau) := \mu(s_0) \pi_0(a_0 \mid s_0) P(s_1 \mid s_0, a_0) \cdots P(s_{\hor-1} \mid s_{\hor-2}, a_{\hor-2}) \pi_{\hor-1}(a_{\hor-1} \mid s_{\hor-1})$$ ::: ```{code-cell} -def trajectory_log_likelihood(mdp: MDP, tau: Trajectory, pi: Policy) -> float: - """ - Compute the log likelihood of a trajectory under a given MDP and policy. - """ - total = np.log(mdp.μ[tau[0].s]) - total += np.log(pi[tau[0].s, tau[0].a]) +def trajectory_log_likelihood( + mdp: MDP, + τ: list[Transition], + π: Float[Array, "S A"], +) -> float: + """Compute the log-likelihood of a trajectory under a given MDP and policy.""" + total = jnp.log(mdp.μ[τ[0].s]) + total += jnp.log(π[τ[0].s, τ[0].a]) for i in range(1, mdp.H): - total += np.log(mdp.P[tau[i - 1].s, tau[i - 1].a, tau[i].s]) - total += np.log(pi[tau[i].s, tau[i].a]) + total += jnp.log(mdp.P[τ[i - 1].s, τ[i - 1].a, τ[i].s]) + total += jnp.log(π[τ[i].s, τ[i].a]) return total ``` -:::{tip} +:::{attention} How would you modify this to include stochastic rewards? ::: -For a deterministic policy $\pi$, we have that -$\pi_\hi(a \mid s) = \mathbb{I}[a = \pi_\hi(s)]$; that is, the probability -of taking an action is $1$ if it's the unique action prescribed by the -policy for that state and $0$ otherwise. In this case, the only -randomness in sampling trajectories comes from the initial state -distribution $\mu$ and the state transitions $P$. +For a deterministic policy $\pi$, we have that $\pi_\hi(a \mid s) = \mathbb{I}[a = \pi_\hi(s)]$; +that is, the probability of taking an action is $1$ if it's the unique action prescribed by the policy for that state and $0$ otherwise. +In this case, the only randomness in sampling trajectories comes from the initial state distribution $\mu$ and the state transitions $P$. +++ ### Value functions -The main goal of RL is to find a policy that maximizes the average total -reward $r_0 + \cdots + r_{H-1}$. (Note that this is a random variable -that depends on the policy.) Let's introduce some notation for analyzing -this quantity. +The main goal of RL is to find a policy that maximizes the expected total +reward $\E [r_0 + \cdots + r_{\hor-1}]$. -A policy's **value function** at time $h$ is its expected remaining reward *from a given state*: +:::{attention} +Note that $r_0 + \cdots + r_{\hor-1}$ is a random variable. +What sources of randomness does it depend on? +Describe the generating process. +::: + +Let's introduce some notation for analyzing this quantity. + +A policy's **value function** at time $\hi$ is its expected remaining reward *from a given state*: :::{prf:definition} Value function :label: value @@ -393,7 +387,7 @@ def q_to_v( Compute the value function for a given policy in a known finite MDP at a single timestep from its action-value function. 
""" - return np.sum(policy * q, axis=1) + return jnp.sum(policy * q, axis=1) ``` and the @@ -415,12 +409,13 @@ def v_to_q( return mdp.r + mdp.γ * mdp.P @ v +# convert a list of v functions to a list of q functions v_ary_to_q_ary = vmap(v_to_q, in_axes=(None, 0)) ``` #### Greedy policies -For any given $q \in \mathbb{R}^{|\mathcal{S}| \times |\mathcal{A}|}$, we can define the **greedy policy** $\hat \pi_q$ as the policy that selects the action with the highest $q$-value at each state: +For any given $Q \in \mathbb{R}^{|\mathcal{S}| \times |\mathcal{A}|}$, we can define the **greedy policy** $\hat \pi_Q$ as the policy that selects the action with the highest $Q$-value at each state: ```{code-cell} def q_to_greedy(q: Float[Array, "S A"]) -> Float[Array, "S A"]: @@ -428,7 +423,9 @@ def q_to_greedy(q: Float[Array, "S A"]) -> Float[Array, "S A"]: Get the (deterministic) greedy policy w.r.t. an action-value function. Return the policy as a matrix of shape (S, A) where each row is a one-hot vector. """ - return np.eye(q.shape[1])[np.argmax(q, axis=1)] + A = q.shape[1] + a_ary = jnp.argmax(q, axis=1) + return jnp.eye(A)[a_ary] def v_to_greedy(mdp: MDP, v: Float[Array, " S"]) -> Float[Array, "S A"]: @@ -448,7 +445,9 @@ who is credited with introducing dynamic programming in 1953. :::{prf:theorem} Bellman consistency equation for the value function :label: bellman_consistency -$$V_\hi^\pi(s) = \E_{\substack{a \sim \pi_\hi(s) \\ s' \sim P(s, a)}} [r(s, a) + V_{\hi+1}^\pi(s')]$$ +$$ +V_\hi^\pi(s) = \E_{\substack{a \sim \pi_\hi(s) \\ s' \sim P(s, a)}} [r(s, a) + V_{\hi+1}^\pi(s')] +$$ ::: ```{code-cell} @@ -462,11 +461,11 @@ def check_bellman_consistency_v( satisfies the Bellman consistency equation. """ return all( - np.allclose( + jnp.allclose( # lhs v_ary[h], # rhs - np.sum(policy[h] * (mdp.r + mdp.γ * mdp.P @ v_ary[h + 1]), axis=1), + jnp.sum(policy[h] * (mdp.r + mdp.γ * mdp.P @ v_ary[h + 1]), axis=1), ) for h in range(mdp.H - 1) ) @@ -529,7 +528,7 @@ def bellman_operator_looping( Looping definition of the Bellman operator. Concise version is below """ - v_new = np.zeros(mdp.S) + v_new = jnp.zeros(mdp.S) for s in range(mdp.S): for a in range(mdp.A): for s_next in range(mdp.S): @@ -548,7 +547,7 @@ def bellman_operator( v: Float[Array, " S"], ) -> Float[Array, " S"]: """For a known finite MDP, the Bellman operator can be exactly evaluated.""" - return np.sum(policy * (mdp.r + mdp.γ * mdp.P @ v), axis=1) + return jnp.sum(policy * (mdp.r + mdp.γ * mdp.P @ v), axis=1) return q_to_v(policy, v_to_q(mdp, v)) # equivalent ``` @@ -595,10 +594,10 @@ equation to compute the value function at each time step. 
```{code-cell} def dp_eval_finite(mdp: MDP, policy: Float[Array, "S A"]) -> Float[Array, "H S"]: """Evaluate a policy using dynamic programming.""" - V_ary = [None] * mdp.H + [np.zeros(mdp.S)] # initialize to 0 at end of time horizon + V_ary = [None] * mdp.H + [jnp.zeros(mdp.S)] # initialize to 0 at end of time horizon for h in range(mdp.H - 1, -1, -1): V_ary[h] = bellman_operator(mdp, policy[h], V_ary[h + 1]) - return np.stack(V_ary[:-1]) + return jnp.stack(V_ary[:-1]) ``` This runs in time $O(H \cdot |\mathcal{S}|^2 \cdot |\mathcal{A}|)$ by counting the @@ -811,16 +810,16 @@ $$ def find_optimal_policy(mdp: MDP): Q = [None] * mdp.H pi = [None] * mdp.H - V = [None] * mdp.H + [np.zeros(mdp.S)] # initialize to 0 at end of time horizon + V = [None] * mdp.H + [jnp.zeros(mdp.S)] # initialize to 0 at end of time horizon for h in range(mdp.H - 1, -1, -1): Q[h] = mdp.r + mdp.P @ V[h + 1] - pi[h] = np.eye(mdp.S)[np.argmax(Q[h], axis=1)] # one-hot - V[h] = np.max(Q[h], axis=1) + pi[h] = jnp.eye(mdp.S)[jnp.argmax(Q[h], axis=1)] # one-hot + V[h] = jnp.max(Q[h], axis=1) - Q = np.stack(Q) - pi = np.stack(pi) - V = np.stack(V[:-1]) + Q = jnp.stack(Q) + pi = jnp.stack(pi) + V = jnp.stack(V[:-1]) return pi, V, Q ``` @@ -839,9 +838,9 @@ setting. ```{code-cell} π_opt, V_opt, Q_opt = find_optimal_policy(tidy_mdp) -assert np.allclose(π_opt, tidy_policy_messy_only) -assert np.allclose(V_opt, V_messy) -assert np.allclose(Q_opt[:-1], v_ary_to_q_ary(tidy_mdp, V_messy)[1:]) +assert jnp.allclose(π_opt, tidy_policy_messy_only) +assert jnp.allclose(V_opt, V_messy) +assert jnp.allclose(Q_opt[:-1], v_ary_to_q_ary(tidy_mdp, V_messy)[1:]) "Assertions passed (the 'tidy when messy' policy is optimal)" ``` @@ -892,7 +891,7 @@ The other components of the MDP remain the same: $$M = (\mathcal{S}, \mathcal{A}, \mu, P, r, \gamma).$$ -Code-wise, we can reuse the `MDP` class from before {prf:ref}`finite_mdp` and set `mdp.H = float('inf')`. +Code-wise, we can reuse the `MDP` class from before {prf:ref}`finite_horizon_mdp` and set `mdp.H = float('inf')`. ```{code-cell} tidy_mdp_inf = tidy_mdp._replace(H=float("inf"), γ=0.95) @@ -1101,10 +1100,10 @@ least one nonzero element.) def eval_deterministic_infinite( mdp: MDP, policy: Float[Array, "S A"] ) -> Float[Array, " S"]: - pi = np.argmax(policy, axis=1) # un-one-hot - P_π = mdp.P[np.arange(mdp.S), pi] - r_π = mdp.r[np.arange(mdp.S), pi] - return np.linalg.solve(np.eye(mdp.S) - mdp.γ * P_π, r_π) + pi = jnp.argmax(policy, axis=1) # un-one-hot + P_π = mdp.P[jnp.arange(mdp.S), pi] + r_π = mdp.r[jnp.arange(mdp.S), pi] + return jnp.linalg.solve(jnp.eye(mdp.S) - mdp.γ * P_π, r_π) ``` :::{prf:example} Tidying policy evaluation @@ -1153,7 +1152,7 @@ takes $O(|\mathcal{S}|^2)$ time for the matrix-vector multiplication. 
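For a rough sense of how many iterations this needs on the tidying MDP (a back-of-the-envelope estimate, using that its rewards are bounded by $1$ and $\gamma = 0.95$): the Bellman operator is a $\gamma$-contraction in the supremum norm, so starting from $V = 0$, the error after $t$ iterations is at most $\gamma^t \|V^\pi\|_\infty \le \gamma^t / (1 - \gamma)$. For a tolerance of $\epsilon = 10^{-6}$, this drops below $\epsilon$ once

$$
t \ge \frac{\log\big(1/(\epsilon(1-\gamma))\big)}{\log(1/\gamma)} = \frac{\log(2 \times 10^{7})}{\log(1/0.95)} \approx 330,
$$

so a few hundred iterations, each costing $O(|\mathcal{S}|^2)$ time, suffice. (The code below stops when successive iterates are within $\epsilon$ of each other, which requires the same order of magnitude of iterations.)
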
```{code-cell} def supremum_norm(v): - return np.max(np.abs(v)) # same as np.linalg.norm(v, np.inf) + return jnp.max(jnp.abs(v)) # same as jnp.linalg.norm(v, jnp.inf) def loop_until_convergence(op, v, ε=1e-6): @@ -1167,7 +1166,7 @@ def loop_until_convergence(op, v, ε=1e-6): def iterative_evaluation(mdp: MDP, pi: Float[Array, "S A"], ε=1e-6) -> Float[Array, " S"]: op = partial(bellman_operator, mdp, pi) - return loop_until_convergence(op, np.zeros(mdp.S), ε) + return loop_until_convergence(op, jnp.zeros(mdp.S), ε) ``` Then, as we showed in {eq}`bellman_convergence`, by the Banach fixed-point theorem: @@ -1259,11 +1258,11 @@ gives the **Bellman optimality operator** ```{code-cell} def bellman_optimality_operator(mdp: MDP, v: Float[Array, " S"]) -> Float[Array, " S"]: - return np.max(mdp.r + mdp.γ * mdp.P @ v, axis=1) + return jnp.max(mdp.r + mdp.γ * mdp.P @ v, axis=1) def check_optimal(v: Float[Array, " S"], mdp: MDP): - return np.allclose(v, bellman_optimality_operator(v, mdp)) + return jnp.allclose(v, bellman_optimality_operator(v, mdp)) ``` (value_iteration)= @@ -1278,7 +1277,7 @@ algorithm is known as **value iteration**. def value_iteration(mdp: MDP, ε: float = 1e-6) -> Float[Array, " S"]: """Iterate the Bellman optimality operator until convergence.""" op = partial(bellman_optimality_operator, mdp) - return loop_until_convergence(op, np.zeros(mdp.S), ε) + return loop_until_convergence(op, jnp.zeros(mdp.S), ε) ``` ```{code-cell} @@ -1386,7 +1385,7 @@ def policy_iteration(mdp: MDP, ε=1e-6) -> Float[Array, "S A"]: """Iteratively improve the policy and value function.""" def op(pi): return v_to_greedy(mdp, eval_deterministic_infinite(mdp, pi)) - π_init = np.ones((mdp.S, mdp.A)) / mdp.A # uniform random policy + π_init = jnp.ones((mdp.S, mdp.A)) / mdp.A # uniform random policy return loop_until_convergence(op, π_init, ε) ``` diff --git a/book/pg.md b/book/pg.md index a1c9f82..8ee5c0c 100644 --- a/book/pg.md +++ b/book/pg.md @@ -11,27 +11,48 @@ kernelspec: name: python3 --- -# Policy Gradient Algorithms - -A key task in RL is finding the **optimal policy** in a given environment, -that is, the policy that achieves the most total reward in all states. -Given this task, why not optimize directly over _policies?_ - -Algorithms based on this idea are called _policy optimization algorithms._ -We've already seen some examples of this, +(pg)= +# Policy Optimization + +The core task of RL is finding the **optimal policy** in a given environment. +This is essentially an _optimization problem:_ +out of some space of policies, +we want to find the one that achieves the maximum total reward (in expectation). + +It's typically intractable to compute the optimal policy exactly. +Instead, **policy optimization algorithms** start from some randomly initialized policy, +and then _improve_ it step by step. +We've already seen some examples of these, namely {ref}`policy_iteration` for finite MDPs and {ref}`iterative_lqr` in continuous control. - -**Policy gradient algorithms** form a specific subclass for policies that can be described by a set of **parameters.** -These are responsible for groundbreaking applications including AlphaGo, OpenAI Five, and large language models, +In particular, we often use policies that can be described by some finite set of _parameters._ +For such parameterized policies, +we can approximate the **policy gradient:** +the gradient of the expected total reward with respect to the parameters. 
+This tells us the direction the parameters should be updated to achieve a higher total reward (in expectation). +Policy gradient methods are responsible for groundbreaking applications including AlphaGo, OpenAI Five, and large language models, many of which use policies parameterized as deep neural networks. 1. We begin the chapter with a short review of gradient ascent, -a simple and general **optimization method.** -2. We'll then apply this technique directly to maximize the _\hiotal reward_. +a general **optimization method.** +2. We'll then see how to estimate the **policy gradient,** + enabling us to apply (stochastic) gradient ascent in the RL setting. 3. Then we'll explore some _proximal optimization_ techniques that ensure the steps taken are "not too large". This is helpful to stabilize training and widely used in practice. -+++ +```{code-cell} ipython3 +import numpy as np +import jax +from jaxtyping import Float, Array +from bokeh.plotting import figure, show, output_notebook +from bokeh.models import Arrow, VeeHead, ColumnDataSource, LinearColorMapper, BasicTicker, ColorBar +from bokeh.transform import linear_cmap +from bokeh.layouts import gridplot +from typing import TypeVar, Callable + +Params = TypeVar("Params") + +output_notebook() +``` ## Gradient Ascent @@ -41,7 +62,43 @@ where you keep taking steps in the steepest direction upwards. Here, your vertical position $y$ is the function being optimized, and your horizontal position $(x, z)$ is the input to the function. The _slope_ of the mountain at your current position is given by the _gradient_, -written $\nabla y(x, z) \in \R^2$. +written $\nabla y(x, z) \in \mathbb{R}^2$. + +```{code-cell} ipython3 +def f(x, y): + """Himmelblau's function""" + return (x**2 + y - 11)**2 + (x + y**2 - 7)**2 + +x = np.linspace(-5, 5, 400) +y = np.linspace(-5, 5, 400) +X, Y = np.meshgrid(x, y) +Z = f(X, Y) + +p = figure(width=600, height=600, title="Himmelblau's function") + +mapper = LinearColorMapper(palette="Viridis256", low=Z.min(), high=Z.max()) +p.image(image=[Z], x=-5, y=-5, dw=10, dh=10, color_mapper=mapper) + +color_bar = ColorBar(color_mapper=mapper) +p.add_layout(color_bar, 'right') + +tx, ty = 1., 1. +gx, gy = jax.grad(f, argnums=(0, 1))(tx, ty) + +p.scatter(x=[tx], y=[ty], size=10, color="red") + +p.add_layout(Arrow( + end=VeeHead(size=15), + x_start=tx, + y_start=ty, + x_end=tx + gx.item() * 0.01, + y_end=ty + gy.item() * 0.01, + line_color="blue", +)) + +show(p) +``` + For differentiable functions, this can be thought of as the vector of partial derivatives, $$ @@ -77,7 +134,7 @@ The case of a two-dimensional input is easy to visualize. But this idea can be straightforwardly extended to higher-dimensional inputs. From now on, we'll use $J$ to denote the function we're trying to maximize, -and $\theta$ to denote the parameters being optimized over. +and $\theta$ to denote the parameters being optimized over. (In the above example, $\theta = \begin{pmatrix} x & z \end{pmatrix}^\top$). Notice that our parameters will stop changing once $\nabla J(\theta) = 0.$ Once we reach this **stationary point,** our current parameters are 'locally optimal' in some sense; @@ -85,6 +142,40 @@ it's impossible to increase the function by moving in any direction. If $J$ is _convex_, then the only point where this happens is at the *global optimum.* Otherwise, if $J$ is nonconvex, the best we can hope for is a *local optimum.* +:::{note} +How does a computer compute the gradient of a function? 
+ +One way is _symbolic differentiation,_ +which is similar to the way you might compute it by hand: +the computer applies a list of rules to transform the _symbols_ involved. +Python's `sympy` package supports symbolic differentiation. +However, functions implemented in code may not always have a straightforward symbolic representation. + +Another way is _numerical differentiation,_ +which is based on the limit definition of a (directional) derivative: + +$$ +\nabla_{\boldsymbol{u}} J(\boldsymbol{x}) = \lim_{\varepsilon \to 0} +\frac{J(\boldsymbol{x} + \varepsilon \boldsymbol{u}) - J(\boldsymbol{x})}{\varepsilon} +$$ + +Then, we can substitute a small value of $\varepsilon$ on the r.h.s. to approximate the directional derivative. +How small, though? If we need an accurate estimate, +we may need such a small value of $\varepsilon$ that typical computers will run into rounding errors. +Also, to compute the full gradient, +we would need to compute the r.h.s. once for each input dimension. +This is an issue if computing $J$ is expensive. + +**Automatic differentiation** achieves the best of both worlds. +Like symbolic differentiation, +we manually implement the derivative rules for a few basic operations. +However, instead of executing these on the _symbols_, +we execute them on the _values_ when the function gets called, +like in numerical differentiation. +This allows us to differentiate through programming constructs such as branches or loops, +and doesn't involve any arbitrarily small values. +::: + +++ ### Stochastic gradient ascent @@ -97,13 +188,17 @@ In these cases, we often compute some _estimate_ of the gradient at each step, $ This is called **stochastic** gradient ascent. In the SL example above, we might randomly choose a *minibatch* of samples and use them to estimate the true prediction error. (This approach is known as **_minibatch_ SGD**.) -```python -def sgd_pseudocode( +```{code-cell} ipython3 +def sgd( θ_init: Params, estimate_gradient: Callable[[Params], Params], η: float, n_steps: int, ): + """Perform `n_steps` steps of SGD. + + `estimate_gradient` eats the current parameters and returns an estimate of the objective function's gradient at those parameters. + """ θ = θ_init for step in range(n_steps): θ += η * estimate_gradient(θ) @@ -113,7 +208,9 @@ def sgd_pseudocode( What makes one gradient estimator better than another? Ideally, we want this estimator to be **unbiased;** that is, on average, it matches a single true gradient step: -$$\E [\tilde \nabla J(\theta)] = \nabla J(\theta).$$ +$$ +\E [\tilde \nabla J(\theta)] = \nabla J(\theta). +$$ We also want the _variance_ of the estimator to be low so that its performance doesn't change drastically at each step. @@ -160,13 +257,21 @@ What does $\theta$ correspond to, though? In general, $\pi$ is a function, and optimizing over the space of arbitrary input-output mappings would be intractable. Instead, we need to describe $\pi$ in terms of some finite set of _parameters_ $\theta$. ++++ + (parameterizations)= ### Example policy parameterizations What are some ways we could parameterize our policy? ++++ + +#### Tabular representation + If both the state and action spaces are finite, perhaps we could simply learn a preference value $\theta_{s,a}$ for each state-action pair. 
-Then to turn this into a valid distribution, we perform a "softmax" operation: we exponentiate each of them, and divide by the total: +Then to turn this into a valid distribution, we perform a **softmax** operation: +we exponentiate each of them, +and then normalize to form a valid distribution: $$\pi^\text{softmax}_\theta(a | s) = \frac{\exp(\theta_{s,a})}{\sum_{s,a'} \exp (\theta_{s,a'})}.$$ @@ -211,6 +316,8 @@ More generally, we could map states and actions to unnormalized scores via some The score can then be written as $$\nabla \log \pi_\theta(a|s) = \nabla f_\theta(s, a) - \E_{a \sim \pi_\theta(s)} \nabla f_\theta (s, a')$$ ++++ + ### Continuous action spaces Consider a continuous $n$-dimensional action space $\mathcal{A} = \mathbb{R}^n$. Then for a stochastic policy, we could use a function to predict the *mean* action and then add some random noise about it. For example, we could use a neural network to predict the mean action $\mu_\theta(s)$ and then add some noise $\epsilon \sim \mathcal{N}(0, \sigma^2 I)$ to it: @@ -893,5 +1000,3 @@ TODO - Trust region policy optimization - Natural policy gradient - Proximal policy optimization - - diff --git a/book/planning.md b/book/planning.md new file mode 100644 index 0000000..6b93bda --- /dev/null +++ b/book/planning.md @@ -0,0 +1,15 @@ + + + ++++ + ++++ + ++++ + +(planning)= +# Planning + +## Monte Carlo Tree Search + +(INCOMPLETE) diff --git a/book/shared/npg_line.png b/book/shared/npg_line.png new file mode 100644 index 0000000000000000000000000000000000000000..8203102339ac104c6048009c86b407fcd7916062 GIT binary patch literal 32010 zcma&O1z1+=);0WqfCxx;hk}H(NF#`VfHaDTq=eEb($WZm2q>t4l#~cacXxw?bV#>S zBJt0K=RN;9=l{<4{$AJK?yV2cTI;^&J?9u>j=4hAROBz>P~)IbsEfB1WHnGIbYBz- zZ4Da>{zPt}GZ}spak{1Bq-ksB^wh|~6m`eQ$A7<6~PJ0d8S#elA7} zCnq~cQ63)ae|~`5*1?>I@y|&ke8~knh5L>u6oC=)4_by)`ePKzl06P zG|f-3w!|*D2a=Ius*k%9+@LKz|8a1|((>xi;n1G*w~e9V+@hh;(NR&|Z0!fD7CB@k zS5z<>)%P=S)oHEj15s_81YSSL+Ac6MUHWmNcQ3bCgn&);=c|VJ#Se?_vJPc>n9T5t zUu3p?14RVC1}|bsBY#5D#Hb)EEBh#!5+fuyI9Qp~j}iVhGQ^nh67qRtyA0Ij^CrZ+-kCg-|v-)Xq1ZZ(Ig^;kVlw0nEi8?KzGPPb% z37Bb(xNJ^J>{8l}7G~D(-FYvRSu$1eH23Zw5r-cN3TYZ)v?5073z{prau&RGAzKx9HfxYAp0qt&4tluu)$tj_)W%N6fa z>yCc^icCx-5EB#gI9yI&8ZF}doRfowy7Nx3ug&pFjN&6Rv)TFp+)}$4bg#o@Hjm%) zZ(~KBvsCs}wH~s_hs!!S2_79C4xFjNPcU3J30!FPL#B+uFj$_8I$Yk)yT-m=h^piXB$IEgk9EjUYaK;Jbm%v z1tA^XMU*RCZhIv=cjZydSc$E?AJ)Zhrv`d@VX3K?XvJL#YmR>6lHy=4bjRZ|Ffi<_ z4y&^D+%ErBkIO?Td&QzZ^}-JaSi=-uy!onuqH51S)Dt>tU3}OYFMep9zTApc7+t>Lw;3v$*M+DQj*%7fJ6crWirfbnqH{NnT6b!6o6A>1kE*jSND=RCLESHbD z!o{U^S`ruMRsE-Gr(em=#RUs>bi6<1_Dlt~OcItz>&cU>LoHj_QOm;I^m_vnWsc`< ztADe?dirBsr2cp-l>A`0<|JUQCBkw-)y9Uq*mf#BPQbi=YKlQ$U*FEzIUpi}KrW2( z`FzJ)7B;qYMbGw*j)pAlY+kcY%0in7YW_I9m(lai8ZjV^|T z;c@8}Vjut6mycq-Grh49?y^2=)E><)?7H=FC~>fO^xoL&z~zTus3jd2@IEQW`NH|A zpO~Pd6LV=?`(9uV5wGZ7%#j@!_4;+5VIBIePFZ<*^0QwHOA{5>^*y$4eKx3}&&m<&*YJIY@YM(sq>{Lb$%+f$Q0rj;f81Q3w^x~9A)Izir zR&QivTtTu;yx3E@hQNz`AEnO~+D_5Kqqc}`R{Fv3mg~4NvzbO&(P?REhc&k2rAYsyyBsi@k1}(46Rb@_TXc{BJh+0VY;fEIvNIgWrpZ z+lz_b$)+7|d_9B1!CT^Hfq%X)d*&vs>;@bBHjg_Mn2`l5=$x zuCK4hpb)zyCpVk+RzJ&5Vh{p!du!(A+ISg>w6wHNm78;w0dJARyrRd^c8eNE%u36~ zR853|_c0lgPY-ujEelUaMs$>8dC}tH;^yg9@7%fd-V=rK+gT;1KzU6FWoywyP9da%XBs+tgX z?mYfS$HneJuS9w&udu!pc@_x?Iux(hk*oOW(Jwnk$D4Y3w0Cp$U&6wE5PwG8BXugi zw>gztE#x>__!t`>U(VQ=xjCE;_v6mAh;bwK?ujL*u+4ZW5#&yk+Q#pM)YW~vpFi4= z>wUWOK9Gn-uJ-$Ps{M@#zo@9F=HlU8gL@^;_V&^q9^w!{g`>$%0AM0v>VaNTwXYc4J>LCM7g33t2Q z745>u16YtdZosXm>F69NzZL$rBYaO2&YrH+DGI*y?)>rK57(W-qazRH55h!{Y+QeJ zUFS2bMF~TdXnsj^aJZUl|7Xt{D&#rXlFxP`{g9QvG37x>U|+m=EBkH^V~k>CeEiGz 
zn(2SlEx_fbYOH;=kw8y4_8M1CT?jcfA8I?2cg!XgLyGVIcPu%8Q52Qr|K|x6W~TKR z@0C|n5LX0A4GpUgs{(QNgT>!B3a;F5;#=@q3d&0=UUg3`CRU$ZIDg)}Sriec7+fW6Hw<}4|5K{kn=8D5*Zn(0tOOcg3CMjT1AXU-E`iY z=r@2)4ZT@YV49%jtVo2#*2IHr8ThL^N4241s&mERXsd!ysy ztz~&MTDy$ErT;+3=~E_#Owyo^fpfE#F@3c6 zjYu0?_Tj=t9wm)z|KBhDx@fJ>ptVkZ8UA^vl38p*0vA-X(+|UUX5Sx{&o1{wy)XE) zp{9l=Q_jiB$gjWM?6xENTYcJkyACGup37z8pD~($Hn}oWY_YM0eX5pDfl1n%_NuGS zvp~3rJ_anDZl-2MMFqMCv+|b6`k6oS48<{`-K;4o(A{WhNTsKYY_XrN%*wP)IJUlO zGc6g-5UPuevP#5$@xN{^BJt$O6QUnV0E z6h^}rQ-1x__R2=48WoG_rhIvzHiE8QTMKy8aeVfYcym(|QG_E<77(WmrhMR{u>X0v zY4@5@x;9hN;Rzw5?CD=ya$!ON#`}DY0JY0khW4)}yEFix=KAlXpN?=Hs!Bij)>D{i z#-dW47O-m@V^-99G^>;0=N<@&=NA^%0camjxrqKeN-oT;SmN~v9oMF>Dc-m}1P%>QpBjVo!6HCuG`@_^goa-*w40!O!jzkv z%Q%2SIYSKoaBhnpy?L4t+|jT256_*m zb05a#N07SW%!z*$wIiwJ57tl!0pIybIX@$!t#HYJ@ke!S?MxUG;kHeRCx|R5RCX_d z&*SFpy$Onj1O)(Xz{iGpLzU;k!{F=zIzDkMva+%whBwf>9DVn$7Nhnc8jZI5?cu4k zu;zxSUUy}V^h(6wtvK=tI3Z7$-H6@rOX(AYw&-bSMW zwLAC6PoKJRT(uQCaueBa1SZg7jJfkNzGe+vhgx7VekZ@E=oqka?Io^Z0MLmlyEA*g z{bR$^Tnh|M7qqpt^$*Mw(y;K~nK%J62_h{_M9{3Q7XsFox4Zwhsq`GVQi$mh?~zjT z2na$X1Albq?%isDd@w*g3RXa2jMbK@lrXsyk&&D5pK!q<>?K^ggsQ3t#=;v74S7je zgf8m_TwDNHRWpfh1;At?A|jCdvVPTYmcY}pi<2<=$K&L3dX&L+oKuJf;YU0QF^H5C z^=)!?Z_NwbkpBd%DFQ$z@OG>i0??q=e>bBB`kKy~DujUPm)9E8)j)CNJKPgSK|odg z--!5Fq-=gH&|?1ZrGPl7aGK%X(%Y~{A>B;>iD!KWc0C{>xsX`JjPiBC~#-w5cHSxvqRA}{oxctu@JheSy~GzbdYO0Iz&j*3F^0= z(@cX>7U5x~rOMBtsp~G0|7HxEjxO5a`TWCYUEH%gQWqlnyxOLQnNSmy%FIUw_US#z zwFrZxLDwUFjVp7RW~^r38I)U}_Qt5^Q4)g7NyyVt6c&T$(=p%NBhz0M=iIYD@>=6_ z|03v(1Oft93rDR@5uzK=wmOxlB{I8uH}8+3Uz(c(eQYAQifv}lE%+GsK9__PB3VgL zLU?p6Ai#BOd+LD=4?7Bqf}p9%+k;r-@Zf=D#zm}i#o?94S|GSV*$$%iBkUcA#=I}d ze^J@F72=9BDdFa!8-MAL?EXL~c3|?XaekuLGja7c;jLL&y%3&gThqVCyyzLEgsFQh z&JXosZT|!%fKV2Qu5X&56mV*u?Ynkh3mGiuC!jp2r4~R$m>6>DrW;mt;&tH+(L{^y zP{$`jZB%xb`95p2>NWvw2q% zA0iYMEonNEP9RRoNCYeiU`HU7@z{qwX`jA*Pj+PGJ7o7NZg2lk)=N;C2bYPDa0{~D zyVvxog}gG7Ga6pAKATAB)n}E^vn>C!K_*VwV<|8%z&#(0{N*z%e3*bXsltIcbUia; z3B0gKrVf`D7%4@PbqWn`LaiKwe#xSB>*AK!0XAg4;*x9~_$wR%Gktu;H8ZbCPcGds zZ(xskqSp_N>s?ksTE-W{2ney{tOC>g7cCQJD6l*A8rObtcyeb1i0nQz`jPah6O#By0ahD#o0MJE&>n_{2Ag@ z5j+fm?OCIh&3vuDn3X2 zA(1(!d5dX2&TO4vSNq5UON&_vHJ(9qN!#5iKe;HuwS4+rHzwGIeL$Z-Onmya%R3n6!NxvM6?F%Hi z`=1qNKDX35j5Q#n>Lm;Ky=8Y2bk+H(7Faexs#=r?N`JMC<9XEj+78ndDsbfHZWVk@+l;i<%gV-3kb3O4Dy#9HuwA-`Yt+%l@*QJv$)sBh4%^1?d`Ev<3> z?;mU;zLLUg_lF}Er(|WZ0#K%@aqAyrX_%u$;xtqn}zgnJ(6PQVa5Ci>lN%k=)%DcPKYFhhQxXbSekJfM{a(O zHnsMnyBVMgYcL^vGuCnEih0ZvTjV9pjnTX|%HqOiln6W2lm0IF6t_V&TZ z?q9-js1C`TVPjrufbwsD+6AkYLDl-d%cKr&H#J*FU}4ZxgGT{9SElz#e`=rlV~$L6 z^?%IKebcrtCs=xDCofc0HCr|*yKpP3Y(iPeJ|VkY9k zDOchIhW&UEjSP}rBLf?IVs@Y#Ij3;P`S|#5-0w!L2e9o7&v!0M_0@s?wXtU>8{|w+ z!vGaWa4lRwFax6%7kdUPi72^L0XGONAG}hS^!4@OctFm7q{fJsT4d)NsWP>7-vPWMR)UT>>j1YIRF6`70 zAi{|hIk>Fs8ZsDD!}I}}5OVmP;;TYJ2WQ+zD}Uf9N`7R)JmTV!Knp`;{HEr4n3S@S zG1|!U3OI3C?l%L*lw1?`M>nCnf>j~&!JzG8q<0fB~`fbAeSjgoGIs zl6K);f_yx5SY4Jd3^0W;VBf+l*Yj|~34C}X%n2@198|gj4G%-OgV<=`CX!kbv>Is1 zIXO99#mE2pQ5T)j0n^H&qBUy%AGh}?Q7}T|qq?BFjA`3UI> z5X*{nDk^(G52DFLMuIGi+W!4;hajrgyt`YMNb-@9t4v(aCQ`Ow zVslK0G|E^uw> rapD~d>A(Gx3-|vsjC}XcQ{vn9=e(ABFx>=Sa%#)QZQ8k-*0lct-Yqk^ literal 0 HcmV?d00001 diff --git a/book/shared/references.bib b/book/shared/references.bib index 6656795..8259c44 100644 --- a/book/shared/references.bib +++ b/book/shared/references.bib @@ -1,9 +1,55 @@ -@article{achiam_spinning_2018, - title = {Spinning {{Up}} in {{Deep Reinforcement Learning}}}, - author = {Achiam, Joshua}, +@book{vershynin_high-dimensional_2018, + title = {High-{{Dimensional Probability}}: {{An Introduction}} with {{Applications}} in {{Data Science}}}, + 
shorttitle = {High-{{Dimensional Probability}}}, + author = {Vershynin, Roman}, year = {2018}, - urldate = {2024-07-01}, - file = {/Users/alexandercai/Zotero/storage/UPUMW6XV/index.html} + month = sep, + publisher = {Cambridge University Press}, + abstract = {High-dimensional probability offers insight into the behavior of random vectors, random matrices, random subspaces, and objects used to quantify uncertainty in high dimensions. Drawing on ideas from probability, analysis, and geometry, it lends itself to applications in mathematics, statistics, theoretical computer science, signal processing, optimization, and more. It is the first to integrate theory, key tools, and modern applications of high-dimensional probability. Concentration inequalities form the core, and it covers both classical results such as Hoeffding's and Chernoff's inequalities and modern developments such as the matrix Bernstein's inequality. It then introduces the powerful methods based on stochastic processes, including such tools as Slepian's, Sudakov's, and Dudley's inequalities, as well as generic chaining and bounds based on VC dimension. A broad range of illustrations is embedded throughout, including classical and modern results for covariance estimation, clustering, networks, semidefinite programming, coding, dimension reduction, matrix completion, machine learning, compressed sensing, and sparse regression.}, + googlebooks = {NDdqDwAAQBAJ}, + isbn = {978-1-108-41519-4}, + langid = {english}, + keywords = {Business & Economics / Econometrics,Computers / Optical Data Processing,Language Arts & Disciplines / Library & Information Science / General,Mathematics / Probability & Statistics / General,Technology & Engineering / Signals & Signal Processing}, + file = {/Users/adzcai/Vault/papers/assets/2018/High-Dimensional Probability (2018) - Vershynin.pdf} +} + +@book{kochenderfer_algorithms_2022, + title = {Algorithms for {{Decision Making}}}, + author = {Kochenderfer, Mykel J and Wheeler, Tim A and Wray, Kyle H}, + year = {2022}, + month = aug, + urldate = {2022-10-23}, + abstract = {A broad introduction to algorithms for decision making under uncertainty, introducing the underlying mathematical problem formulations and the algorithms for...}, + isbn = {978-0-262-04701-2}, + langid = {american}, + file = {/Users/adzcai/Vault/papers/assets/2022/Algorithms for Decision Making (2022) - Kochenderfer, Wheeler, Wray.pdf} +} + +@book{sutton_reinforcement_2018, + title = {Reinforcement Learning: An Introduction}, + shorttitle = {Reinforcement Learning}, + author = {Sutton, Richard S. and Barto, Andrew G.}, + year = {2018}, + series = {Adaptive Computation and Machine Learning Series}, + edition = {Second edition}, + publisher = {The MIT Press}, + address = {Cambridge, Massachusetts}, + abstract = {"Reinforcement learning, one of the most active research areas in artificial intelligence, is a computational approach to learning whereby an agent tries to maximize the total amount of reward it receives while interacting with a complex, uncertain environment. 
In Reinforcement Learning, Richard Sutton and Andrew Barto provide a clear and simple account of the field's key ideas and algorithms."--}, + isbn = {978-0-262-03924-6}, + langid = {english}, + lccn = {Q325.6 .R45 2018}, + keywords = {Reinforcement learning}, + file = {/Users/adzcai/Vault/papers/assets/2018/Reinforcement learning (2018) - Sutton, Barto.pdf} +} + +@book{agarwal_reinforcement_2022, + title = {Reinforcement {{Learning}}: {{Theory}} and {{Algorithms}}}, + shorttitle = {{{AJKS}}}, + author = {Agarwal, Alekh and Jiang, Nan and Kakade, Sham M and Sun, Wen}, + year = {2022}, + month = jan, + langid = {english}, + file = {/Users/adzcai/Vault/papers/assets/2022/Reinforcement Learning (2022) - Agarwal, Jiang, Kakade, Sun.pdf} } @misc{adaptive_agent_team_human-timescale_2023, @@ -20,32 +66,15 @@ @misc{adaptive_agent_team_human-timescale_2023 archiveprefix = {arXiv}, keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing}, annotation = {1 citations (Semantic Scholar/arXiv) [2023-02-20]}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2023/Human-Timescale Adaptation in an Open-Ended Task Space (2023) - Adaptive Agent Team et al.pdf} -} - -@book{agarwal_reinforcement_2022, - title = {Reinforcement {{Learning}}: {{Theory}} and {{Algorithms}}}, - shorttitle = {{{AJKS}}}, - author = {Agarwal, Alekh and Jiang, Nan and Kakade, Sham M and Sun, Wen}, - year = {2022}, - month = jan, - langid = {english}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2022/Reinforcement Learning (2022) - Agarwal, Jiang, Kakade, Sun.pdf} + file = {/Users/adzcai/Vault/papers/assets/2023/Human-Timescale Adaptation in an Open-Ended Task Space (2023) - Adaptive Agent Team et al.pdf} } -@inproceedings{azar_minimax_2017, - title = {Minimax {{Regret Bounds}} for {{Reinforcement Learning}}}, - booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}}}, - author = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi}, - year = {2017}, - month = jul, - pages = {263--272}, - publisher = {PMLR}, - issn = {2640-3498}, - urldate = {2024-06-21}, - abstract = {We consider the problem of provably optimal exploration in reinforcement learning for finite horizon MDPs. We show that an optimistic modification to value iteration achieves a regret bound of \${\textbackslash}tilde \{O\}( {\textbackslash}sqrt\{HSAT\} + H{\textasciicircum}2S{\textasciicircum}2A+H{\textbackslash}sqrt\{T\})\$ where \$H\$ is the time horizon, \$S\$ the number of states, \$A\$ the number of actions and \$T\$ the number of time-steps. This result improves over the best previous known bound \${\textbackslash}tilde \{O\}(HS {\textbackslash}sqrt\{AT\})\$ achieved by the UCRL2 algorithm. The key significance of our new results is that when \$T{\textbackslash}geq H{\textasciicircum}3S{\textasciicircum}3A\$ and \$SA{\textbackslash}geq H\$, it leads to a regret of \${\textbackslash}tilde\{O\}({\textbackslash}sqrt\{HSAT\})\$ that matches the established lower bound of \${\textbackslash}Omega({\textbackslash}sqrt\{HSAT\})\$ up to a logarithmic factor. Our analysis contain two key insights. 
We use careful application of concentration inequalities to the optimal value function as a whole, rather than to the transitions probabilities (to improve scaling in \$S\$), and we define Bernstein-based ``exploration bonuses'' that use the empirical variance of the estimated values at the next states (to improve scaling in \$H\$).}, - langid = {english}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2017/Minimax Regret Bounds for Reinforcement Learning (2017) - Azar, Osband, Munos.pdf} +@book{boyd_convex_2004, + title = {Convex {{Optimization}}}, + author = {Boyd, Stephen and Vandenberghe, Lieven}, + year = {2004}, + publisher = {Cambridge University Press}, + file = {/Users/adzcai/Vault/papers/assets/2004/Convex Optimization (2004) - Boyd, Vandenberghe.pdf;/Users/adzcai/Vault/papers/assets/2004/Slides - Convex Optimization (2004) - Boyd, Vandenberghe.pdf} } @misc{babuschkin_deepmind_2020, @@ -54,21 +83,16 @@ @misc{babuschkin_deepmind_2020 year = {2020} } -@article{barto_neuronlike_1983, - title = {Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems}, - author = {Barto, Andrew G. and Sutton, Richard S. and Anderson, Charles W.}, - year = {1983}, - month = sep, - journal = {IEEE Transactions on Systems, Man, and Cybernetics}, - volume = {SMC-13}, - number = {5}, - pages = {834--846}, - issn = {2168-2909}, - doi = {10.1109/TSMC.1983.6313077}, - urldate = {2024-07-01}, - abstract = {It is shown how a system consisting of two neuronlike adaptive elements can solve a difficult learning control problem. The task is to balance a pole that is hinged to a movable cart by applying forces to the cart's base. It is argued that the learning problems faced by adaptive elements that are components of adaptive networks are at least as difficult as this version of the pole-balancing problem. The learning system consists of a single associative search element (ASE) and a single adaptive critic element (ACE). In the course of learning to balance the pole, the ASE constructs associations between input and output by searching under the influence of reinforcement feedback, and the ACE constructs a more informative evaluation function than reinforcement feedback alone can provide. 
The differences between this approach and other attempts to solve problems using neurolike elements are discussed, as is the relation of this work to classical and instrumental conditioning in animal learning studies and its possible implications for research in the neurosciences.}, - keywords = {Adaptive systems,Biological neural networks,Neurons,Pattern recognition,Problem-solving,Supervised learning,Training}, - file = {/Users/alexandercai/Zotero/storage/GHD9WZXL/6313077.html} +@book{sussman_functional_2013, + title = {Functional Differential Geometry}, + author = {Sussman, Gerald Jay and Wisdom, Jack and Farr, Will}, + year = {2013}, + publisher = {The MIT Press}, + address = {Cambridge, MA}, + isbn = {978-0-262-01934-7}, + lccn = {QC20.7.D52 S87 2013}, + keywords = {Functional differential equations,Geometry Differential,Mathematical physics}, + file = {/Users/adzcai/Vault/papers/assets/2013/Functional differential geometry (2013) - Sussman, Wisdom, Farr.pdf} } @article{degrave_magnetic_2022, @@ -89,7 +113,25 @@ @article{degrave_magnetic_2022 langid = {english}, keywords = {Computer science,Magnetically confined plasmas,Nuclear fusion and fission}, annotation = {230 citations (Semantic Scholar/DOI) [2023-05-21]}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2022/Magnetic control of tokamak plasmas through deep reinforcement learning (2022) - Degrave et al.pdf} + file = {/Users/adzcai/Vault/papers/assets/2022/Magnetic control of tokamak plasmas through deep reinforcement learning (2022) - Degrave et al.pdf} +} + +@misc{hausknecht_deep_2017, + title = {Deep {{Recurrent Q-Learning}} for {{Partially Observable MDPs}}}, + author = {Hausknecht, Matthew and Stone, Peter}, + year = {2017}, + month = jan, + number = {arXiv:1507.06527}, + eprint = {1507.06527}, + primaryclass = {cs}, + publisher = {arXiv}, + doi = {10.48550/arXiv.1507.06527}, + urldate = {2023-06-04}, + abstract = {Deep Reinforcement Learning has yielded proficient controllers for complex tasks. However, these controllers have limited memory and rely on being able to perceive the complete game screen at each decision point. To address these shortcomings, this article investigates the effects of adding recurrency to a Deep Q-Network (DQN) by replacing the first post-convolutional fully-connected layer with a recurrent LSTM. The resulting {\textbackslash}textit\{Deep Recurrent Q-Network\} (DRQN), although capable of seeing only a single frame at each timestep, successfully integrates information through time and replicates DQN's performance on standard Atari games and partially observed equivalents featuring flickering game screens. Additionally, when trained with partial observations and evaluated with incrementally more complete observations, DRQN's performance scales as a function of observability. Conversely, when trained with full observations and evaluated with partial observations, DRQN's performance degrades less than DQN's. 
Thus, given the same length of history, recurrency is a viable alternative to stacking a history of frames in the DQN's input layer and while recurrency confers no systematic advantage when learning to play the game, the recurrent net can better adapt at evaluation time if the quality of observations changes.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Machine Learning}, + annotation = {1274 citations (Semantic Scholar/arXiv) [2023-06-04]}, + file = {/Users/adzcai/Vault/papers/assets/2017/Deep Recurrent Q-Learning for Partially Observable MDPs (2017) - Hausknecht, Stone.pdf} } @inproceedings{freeman_brax_2021, @@ -107,37 +149,24 @@ @inproceedings{freeman_brax_2021 pubstate = {preprint {\textbar} DBLP: https://dblp.org/rec/conf/nips/FreemanFRGMB21}, keywords = {Computer Science - Artificial Intelligence,Computer Science - Robotics}, annotation = {151 citations (Semantic Scholar/arXiv) [2023-07-22]}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2021/Brax – A Differentiable Physics Engine for Large Scale Rigid Body Simulation (2021) - Freeman et al.pdf} + file = {/Users/adzcai/Vault/papers/assets/2021/Brax – A Differentiable Physics Engine for Large Scale Rigid Body Simulation (2021) - Freeman et al.pdf} } -@misc{hausknecht_deep_2017, - title = {Deep {{Recurrent Q-Learning}} for {{Partially Observable MDPs}}}, - author = {Hausknecht, Matthew and Stone, Peter}, - year = {2017}, - month = jan, - number = {arXiv:1507.06527}, - eprint = {1507.06527}, +@inproceedings{schulman_high-dimensional_2018, + title = {High-{{Dimensional Continuous Control Using Generalized Advantage Estimation}}}, + booktitle = {{{ICLR}} 2016}, + author = {Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter}, + year = {2018}, + month = oct, + eprint = {1506.02438}, primaryclass = {cs}, - publisher = {arXiv}, - doi = {10.48550/arXiv.1507.06527}, - urldate = {2023-06-04}, - abstract = {Deep Reinforcement Learning has yielded proficient controllers for complex tasks. However, these controllers have limited memory and rely on being able to perceive the complete game screen at each decision point. To address these shortcomings, this article investigates the effects of adding recurrency to a Deep Q-Network (DQN) by replacing the first post-convolutional fully-connected layer with a recurrent LSTM. The resulting {\textbackslash}textit\{Deep Recurrent Q-Network\} (DRQN), although capable of seeing only a single frame at each timestep, successfully integrates information through time and replicates DQN's performance on standard Atari games and partially observed equivalents featuring flickering game screens. Additionally, when trained with partial observations and evaluated with incrementally more complete observations, DRQN's performance scales as a function of observability. Conversely, when trained with full observations and evaluated with partial observations, DRQN's performance degrades less than DQN's. 
Thus, given the same length of history, recurrency is a viable alternative to stacking a history of frames in the DQN's input layer and while recurrency confers no systematic advantage when learning to play the game, the recurrent net can better adapt at evaluation time if the quality of observations changes.}, + urldate = {2023-06-21}, + abstract = {Policy gradient methods are an appealing approach in reinforcement learning because they directly optimize the cumulative reward and can straightforwardly be used with nonlinear function approximators such as neural networks. The two main challenges are the large number of samples typically required, and the difficulty of obtaining stable and steady improvement despite the nonstationarity of the incoming data. We address the first challenge by using value functions to substantially reduce the variance of policy gradient estimates at the cost of some bias, with an exponentially-weighted estimator of the advantage function that is analogous to TD(lambda). We address the second challenge by using trust region optimization procedure for both the policy and the value function, which are represented by neural networks. Our approach yields strong empirical results on highly challenging 3D locomotion tasks, learning running gaits for bipedal and quadrupedal simulated robots, and learning a policy for getting the biped to stand up from starting out lying on the ground. In contrast to a body of prior work that uses hand-crafted policy representations, our neural network policies map directly from raw kinematics to joint torques. Our algorithm is fully model-free, and the amount of simulated experience required for the learning tasks on 3D bipeds corresponds to 1-2 weeks of real time.}, archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning}, - annotation = {1274 citations (Semantic Scholar/arXiv) [2023-06-04]}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2017/Deep Recurrent Q-Learning for Partially Observable MDPs (2017) - Hausknecht, Stone.pdf} -} - -@book{kochenderfer_algorithms_2022, - title = {Algorithms for {{Decision Making}}}, - author = {Kochenderfer, Mykel J and Wheeler, Tim A and Wray, Kyle H}, - year = {2022}, - month = aug, - urldate = {2022-10-23}, - abstract = {A broad introduction to algorithms for decision making under uncertainty, introducing the underlying mathematical problem formulations and the algorithms for...}, - isbn = {978-0-262-04701-2}, - langid = {american}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2022/Algorithms for Decision Making (2022) - Kochenderfer, Wheeler, Wray.pdf} + pubstate = {preprint}, + keywords = {Computer Science - Machine Learning,Computer Science - Robotics,Electrical Engineering and Systems Science - Systems and Control}, + annotation = {2253 citations (Semantic Scholar/arXiv) [2023-07-22]}, + file = {/Users/adzcai/Vault/papers/assets/2018/High-Dimensional Continuous Control Using Generalized Advantage Estimation (2018) - Schulman et al2.pdf} } @article{lai_asymptotically_1985, @@ -152,7 +181,7 @@ @article{lai_asymptotically_1985 issn = {0196-8858}, doi = {10.1016/0196-8858(85)90002-8}, urldate = {2023-10-23}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/1985/Asymptotically efficient adaptive allocation rules (1985) - Lai, Robbins.pdf} + file = 
{/Users/adzcai/Vault/papers/assets/1985/Asymptotically efficient adaptive allocation rules (1985) - Lai, Robbins.pdf} } @inproceedings{lechner_gigastep_2023, @@ -164,19 +193,7 @@ @inproceedings{lechner_gigastep_2023 urldate = {2023-12-12}, abstract = {Multi-agent reinforcement learning (MARL) research is faced with a trade-off: it either uses complex environments requiring large compute resources, which makes it inaccessible to researchers with limited resources, or relies on simpler dynamics for faster execution, which makes the transferability of the results to more realistic tasks challenging. Motivated by these challenges, we present Gigastep, a fully vectorizable, MARL environment implemented in JAX, capable of executing up to one billion environment steps per second on consumer-grade hardware. Its design allows for comprehensive MARL experimentation, including a complex, high-dimensional space defined by 3D dynamics, stochasticity, and partial observations. Gigastep supports both collaborative and adversarial tasks, continuous and discrete action spaces, and provides RGB image and feature vector observations, allowing the evaluation of a wide range of MARL algorithms. We validate Gigastep's usability through an extensive set of experiments, underscoring its role in widening participation and promoting inclusivity in the MARL research community.}, langid = {english}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2023/Gigastep - One Billion Steps per Second Multi-agent Reinforcement Learning (2023) - Lechner et al.pdf} -} - -@article{mnih_playing_2013, - title = {Playing {{Atari}} with {{Deep Reinforcement Learning}}}, - author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin A.}, - year = {2013}, - journal = {CoRR}, - volume = {abs/1312.5602}, - eprint = {1312.5602}, - urldate = {2024-06-21}, - archiveprefix = {arXiv}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2013/Playing Atari with Deep Reinforcement Learning (2013) - Mnih et al.pdf} + file = {/Users/adzcai/Vault/papers/assets/2023/Gigastep - One Billion Steps per Second Multi-agent Reinforcement Learning (2023) - Lechner et al.pdf} } @book{nielsen_neural_2015, @@ -187,15 +204,31 @@ @book{nielsen_neural_2015 urldate = {2024-03-10} } -@inproceedings{ross_reduction_2010, - title = {A {{Reduction}} of {{Imitation Learning}} and {{Structured Prediction}} to {{No-Regret Online Learning}}}, - booktitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}}, - author = {Ross, St{\'e}phane and Gordon, Geoffrey J. and Bagnell, J.}, - year = {2010}, - month = nov, - urldate = {2024-08-08}, - abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. 
We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2010/A Reduction of Imitation Learning and Structured Prediction to No-Regret Online (2010) - Ross, Gordon, Bagnell.pdf} +@inproceedings{azar_minimax_2017, + title = {Minimax {{Regret Bounds}} for {{Reinforcement Learning}}}, + booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}}}, + author = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi}, + year = {2017}, + month = jul, + pages = {263--272}, + publisher = {PMLR}, + issn = {2640-3498}, + urldate = {2024-06-21}, + abstract = {We consider the problem of provably optimal exploration in reinforcement learning for finite horizon MDPs. We show that an optimistic modification to value iteration achieves a regret bound of \${\textbackslash}tilde \{O\}( {\textbackslash}sqrt\{HSAT\} + H{\textasciicircum}2S{\textasciicircum}2A+H{\textbackslash}sqrt\{T\})\$ where \$H\$ is the time horizon, \$S\$ the number of states, \$A\$ the number of actions and \$T\$ the number of time-steps. This result improves over the best previous known bound \${\textbackslash}tilde \{O\}(HS {\textbackslash}sqrt\{AT\})\$ achieved by the UCRL2 algorithm. The key significance of our new results is that when \$T{\textbackslash}geq H{\textasciicircum}3S{\textasciicircum}3A\$ and \$SA{\textbackslash}geq H\$, it leads to a regret of \${\textbackslash}tilde\{O\}({\textbackslash}sqrt\{HSAT\})\$ that matches the established lower bound of \${\textbackslash}Omega({\textbackslash}sqrt\{HSAT\})\$ up to a logarithmic factor. Our analysis contain two key insights. We use careful application of concentration inequalities to the optimal value function as a whole, rather than to the transitions probabilities (to improve scaling in \$S\$), and we define Bernstein-based ``exploration bonuses'' that use the empirical variance of the estimated values at the next states (to improve scaling in \$H\$).}, + langid = {english}, + file = {/Users/adzcai/Vault/papers/assets/2017/Minimax Regret Bounds for Reinforcement Learning (2017) - Azar, Osband, Munos.pdf} +} + +@article{mnih_playing_2013-1, + title = {Playing {{Atari}} with {{Deep Reinforcement Learning}}}, + author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin A.}, + year = {2013}, + journal = {CoRR}, + volume = {abs/1312.5602}, + eprint = {1312.5602}, + urldate = {2024-06-21}, + archiveprefix = {arXiv}, + file = {/Users/adzcai/Vault/papers/assets/2013/Playing Atari with Deep Reinforcement Learning (2013) - Mnih et al.pdf} } @misc{sun_easy--hard_2024, @@ -213,51 +246,7 @@ @misc{sun_easy--hard_2024 abstract = {Current AI alignment methodologies rely on human-provided demonstrations or judgments, and the learned capabilities of AI systems would be upper-bounded by human capabilities as a result. This raises a challenging research question: How can we keep improving the systems when their capabilities have surpassed the levels of humans? 
This paper answers this question in the context of tackling hard reasoning tasks (e.g., level 4-5 MATH problems) via learning from human annotations on easier tasks (e.g., level 1-3 MATH problems), which we term as {\textbackslash}textit\{easy-to-hard generalization\}. Our key insight is that an evaluator (reward model) trained on supervisions for easier tasks can be effectively used for scoring candidate solutions of harder tasks and hence facilitating easy-to-hard generalization over different levels of tasks. Based on this insight, we propose a novel approach to scalable alignment, which firstly trains the process-supervised reward models on easy problems (e.g., level 1-3), and then uses them to evaluate the performance of policy models on hard problems. We show that such {\textbackslash}textit\{easy-to-hard generalization from evaluators\} can enable {\textbackslash}textit\{easy-to-hard generalizations in generators\} either through re-ranking or reinforcement learning (RL). Notably, our process-supervised 7b RL model achieves an accuracy of 34.0{\textbackslash}\% on MATH500, despite only using human supervision on easy problems. Our approach suggests a promising path toward AI systems that advance beyond the frontier of human supervision.}, archiveprefix = {arXiv}, keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2024/Easy-to-Hard Generalization (2024) - Sun et al.pdf;/Users/alexandercai/Zotero/storage/J52D59AK/2403.html} -} - -@book{sussman_functional_2013, - title = {Functional Differential Geometry}, - author = {Sussman, Gerald Jay and Wisdom, Jack and Farr, Will}, - year = {2013}, - publisher = {The MIT Press}, - address = {Cambridge, MA}, - isbn = {978-0-262-01934-7}, - lccn = {QC20.7.D52 S87 2013}, - keywords = {Functional differential equations,Geometry Differential,Mathematical physics}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2013/Functional differential geometry (2013) - Sussman, Wisdom, Farr.pdf} -} - -@book{sutton_reinforcement_2018, - title = {Reinforcement Learning: An Introduction}, - shorttitle = {Reinforcement Learning}, - author = {Sutton, Richard S. and Barto, Andrew G.}, - year = {2018}, - series = {Adaptive Computation and Machine Learning Series}, - edition = {Second edition}, - publisher = {The MIT Press}, - address = {Cambridge, Massachusetts}, - abstract = {"Reinforcement learning, one of the most active research areas in artificial intelligence, is a computational approach to learning whereby an agent tries to maximize the total amount of reward it receives while interacting with a complex, uncertain environment. 
In Reinforcement Learning, Richard Sutton and Andrew Barto provide a clear and simple account of the field's key ideas and algorithms."--}, - isbn = {978-0-262-03924-6}, - langid = {english}, - lccn = {Q325.6 .R45 2018}, - keywords = {Reinforcement learning}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2018/Reinforcement learning (2018) - Sutton, Barto.pdf} -} - -@book{vershynin_high-dimensional_2018, - title = {High-{{Dimensional Probability}}: {{An Introduction}} with {{Applications}} in {{Data Science}}}, - shorttitle = {High-{{Dimensional Probability}}}, - author = {Vershynin, Roman}, - year = {2018}, - month = sep, - publisher = {Cambridge University Press}, - abstract = {High-dimensional probability offers insight into the behavior of random vectors, random matrices, random subspaces, and objects used to quantify uncertainty in high dimensions. Drawing on ideas from probability, analysis, and geometry, it lends itself to applications in mathematics, statistics, theoretical computer science, signal processing, optimization, and more. It is the first to integrate theory, key tools, and modern applications of high-dimensional probability. Concentration inequalities form the core, and it covers both classical results such as Hoeffding's and Chernoff's inequalities and modern developments such as the matrix Bernstein's inequality. It then introduces the powerful methods based on stochastic processes, including such tools as Slepian's, Sudakov's, and Dudley's inequalities, as well as generic chaining and bounds based on VC dimension. A broad range of illustrations is embedded throughout, including classical and modern results for covariance estimation, clustering, networks, semidefinite programming, coding, dimension reduction, matrix completion, machine learning, compressed sensing, and sparse regression.}, - googlebooks = {NDdqDwAAQBAJ}, - isbn = {978-1-108-41519-4}, - langid = {english}, - keywords = {Business & Economics / Econometrics,Computers / Optical Data Processing,Language Arts & Disciplines / Library & Information Science / General,Mathematics / Probability & Statistics / General,Technology & Engineering / Signals & Signal Processing}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2018/High-Dimensional Probability (2018) - Vershynin.pdf} + file = {/Users/adzcai/Vault/papers/assets/2024/Easy-to-Hard Generalization (2024) - Sun et al.pdf;/Users/adzcai/Zotero/storage/J52D59AK/2403.html} } @misc{welleck_decoding_2024, @@ -275,7 +264,7 @@ @misc{welleck_decoding_2024 abstract = {One of the most striking findings in modern research on large language models (LLMs) is that scaling up compute during training leads to better results. However, less attention has been given to the benefits of scaling compute during inference. This survey focuses on these inference-time approaches. We explore three areas under a unified mathematical formalism: token-level generation algorithms, meta-generation algorithms, and efficient generation. Token-level generation algorithms, often called decoding algorithms, operate by sampling a single token at a time or constructing a token-level search space and then selecting an output. These methods typically assume access to a language model's logits, next-token distributions, or probability scores. 
Meta-generation algorithms work on partial or full sequences, incorporating domain knowledge, enabling backtracking, and integrating external information. Efficient generation methods aim to reduce token costs and improve the speed of generation. Our survey unifies perspectives from three research communities: traditional natural language processing, modern LLMs, and machine learning systems.}, archiveprefix = {arXiv}, keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2024/From Decoding to Meta-Generation (2024) - Welleck et al.pdf;/Users/alexandercai/Zotero/storage/S4Y984R4/2406.html} + file = {/Users/adzcai/Vault/papers/assets/2024/From Decoding to Meta-Generation (2024) - Welleck et al.pdf;/Users/adzcai/Zotero/storage/S4Y984R4/2406.html} } @misc{zhai_fine-tuning_2024, @@ -292,7 +281,7 @@ @misc{zhai_fine-tuning_2024 abstract = {Large vision-language models (VLMs) fine-tuned on specialized visual instruction-following data have exhibited impressive language reasoning capabilities across various scenarios. However, this fine-tuning paradigm may not be able to efficiently learn optimal decision-making agents in multi-step goal-directed tasks from interactive environments. To address this challenge, we propose an algorithmic framework that fine-tunes VLMs with reinforcement learning (RL). Specifically, our framework provides a task description and then prompts the VLM to generate chain-of-thought (CoT) reasoning, enabling the VLM to efficiently explore intermediate reasoning steps that lead to the final text-based action. Next, the open-ended text output is parsed into an executable action to interact with the environment to obtain goal-directed task rewards. Finally, our framework uses these task rewards to fine-tune the entire VLM with RL. Empirically, we demonstrate that our proposed framework enhances the decision-making capabilities of VLM agents across various tasks, enabling 7b models to outperform commercial models such as GPT4-V or Gemini. Furthermore, we find that CoT reasoning is a crucial component for performance improvement, as removing the CoT reasoning results in a significant decrease in the overall performance of our method.}, archiveprefix = {arXiv}, keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2024/Fine-Tuning Large Vision-Language Models as Decision-Making Agents via (2024) - Zhai et al.pdf;/Users/alexandercai/Zotero/storage/2X2WJU4D/2405.html} + file = {/Users/adzcai/Vault/papers/assets/2024/Fine-Tuning Large Vision-Language Models as Decision-Making Agents via (2024) - Zhai et al.pdf;/Users/adzcai/Zotero/storage/2X2WJU4D/2405.html} } @misc{zhang_adaptable_2024, @@ -309,7 +298,15 @@ @misc{zhang_adaptable_2024 abstract = {Despite the success of Large Language Models (LLMs) on various tasks following human instructions, controlling model generation at inference time poses a persistent challenge. In this paper, we introduce Ctrl-G, an adaptable framework that facilitates tractable and flexible control of LLM generation to reliably follow logical constraints. 
Ctrl-G combines any production-ready LLM with a Hidden Markov Model, enabling LLM outputs to adhere to logical constraints represented as deterministic finite automata. We show that Ctrl-G, when applied to a TULU2-7B model, outperforms GPT3.5 and GPT4 on the task of interactive text editing: specifically, for the task of generating text insertions/continuations following logical constraints, Ctrl-G achieves over 30\% higher satisfaction rate in human evaluation compared to GPT4. When applied to medium-size language models (e.g., GPT2-large), Ctrl-G also beats its counterparts for constrained generation by large margins on standard benchmarks. Additionally, as a proof-of-concept study, we experiment Ctrl-G on the Grade School Math benchmark to assist LLM reasoning, foreshadowing the application of Ctrl-G, as well as other constrained generation approaches, beyond traditional language generation tasks.}, archiveprefix = {arXiv}, keywords = {Computer Science - Computation and Language}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2024/Adaptable Logical Control for Large Language Models (2024) - Zhang, Kung, Yoshida, Broeck, Peng.pdf;/Users/alexandercai/Zotero/storage/38W8T74Y/2406.html} + file = {/Users/adzcai/Vault/papers/assets/2024/Adaptable Logical Control for Large Language Models (2024) - Zhang, Kung, Yoshida, Broeck, Peng.pdf;/Users/adzcai/Zotero/storage/38W8T74Y/2406.html} +} + +@article{achiam_spinning_2018, + title = {Spinning {{Up}} in {{Deep Reinforcement Learning}}}, + author = {Achiam, Joshua}, + year = {2018}, + urldate = {2024-07-01}, + file = {/Users/adzcai/Zotero/storage/UPUMW6XV/index.html} } @misc{zhang_deep_2015, @@ -326,5 +323,49 @@ @misc{zhang_deep_2015 abstract = {We study the problem of stochastic optimization for deep learning in the parallel computing environment under communication constraints. A new algorithm is proposed in this setting where the communication and coordination of work among concurrent processes (local workers), is based on an elastic force which links the parameters they compute with a center variable stored by the parameter server (master). The algorithm enables the local workers to perform more exploration, i.e. the algorithm allows the local variables to fluctuate further from the center variable by reducing the amount of communication between local workers and the master. We empirically demonstrate that in the deep learning setting, due to the existence of many local optima, allowing more exploration can lead to the improved performance. We propose synchronous and asynchronous variants of the new algorithm. We provide the stability analysis of the asynchronous variant in the round-robin scheme and compare it with the more common parallelized method ADMM. We show that the stability of EASGD is guaranteed when a simple stability condition is satisfied, which is not the case for ADMM. We additionally propose the momentum-based version of our algorithm that can be applied in both synchronous and asynchronous settings. Asynchronous variant of the algorithm is applied to train convolutional neural networks for image classification on the CIFAR and ImageNet datasets. 
Experiments demonstrate that the new algorithm accelerates the training of deep architectures compared to DOWNPOUR and other common baseline approaches and furthermore is very communication efficient.}, archiveprefix = {arXiv}, keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, - file = {/Users/alexandercai/Library/CloudStorage/GoogleDrive-alexcai@college.harvard.edu/My Drive/Vault/papers/assets/2015/Deep learning with Elastic Averaging SGD (2015) - Zhang, Choromanska, LeCun.pdf;/Users/alexandercai/Zotero/storage/M4LFKVWK/1412.html} + file = {/Users/adzcai/Vault/papers/assets/2015/Deep learning with Elastic Averaging SGD (2015) - Zhang, Choromanska, LeCun.pdf;/Users/adzcai/Zotero/storage/M4LFKVWK/1412.html} +} + +@article{barto_neuronlike_1983, + title = {Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems}, + author = {Barto, Andrew G. and Sutton, Richard S. and Anderson, Charles W.}, + year = {1983}, + month = sep, + journal = {IEEE Transactions on Systems, Man, and Cybernetics}, + volume = {SMC-13}, + number = {5}, + pages = {834--846}, + issn = {2168-2909}, + doi = {10.1109/TSMC.1983.6313077}, + urldate = {2024-07-01}, + abstract = {It is shown how a system consisting of two neuronlike adaptive elements can solve a difficult learning control problem. The task is to balance a pole that is hinged to a movable cart by applying forces to the cart's base. It is argued that the learning problems faced by adaptive elements that are components of adaptive networks are at least as difficult as this version of the pole-balancing problem. The learning system consists of a single associative search element (ASE) and a single adaptive critic element (ACE). In the course of learning to balance the pole, the ASE constructs associations between input and output by searching under the influence of reinforcement feedback, and the ACE constructs a more informative evaluation function than reinforcement feedback alone can provide. The differences between this approach and other attempts to solve problems using neurolike elements are discussed, as is the relation of this work to classical and instrumental conditioning in animal learning studies and its possible implications for research in the neurosciences.}, + keywords = {Adaptive systems,Biological neural networks,Neurons,Pattern recognition,Problem-solving,Supervised learning,Training}, + file = {/Users/adzcai/Zotero/storage/GHD9WZXL/6313077.html} +} + +@inproceedings{ross_reduction_2010, + title = {A {{Reduction}} of {{Imitation Learning}} and {{Structured Prediction}} to {{No-Regret Online Learning}}}, + booktitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}}, + author = {Ross, St{\'e}phane and Gordon, Geoffrey J. and Bagnell, J.}, + year = {2010}, + month = nov, + urldate = {2024-08-08}, + abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. 
We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.},
+  file = {/Users/adzcai/Vault/papers/assets/2010/A Reduction of Imitation Learning and Structured Prediction to No-Regret Online (2010) - Ross, Gordon, Bagnell.pdf}
+}
+
+@book{heath_scientific_2018,
+  title = {Scientific Computing: An Introductory Survey},
+  shorttitle = {Scientific Computing},
+  author = {Heath, Michael T.},
+  year = {2018},
+  series = {Classics in Applied Mathematics},
+  edition = {Revised second edition, SIAM edition},
+  number = {80},
+  publisher = {{Society for Industrial and Applied Mathematics}},
+  address = {Philadelphia},
+  isbn = {978-1-61197-557-4},
+  lccn = {Q183.9 .H4 2018},
+  keywords = {Data processing,Numerical analysis,Science},
+  file = {/Users/adzcai/Vault/papers/assets/2018/Scientific computing (2018) - Heath - Chapter 1.pdf;/Users/adzcai/Vault/papers/assets/2018/Scientific computing (2018) - Heath - Chapter 2.pdf;/Users/adzcai/Vault/papers/assets/2018/Scientific computing (2018) - Heath - Chapter 3.pdf;/Users/adzcai/Vault/papers/assets/2018/Scientific computing (2018) - Heath - Frontmatter.pdf}
}
diff --git a/book/shared/trajectory.png b/book/shared/trajectory.png
new file mode 100644
index 0000000000000000000000000000000000000000..a4529ea7954d8d267273705d42ace91a2be226af
GIT binary patch
literal 36453
+where $\eta > 0$ is the **learning rate**.
+:::
+
+```{code-cell}
+:tags: [hide-input]
+
+from jaxtyping import Float, Array
+from collections.abc import Callable
+
+# needed by the code cells below
+import jax.numpy as np
+from jax import grad, vmap
+```
+
+```{code-cell}
+Params = Float[Array, " D"]
+
+
+def gradient_descent(
+    loss: Callable[[Params], float],
+    θ_init: Params,
+    η: float,
+    epochs: int,
+):
+    """
+    Run gradient descent to minimize the given loss function
+    (expressed in terms of the parameters).
+    """
+    θ = θ_init
+    for _ in range(epochs):
+        # step against the gradient of the loss, scaled by the learning rate
+        θ = θ - η * grad(loss)(θ)
+    return θ
+```
+
+## Linear regression
+
+In linear regression, we assume that the function $f$ is linear in the parameters:
+
+$$
+\mathcal{F} = \{ x \mapsto \theta^\top x \mid \theta \in \mathbb{R}^D \}
+$$
+
+This function class is extremely simple and only contains linear functions.
+To expand its expressivity, we can _transform_ the input $x$ using some feature function $\phi$,
+i.e. $\widetilde x = \phi(x)$, and then fit a linear model in the transformed space instead.
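+
+For example, a quadratic feature map lets this "linear" model capture quadratic structure in the data.
+The sketch below is purely illustrative (the helper `φ_quadratic` is not used elsewhere):
+
+```{code-cell}
+def φ_quadratic(x: Float[Array, " D"]) -> Float[Array, " K"]:
+    """Illustrative feature map: a constant feature, the raw inputs, and all pairwise products."""
+    return np.concatenate([np.ones(1), x, np.outer(x, x).flatten()])
+```
+
+Passing `φ=φ_quadratic` to the `fit_linear` function defined next then runs ordinary least squares in this lifted feature space.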
+
+```{code-cell}
+def fit_linear(X: Float[Array, "N D"], y: Float[Array, " N"], φ=lambda x: x):
+    """Fit a linear model to the given dataset using ordinary least squares."""
+    X = vmap(φ)(X)  # apply the feature map to each input
+    θ = np.linalg.lstsq(X, y, rcond=None)[0]  # solve the least-squares problem
+    return lambda x: np.dot(φ(x), θ)
+```
+
+## Neural networks
+
+In neural networks, we assume that the function $f$ is a composition of linear functions (represented by matrices $W_i$) and non-linear activation functions (denoted by $\sigma$):
+
+$$
+\mathcal{F} = \{ x \mapsto \sigma(W_L \sigma(W_{L-1} \dots \sigma(W_1 x + b_1) \dots + b_{L-1}) + b_L) \}
+$$
+
+where $W_i \in \mathbb{R}^{D_{i+1} \times D_i}$ and $b_i \in \mathbb{R}^{D_{i+1}}$ are the parameters of the $i$-th layer, and $\sigma$ is the activation function.
+
+This function class is much more expressive and contains many more parameters.
+This makes it more susceptible to overfitting on smaller datasets,
+but also allows it to represent more complex functions.
+In practice, however, neural networks exhibit surprising behavior during training,
+and often generalize well even when the number of parameters is large relative to the dataset.
+
+Another reason for their popularity is the efficient **backpropagation** algorithm for computing the gradient of the empirical risk with respect to the parameters.
+Essentially, the hierarchical structure of the neural network,
+i.e. computing the output of the network as a composition of functions,
+allows us to use the chain rule to compute the gradient of the output with respect to the parameters of each layer.
+
+{cite}`nielsen_neural_2015` provides a comprehensive introduction to neural networks and backpropagation.
diff --git a/environment.yml b/environment.yml
index c8d9ddd..767195a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -16,7 +16,7 @@ dependencies:
   # book
   - jupyter-book
   - jupyterlab
-  - jupytext 1.16.*
+  - jupytext 1.16.2
   - swig
   # github pages
   - ghp-import
diff --git a/graphs.md b/graphs.md
new file mode 100644
index 0000000..0dd9295
--- /dev/null
+++ b/graphs.md
@@ -0,0 +1,23 @@
+# Graphs
+
+Graphs are made with https://mermaid.js.org/syntax/flowchart.html
+
+```bash
+pbpaste | mmdc -i - -o trajectory.png -b transparent
+```
+
+## Trajectories
+
+```mermaid
+graph LR
+    S0($$s_0$$) -- $$\pi_0$$ --> A0{{$$a_0$$}}
+    S0 & A0 --> R0[$$r_0$$]
+    A0 & S0 -- $$P$$ --> S1($$s_1$$)
+    S1 -- $$\pi_1$$ --> A1{{$$a_1$$}}
+    S1 & A1 --> R1[$$r_1$$]
+    A1 & S1 -- $$P$$ --> S2($$s_2$$)
+    S2 -- $$\pi_2$$ --> A2{{$$a_2$$}}
+    S2 & A2 --> R2[$$r_2$$]
+    A2 & S2 -- $$P$$ --> S3($$s_3$$)
+```
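
To make the neural network function class from `book/supervised_learning.md` above concrete, here is a minimal standalone sketch (an illustration only, assuming JAX and a $\tanh$ activation; `mlp_forward` and `example_loss` are hypothetical names): the forward pass composes affine maps with a nonlinearity, and `jax.grad` differentiates through that composition via the chain rule, which is exactly the computation backpropagation performs.

```python
import jax.numpy as jnp
from jax import grad


def mlp_forward(params, x):
    """Compute σ(W_L σ(… σ(W_1 x + b_1) …) + b_L) with σ = tanh."""
    for W, b in params:
        x = jnp.tanh(W @ x + b)
    return x


def example_loss(params, x, y):
    """Squared error of the network's prediction on a single example."""
    return jnp.sum((mlp_forward(params, x) - y) ** 2)


# Reverse-mode differentiation through the composition of layers,
# i.e. the chain-rule computation described in the neural networks section.
params = [(jnp.ones((3, 2)), jnp.zeros(3)), (jnp.ones((1, 3)), jnp.zeros(1))]
grads = grad(example_loss)(params, jnp.ones(2), jnp.zeros(1))
```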