diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 444d74e..b392b7a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,7 +29,9 @@ jobs: - name: Build Packages run: sh tools/build.sh - name: Test Packages - run: sh tools/test.sh + run: sh tools/test_all.sh + - name: Update API Docs + run: sh tools/docs.sh - name: Report Code Coverage env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e69de29 diff --git a/mathy_core/expressions.py b/mathy_core/expressions.py index 624b222..d831d66 100644 --- a/mathy_core/expressions.py +++ b/mathy_core/expressions.py @@ -28,7 +28,7 @@ MathTypeKeysMax = max(MathTypeKeys.values()) + 1 -class MathExpression(BinaryTreeNode): +class MathExpression(BinaryTreeNode["MathExpression"]): """Math tree node with helpers for manipulating expressions. `mathy:x+y=z` @@ -117,7 +117,7 @@ def visit_fn( def with_color(self, text: str, style: str = "bright") -> str: """Render a string that is colored if something has changed""" if self._rendering_change is True and self._changed is True: - return color(text, fore=self.color, style=style) + return f"{color(text, fore=self.color, style=style)}" return text def add_class(self, classes: Union[List[str], str]) -> "MathExpression": @@ -147,7 +147,7 @@ def visit_fn( def to_list(self, visit: str = "preorder") -> List["MathExpression"]: """Convert this node hierarchy into a list.""" - results = [] + results: List[MathExpression] = [] def visit_fn( node: MathExpression, depth: int, data: Any @@ -688,7 +688,7 @@ def clone(self) -> "ConstantExpression": # type:ignore[override] result.value = self.value return result # type:ignore - def evaluate(self, _context: Optional[Dict[str, NumberType]] = None) -> NumberType: + def evaluate(self, context: Optional[Dict[str, NumberType]] = None) -> NumberType: assert self.value is not None return self.value diff --git a/mathy_core/layout.md b/mathy_core/layout.md new file mode 100644 index 0000000..ec42b6d --- /dev/null +++ b/mathy_core/layout.md @@ -0,0 +1,4 @@ + +Mathy uses the Tidier algorithm to create visual tree layouts for helping understand and interpret complex node trees. + +`mathy:(28 + 1x)(17x - 2y)` diff --git a/mathy_core/parser.md b/mathy_core/parser.md new file mode 100644 index 0000000..748b816 --- /dev/null +++ b/mathy_core/parser.md @@ -0,0 +1,17 @@ +Mathy parses [token arrays](./tokenizer) into inspectable, transformable, visualizable symbolic trees. + +## Motivation + +A Token array verifies that text maps to some known symbols, not that they are a correct ordering that produces a valid mathematical expression. The mathy Parser class converts tokens into a tree while also validating that the tree follows the expected Order of Operations. 
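In code, parsing is a single call on an `ExpressionParser` instance. The snippet below is a minimal sketch (assuming `mathy_core` is installed and imported as shown) that parses an expression and then evaluates it with variable bindings:

```python
from mathy_core.parser import ExpressionParser

parser = ExpressionParser()
expression = parser.parse("4x + 2y^7")

# The parser returns a MathExpression tree whose structure encodes the
# Order of Operations, so it can be evaluated by resolving variables.
print(expression.evaluate({"x": 2, "y": 1}))  # 4*2 + 2*1^7 = 10
```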
+ +## Examples + +To help better understand what the parser does, consider a few examples of expressions and their visualized trees: + +| Text | Tree | +| --------------------- | --------------------------- | +| `4x` | `mathy:4x` | +| `4x / 2y^7` | `mathy:4x/2y^7` | +| `4x + (1/3)y + 7x` | `mathy:4x+ (1/3)y + 7x` | +| `4x + 1/3y + 7x` | `mathy:4x+ 1/3y + 7x` | +| `(28 + 1j)(17j + 2y)` | `mathy:(28 + 1j)(17j + 2y)` | diff --git a/mathy_core/rules/associative_swap.md b/mathy_core/rules/associative_swap.md new file mode 100644 index 0000000..7f6ac14 --- /dev/null +++ b/mathy_core/rules/associative_swap.md @@ -0,0 +1,44 @@ +The `Associative Property` of numbers says that we can re-group two `addition` or `multiplication` terms so that one is evaluated before the other without changing the value of the expression. + +The formulation of this property is the same for addition and multiplication: + +- Addition `(a + b) + c = a + (b + c)` +- Multiplication `(a * b) * c = a * (b * c)` + +!!! note + + Interestingly, applying the associative property of numbers to a binary expression tree is a standard tree operation called a "node rotation." + +### Transformations + +#### Addition + +``` +(a + b) + c = a + (b + c) + + (y) + + (x) + / \ / \ + / \ / \ + (x) + c -> a + (y) + / \ / \ + / \ / \ + a b b c +``` + +#### Multiplication + +``` +(a * b) * c = a * (b * c) + + (x) * * (y) + / \ / \ + / \ / \ + (y) * c <- a * (x) + / \ / \ + / \ / \ + a b b c +``` + +### Examples + +`rule_tests:associative_swap` diff --git a/mathy_core/rules/commutative_swap.md b/mathy_core/rules/commutative_swap.md new file mode 100644 index 0000000..65d60e7 --- /dev/null +++ b/mathy_core/rules/commutative_swap.md @@ -0,0 +1,40 @@ +The `Commutative Property` of numbers says that we can re-order two `addition` or `multiplication` terms so that one occurs before the other in the expression without changing the value of the expression. + +The formulation of this property is the same for addition and multiplication: + +- Addition `a + b = b + a` +- Multiplication `a * b = b * a` + +The commutative property is used for re-arranging the order of parts of an expression and is, as such, very important for working with mathematical expressions. + +### Transformations + +Given a common parent node, this rule switches the order of the children of that node. It can only be applied to addition or multiplication nodes. + +#### Addition + +`a + b = b + a` + +``` + + + + / \ / \ + / \ -> / \ + / \ / \ + a b b a +``` + +#### Multiplication + +`a * b = b * a` + +``` + * * + / \ / \ + / \ -> / \ + / \ / \ + a b b a +``` + +### Examples + +`rule_tests:commutative_swap` diff --git a/mathy_core/rules/constants_simplify.md b/mathy_core/rules/constants_simplify.md new file mode 100644 index 0000000..ec49176 --- /dev/null +++ b/mathy_core/rules/constants_simplify.md @@ -0,0 +1,37 @@ +The `Constant Arithmetic` rule transforms an expression tree by combining two constant values separated by a binary operation like `addition` or `division`. + +### Transformations + +#### Two Constants + +The most uncomplicated transform is to evaluate two constants that are siblings. + +- `(4 * 2) + 3` = `8 + 3` + +#### Sibling Skipping + +The constant simplify rule can simplify constants across a sibling when the sibling is a variable chunk, and the constants are commutatively connected. + +For example, `2x * 8` can be transformed into `16x` because the constants are connected through a multiplication chain that allows [commuting](./commutative_property). 
+ +We can see this by taking a look at the trees for `2x * 8` and `2 * 8 * x` and recalling that the commutative property says `a * b = b * a`: + +| Satisfying the Commutative Property | | +| :---------------------------------: | :---------------- | +| `mathy:2x * 8` | `mathy:2 * 8 * x` | + +We can see that the tree structure has been flipped but that multiplication nodes still connect the same variables and constants, so the value of the expression remains unchanged. + +#### Alternate Tree Forms + +Math trees can be represented in many different equivalent forms, so mathy supports these unnatural groupings to make this rule applicable to more nodes in the tree. + +- `5 * (8h * t)` = `40h * t` +- `(7 * 10y^3) * x` = `70y^3 * x` +- `(7q * 10y^3) * x` = `(70q * y^3) * x` +- `792z^4 * 490f * q^3` = `388080z^4 * f * q^3` +- `(u^3 * 36c^6) * 7u^3` = `u^3 * 252c^6 * u^3` + +### Examples + +`rule_tests:constants_simplify` diff --git a/mathy_core/rules/distributive_factor_out.md b/mathy_core/rules/distributive_factor_out.md new file mode 100644 index 0000000..c010113 --- /dev/null +++ b/mathy_core/rules/distributive_factor_out.md @@ -0,0 +1,31 @@ +The `Distributive Property` of numbers says that we can factor out common values from terms connected with an addition operator. + +This rule is expressed by the equation `ab + ac = a(b + c)` + +!!! note + + This is a core transformation used in combining like terms, though we usually skip over it mentally because humans are pretty intelligent. + + Consider that the `9y + 9y` example from above becomes `(9 + 9) * y`. If you apply a constant simplification rule, you end up with `18y`, which results from combining the two like `y` terms. + +### Transformations + +Given a common parent node, this rule extracts the common value from both sides, leaving an addition and a multiplication. + +#### Addition + +`ab + ac = a(b + c)` + +``` + + * + / \ / \ + / \ / \ + / \ -> / \ + * * a + + / \ / \ / \ + a b a c b c +``` + +### Examples + +`rule_tests:distributive_factor_out` diff --git a/mathy_core/rules/distributive_multiply_across.md b/mathy_core/rules/distributive_multiply_across.md new file mode 100644 index 0000000..c5812df --- /dev/null +++ b/mathy_core/rules/distributive_multiply_across.md @@ -0,0 +1,26 @@ +The `Distributive Property` can distribute multiplication across grouped terms. This has the effect of removing a grouping and can expose the terms that were inside for further simplification depending on the problem type. + +This rule is expressed by the equation `a(b + c) = ab + ac` + +### Transformations + +Given a multiplication of `a` and `(b + c)`, this rule distributes `a` across `b` and `c`, leaving only the simpler form of `ab` and `ac`. + +#### Addition + +`a(b + c) = ab + ac` + +``` + + + * / \ + / \ / \ + / \ / \ + a + -> * * + / \ / \ / \ + / \ / \ / \ + b c a b a c +``` + +### Examples + +`rule_tests:distributive_multiply_across` diff --git a/mathy_core/rules/variable_multiplication.md b/mathy_core/rules/variable_multiplication.md new file mode 100644 index 0000000..80c80e8 --- /dev/null +++ b/mathy_core/rules/variable_multiplication.md @@ -0,0 +1,61 @@ +The `Variable Multiplication` rule restates `x^b * x^d` as `x^(b + d)`, which isolates the exponents attached to the variables so they can be combined. + +!!! note + + This rule can only be applied when the nodes have matching variable bases. This means that `x * y` cannot be combined, but `x * x` can be. 
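The base-matching requirement can be checked directly with the rule's `can_apply_to` helper. This is a small sketch (assuming `mathy_core` is installed); the expected results follow from the note above:

```python
from mathy_core.parser import ExpressionParser
from mathy_core.rules.variable_multiply import VariableMultiplyRule

parser = ExpressionParser()
rule = VariableMultiplyRule()

# Matching bases: the rule applies to the root multiplication node.
print(rule.can_apply_to(parser.parse("x * x")))  # expected True
# Different bases: the rule does not apply.
print(rule.can_apply_to(parser.parse("x * y")))  # expected False
```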
+ +### Transformations + +Both implicit and explicit variable powers are recognized in this transformation. + +!!! info "Help Wanted" + + The current variable multiply rule leaves out a case where there is a power + raised to another power, they can be combined by multiplying the exponents + together. + + For example: `x^(2^2) = x^4` + + If you would like to help out with by updating this rule [open an issue here](https://github.com/justindujardin/mathy/issues/new?title=VariableMultiplyRaisePowerToPower){target=\_blank} + +#### Explicit powers + +In the simplest case, both variables have explicit exponents. + +Examples: `x^b * x^d = x^(b+d)` + +- `42x^2 * x^3` becomes `42x^(2 + 3)` +- `x^1 * x^7` becomes `x^(1 + 8)` + +``` + * + / \ + / \ ^ + / \ = / \ + ^ ^ x + + / \ / \ / \ + x b x d b d +``` + +#### Implicit powers + +When not explicitly stated, a variable has an implicit power of being raised to the 1, and this form is identified. + +Examples: `x * x^d = x^(1 + d)` + +- `42x * x^3` becomes `42x^(1 + 3)` +- `x * x` becomes `x^(1 + 1)` + +``` + * + / \ + / \ ^ + / \ = / \ + x ^ x + + / \ / \ + x d 1 d +``` + +### Examples + +`rule_tests:variable_multiply` diff --git a/mathy_core/tokenizer.md b/mathy_core/tokenizer.md new file mode 100644 index 0000000..84ee8eb --- /dev/null +++ b/mathy_core/tokenizer.md @@ -0,0 +1,32 @@ +## Motivation + +We first need an intermediate representation to parse math text into tree structures that encode the Order of Operations of the input. Specifically, we want to build a list of text characters corresponding to relevant `tokens` for a math expression. That is what the tokenizer does. + +The tokenization process treats the input string as an array of characters, iterating over them to produce a list of tokens with `type`/`value` properties. While building the collection, the tokenizer also optionally discards extra whitespace characters. + +## Visual Example + +For example, consider the input text `8 - (2 + 4)` and its token representation. + +`tokens:8 - (2 + 4)` + +- The top row contains the token value. +- The bottom row includes the integer type of the token represented by the value. + +## Code Example + +Simple tokenization only requires a few lines of code: + +```Python + +{!./snippets/cas/tokenizer_tokenize.py!} + +``` + +## Conceptual Example + +To better understand the tokenizer, let's build a tokens array manually then compare it to the tokenizer outputs: + +```Python +{!./snippets/cas/tokenizer_manual.py!} +``` diff --git a/mathy_core/tree.py b/mathy_core/tree.py index d91070c..51f4d67 100644 --- a/mathy_core/tree.py +++ b/mathy_core/tree.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, List, Optional, TypeVar, Union, cast +from typing import Any, Callable, Generic, List, Optional, TypeVar, Union, cast from .types import Literal @@ -29,7 +29,7 @@ ] -class BinaryTreeNode: +class BinaryTreeNode(Generic[NodeType]): """ The binary tree node is the base node for all of our trees, and provides a rich set of methods for constructing, inspecting, and modifying them. 
@@ -44,18 +44,18 @@ class BinaryTreeNode: y: Optional[float] offset: Optional[float] level: Optional[int] - thread: Optional["BinaryTreeNode"] + thread: Optional[NodeType] - left: Optional["BinaryTreeNode"] - right: Optional["BinaryTreeNode"] - parent: Optional["BinaryTreeNode"] + left: Optional[NodeType] + right: Optional[NodeType] + parent: Optional[NodeType] # Allow specifying children in the constructor def __init__( - self, - left: Optional["BinaryTreeNode"] = None, - right: Optional["BinaryTreeNode"] = None, - parent: Optional["BinaryTreeNode"] = None, + self: NodeType, + left: Optional[NodeType] = None, + right: Optional[NodeType] = None, + parent: Optional[NodeType] = None, id: Optional[str] = None, ): if id is None: @@ -208,7 +208,7 @@ def get_root(self: NodeType) -> NodeType: return cast(NodeType, result) - def get_root_side(self: "BinaryTreeNode") -> Literal["left", "right"]: + def get_root_side(self: NodeType) -> Literal["left", "right"]: """Return the side of the tree that this node lives on""" result = self last_child = None @@ -225,7 +225,7 @@ def get_root_side(self: "BinaryTreeNode") -> Literal["left", "right"]: def set_left( self: NodeType, - child: Optional["BinaryTreeNode"] = None, + child: Optional[NodeType] = None, clear_old_child_parent: bool = False, ) -> NodeType: """Set the left node to the passed `child`""" @@ -241,7 +241,7 @@ def set_left( def set_right( self: NodeType, - child: Optional["BinaryTreeNode"] = None, + child: Optional[NodeType] = None, clear_old_child_parent: bool = False, ) -> NodeType: """Set the right node to the passed `child`""" @@ -255,7 +255,7 @@ def set_right( return self - def get_side(self, child: Optional["BinaryTreeNode"]) -> Literal["left", "right"]: + def get_side(self, child: Optional[NodeType]) -> Literal["left", "right"]: """Determine whether the given `child` is the left or right child of this node""" if child == self.left: diff --git a/package.json b/package.json index 6629c50..995009f 100644 --- a/package.json +++ b/package.json @@ -65,13 +65,16 @@ [ "@semantic-release/changelog", { - "changelogFile": "libraries/website/docs/changelog.md" + "changelogFile": "website/docs/changelog.md" } ], [ "@semantic-release/git", { "assets": [ + "website/docs/changelog.md", + "website/docs/api/*.md", + "website/docs/api/**/*.md", "mathy_core/about.py", "package.json" ], diff --git a/tools/README.md b/tools/README.md index a487b47..2a77242 100644 --- a/tools/README.md +++ b/tools/README.md @@ -1,9 +1,5 @@ Build Tools --- -These utilities are consistently named and present in a "tools" folder for each -app in the monorepo. This means you can CD into any app and run `sh tools/setup.sh` -to install its prerequisites and then `sh tools/build.sh` to build the app. - These files should **NEVER** be run from within the tools folder. They should always be called -from the root folder of the app (or repo). +from the parent, e.g. 
`sh tools/setup.sh` diff --git a/tools/test_all.sh b/tools/test_all.sh new file mode 100644 index 0000000..9e36240 --- /dev/null +++ b/tools/test_all.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e + +sh tools/test.sh +(cd website && sh tools/test.sh) diff --git a/website/__init__.py b/website/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/docs/api/expressions.md b/website/docs/api/expressions.md new file mode 100644 index 0000000..51c34f9 --- /dev/null +++ b/website/docs/api/expressions.md @@ -0,0 +1,320 @@ +# mathy_core.expressions + +## AbsExpression +```python +AbsExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` +Evaluates the absolute value of an expression. +## AddExpression +```python +AddExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Add one and two +## BinaryExpression +```python +BinaryExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +An expression that operates on two sub-expressions +### get_priority +```python +BinaryExpression.get_priority(self) -> int +``` +Return a number representing the order of operations priority +of this node. This can be used to check if a node is `locked` +with respect to another node, i.e. the other node must be resolved +first during evaluation because of it's priority. + +### to_math_ml_fragment +```python +BinaryExpression.to_math_ml_fragment(self) -> str +``` +Render this node as a MathML element fragment +## ConstantExpression +```python +ConstantExpression(self, value: Optional[float, int] = None) +``` +A Constant value node, where the value is accessible as `node.value` +## DivideExpression +```python +DivideExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Divide one by two +## EqualExpression +```python +EqualExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Evaluate equality of two expressions +### operate +```python +EqualExpression.operate( + self, + one: Union[float, int], + two: Union[float, int], +) -> Union[float, int] +``` +Return the value of the equation if one == two. + +Raise ValueError if both sides of the equation don't agree. + +## FactorialExpression +```python +FactorialExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` +Factorial of a constant, e.g. `5` evaluates to `120` +## FunctionExpression +```python +FunctionExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` +A Specialized UnaryExpression that is used for functions. The function name in +text (used by the parser and tokenizer) is derived from the name() method on the +class. +## MathExpression +```python +MathExpression( + self, + id: Optional[str] = None, + left: Optional[MathExpression] = None, + right: Optional[MathExpression] = None, + parent: Optional[MathExpression] = None, +) +``` +Math tree node with helpers for manipulating expressions. 
+ +`mathy:x+y=z` + +### add_class +```python +MathExpression.add_class( + self, + classes: Union[List[str], str], +) -> 'MathExpression' +``` +Associate a class name with an expression. This class name will be +attached to nodes when the expression is converted to a capable output +format. + +See `MathExpression.to_math_ml_fragment` +### all_changed +```python +MathExpression.all_changed(self) -> None +``` +Mark this node and all of its children as changed +### clear_classes +```python +MathExpression.clear_classes(self) -> None +``` +Clear all the classes currently set on the nodes in this expression. +### clone +```python +MathExpression.clone(self) -> 'MathExpression' +``` +A specialization of the clone method that can track and report a cloned +subtree node. + +See `MathExpression.clone_from_root` for more details. +### clone_from_root +```python +MathExpression.clone_from_root( + self, + node: Optional[MathExpression] = None, +) -> 'MathExpression' +``` +Clone this node including the entire parent hierarchy that it has. This +is useful when you want to clone a subtree and still maintain the overall +hierarchy. + +__Arguments__ + +- __node (MathExpression)__: The node to clone. + +__Returns__ + +`(MathExpression)`: The cloned node. + +### color +Color to use for this node when rendering it as changed with +`.terminal_text` +### evaluate +```python +MathExpression.evaluate( + self, + context: Optional[Dict[str, Union[float, int]]] = None, +) -> Union[float, int] +``` +Evaluate the expression, resolving all variables to constant values +### find_id +```python +MathExpression.find_id(self, id: str) -> Optional[MathExpression] +``` +Find an expression by its unique ID. + +Returns: The found `MathExpression` or `None` + +### find_type +```python +MathExpression.find_type(self, instanceType: Type[~NodeType]) -> List[~NodeType] +``` +Find an expression in this tree by type. + +- instanceType: The type to check for instances of + +Returns the found `MathExpression` objects of the given type. + +### make_ml_tag +```python +MathExpression.make_ml_tag( + self, + tag: str, + content: str, + classes: List[str] = [], +) -> str +``` +Make a MathML tag for the given content while respecting the node's given +classes. + +__Arguments__ + +- __tag (str)__: The ML tag name to create. +- __content (str)__: The ML content to place inside of the tag. +classes (List[str]) An array of classes to attach to this tag. + +__Returns__ + +`(str)`: A MathML element with the given tag, content, and classes + +### path_to_root +```python +MathExpression.path_to_root(self) -> str +``` +Generate a namespaced path key to from the current node to the root. +This key can be used to identify a node inside of a tree. +### raw +raw text representation of the expression. +### set_changed +```python +MathExpression.set_changed(self) -> None +``` +Mark this node as having been changed by the application of a Rule +### terminal_text +Text output of this node that includes terminal color codes that +highlight which nodes have been changed in this tree as a result of +a transformation. +### to_list +```python +MathExpression.to_list( + self, + visit: str = 'preorder', +) -> List[MathExpression] +``` +Convert this node hierarchy into a list. +### to_math_ml +```python +MathExpression.to_math_ml(self) -> str +``` +Convert this expression into a MathML container. +### to_math_ml_fragment +```python +MathExpression.to_math_ml_fragment(self) -> str +``` +Convert this single node into MathML. 
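As a quick illustration of the traversal helpers above, the following sketch (assuming `mathy_core` is installed) flattens a parsed tree into a list and collects its constant nodes by type:

```python
from mathy_core.expressions import ConstantExpression
from mathy_core.parser import ExpressionParser

expression = ExpressionParser().parse("4x + 2y + 7")

# to_list flattens the node hierarchy, using a preorder visit by default
print([type(node).__name__ for node in expression.to_list()])

# find_type returns every node in the tree matching the given type
print([node.value for node in expression.find_type(ConstantExpression)])
```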
+### with_color +```python +MathExpression.with_color(self, text: str, style: str = 'bright') -> str +``` +Render a string that is colored if something has changed +## MultiplyExpression +```python +MultiplyExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Multiply one and two +## NegateExpression +```python +NegateExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` +Negate an expression, e.g. `4` becomes `-4` +### to_math_ml_fragment +```python +NegateExpression.to_math_ml_fragment(self) -> str +``` +Convert this single node into MathML. +## PowerExpression +```python +PowerExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Raise one to the power of two +## SgnExpression +```python +SgnExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` + +### operate +```python +SgnExpression.operate(self, value: Union[float, int]) -> Union[float, int] +``` +Determine the sign of an value. + +__Returns__ + +`(int)`: -1 if negative, 1 if positive, 0 if 0 +## SubtractExpression +```python +SubtractExpression( + self, + left: Optional[mathy_core.expressions.MathExpression] = None, + right: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Subtract one from two +## UnaryExpression +```python +UnaryExpression( + self, + child: Optional[mathy_core.expressions.MathExpression] = None, + child_on_left: bool = False, +) +``` +An expression that operates on one sub-expression diff --git a/website/docs/api/layout.md b/website/docs/api/layout.md new file mode 100644 index 0000000..10c81d2 --- /dev/null +++ b/website/docs/api/layout.md @@ -0,0 +1,51 @@ + +Mathy uses the Tidier algorithm to create visual tree layouts for helping understand and interpret complex node trees. + +`mathy:(28 + 1x)(17x - 2y)` + +# API + +```python + +import mathy_core.layout +``` + + +## TreeLayout +```python +TreeLayout(self, args, kwargs) +``` +Calculate a visual layout for input trees. +### layout +```python +TreeLayout.layout( + self, + node: mathy_core.tree.BinaryTreeNode, + unit_x_multiplier: float = 1.0, + unit_y_multiplier: float = 1.0, +) -> 'TreeMeasurement' +``` +Assign x/y values to all nodes in the tree, and return an object containing +the measurements of the tree. + +Returns a TreeMeasurement object that describes the bounds of the tree +### transform +```python +TreeLayout.transform( + self, + node: Optional[mathy_core.tree.BinaryTreeNode] = None, + x: float = 0, + unit_x_multiplier: float = 1, + unit_y_multiplier: float = 1, + measure: Optional[TreeMeasurement] = None, +) -> 'TreeMeasurement' +``` +Transform relative to absolute coordinates, and measure the bounds of +the tree. + +Return a measurement of the tree in output units. +## TreeMeasurement +```python +TreeMeasurement(self) -> None +``` +Summary of the rendered tree diff --git a/website/docs/api/parser.md b/website/docs/api/parser.md new file mode 100644 index 0000000..58f9a3f --- /dev/null +++ b/website/docs/api/parser.md @@ -0,0 +1,129 @@ +Mathy parses [token arrays](./tokenizer) into inspectable, transformable, visualizable symbolic trees. 
+ +## Motivation + +A Token array verifies that text maps to some known symbols, not that they are a correct ordering that produces a valid mathematical expression. The mathy Parser class converts tokens into a tree while also validating that the tree follows the expected Order of Operations. + +## Examples + +To help better understand what the parser does, consider a few examples of expressions and their visualized trees: + +| Text | Tree | +| --------------------- | --------------------------- | +| `4x` | `mathy:4x` | +| `4x / 2y^7` | `mathy:4x/2y^7` | +| `4x + (1/3)y + 7x` | `mathy:4x+ (1/3)y + 7x` | +| `4x + 1/3y + 7x` | `mathy:4x+ 1/3y + 7x` | +| `(28 + 1j)(17j + 2y)` | `mathy:(28 + 1j)(17j + 2y)` | + +# API + +```python + +import mathy_core.parser +``` + + +## ExpressionParser +```python +ExpressionParser(self) -> None +``` +Parser for converting text into binary trees. Trees encode the order of +operations for an input, and allow evaluating it to detemrine the expression +value. + +### Grammar Rules + +Symbols: +``` +( ) == Non-terminal +{ }* == 0 or more occurrences +{ }+ == 1 or more occurrences +{ }? == 0 or 1 occurrences +[ ] == Mandatory (1 must occur) +| == logical OR +" " == Terminal symbol (literal) +``` + +Non-terminals defined/parsed by Tokenizer: +``` +(Constant) = anything that can be parsed by `float(in)` +(Variable) = any string containing only letters (a-z and A-Z) +``` + +Rules: +``` +(Function) = [ functionName ] "(" (AddExp) ")" +(Factor) = { (Variable) | (Function) | "(" (AddExp) ")" }+ { { "^" }? (UnaryExp) }? +(FactorPrefix) = [ (Constant) { (Factor) }? | (Factor) ] +(UnaryExp) = { "-" }? (FactorPrefix) +(ExpExp) = (UnaryExp) { { "^" }? (UnaryExp) }? +(MultExp) = (ExpExp) { { "*" | "/" }? (ExpExp) }* +(AddExp) = (MultExp) { { "+" | "-" }? (MultExp) }* +(EqualExp) = (AddExp) { { "=" }? (AddExp) }* +(start) = (EqualExp) +``` + +### check +```python +ExpressionParser.check( + self, + tokens: mathy_core.parser.TokenSet, + do_assert: bool = False, +) -> bool +``` +Check if the `self.current_token` is a member of a set Token types + +Args: + - `tokens` The set of Token types to check against + +`Returns` True if the `current_token`'s type is in the set else False +### eat +```python +ExpressionParser.eat(self, type: int) -> bool +``` +Assign the next token in the queue to current_token if its type +matches that of the specified parameter. If the type does not match, +raise a syntax exception. + +Args: + - `type` The type that your syntax expects @current_token to be + +### next +```python +ExpressionParser.next(self) -> bool +``` +Assign the next token in the queue to `self.current_token`. + +Return True if there are still more tokens in the queue, or False if there +are no more tokens to look at. +### parse +```python +ExpressionParser.parse( + self, + input_text: str, +) -> mathy_core.expressions.MathExpression +``` +Parse a string representation of an expression into a tree +that can be later evaluated. + +Returns : The evaluatable expression tree. + +## TokenSet +```python +TokenSet(self, source: int) +``` +TokenSet objects are bitmask combinations for checking to see +if a token is part of a valid set. +### add +```python +TokenSet.add(self, addTokens: int) -> 'TokenSet' +``` +Add tokens to self set and return a TokenSet representing +their combination of flags. 
Value can be an integer or an instance +of `TokenSet` +### contains +```python +TokenSet.contains(self, type: int) -> bool +``` +Returns true if the given type is part of this set diff --git a/website/docs/api/problems.md b/website/docs/api/problems.md new file mode 100644 index 0000000..d1879d5 --- /dev/null +++ b/website/docs/api/problems.md @@ -0,0 +1,213 @@ +# mathy_core.problems +Problem Generation +--- + +Utility functions for helping generate input problems. + +## DefaultType +Template type for a default return value +## gen_binomial_times_binomial +```python +gen_binomial_times_binomial( + op: str = '+', + min_vars: int = 1, + max_vars: int = 2, + simple_variables: bool = True, + powers_probability: float = 0.33, + like_variables_probability: float = 1.0, +) -> Tuple[str, int] +``` +Generate a binomial multiplied by another binomial. + +__Example__ + + +``` +(2e + 12p)(16 + 7e) +``` + +`mathy:(2e + 12p)(16 + 7e)` + +## gen_binomial_times_monomial +```python +gen_binomial_times_monomial( + op: str = '+', + min_vars: int = 1, + max_vars: int = 2, + simple_variables: bool = True, + powers_probability: float = 0.33, + like_variables_probability: float = 1.0, +) -> Tuple[str, int] +``` +Generate a binomial multiplied by a monomial. + +__Example__ + + +``` +(4x^3 + y) * 2x +``` + +`mathy:(4x^3 + y) * 2x` + +## gen_combine_terms_in_place +```python +gen_combine_terms_in_place( + min_terms: int = 16, + max_terms: int = 26, + easy: bool = True, + powers: bool = False, +) -> Tuple[str, int] +``` +Generate a problem that puts one pair of like terms next to each other +somewhere inside a large tree of unlike terms. + +The problem is intended to be solved in a very small number of moves, making +training across many episodes relatively quick, and reducing the combinatorial +explosion of branches that need to be searched to solve the task. + +The hope is that by focusing the agent on selecting the right moves inside of a +ridiculously large expression it will learn to select actions to combine like terms +invariant of the sequence length. + +__Example__ + + +``` +4y + 12j + 73q + 19k + 13z + 56l + (24x + 12x) + 43n + 17j +``` + +`mathy:4y + 12j + 73q + 19k + 13z + 56l + (24x + 12x) + 43n + 17j` + + +## gen_commute_haystack +```python +gen_commute_haystack( + min_terms: int = 5, + max_terms: int = 8, + commute_blockers: int = 1, + easy: bool = True, + powers: bool = False, +) -> Tuple[str, int] +``` +A problem with a bunch of terms that have no matches, and a single +set of two terms that do match, but are separated by one other term. +The challenge is to commute the terms to each other in one move. + +__Example__ + + +``` +4y + 12j + 73q + 19k + 13z + 24x + 56l + 12x + 43n + 17j" + ^-----------^ +``` + +`mathy:4y + 12j + 73q + 19k + 13z + 24x + 56l + 12x + 43n + 17j` + +## gen_move_around_blockers_one +```python +gen_move_around_blockers_one( + number_blockers: int, + powers_probability: float = 0.5, +) -> Tuple[str, int] +``` +Two like terms separated by (n) blocker terms. + +__Example__ + + +``` +4x + (y + f) + x +``` + +`mathy:4x + (y + f) + x` +## gen_move_around_blockers_two +```python +gen_move_around_blockers_two( + number_blockers: int, + powers_probability: float = 0.5, +) -> Tuple[str, int] +``` +Two like terms with three blockers. 
+ +__Example__ + + +``` +7a + 4x + (2f + j) + x + 3d +``` + +`mathy:7a + 4x + (2f + j) + x + 3d` +## gen_simplify_multiple_terms +```python +gen_simplify_multiple_terms( + num_terms: int, + optional_var: bool = False, + op: Optional[List[str], str] = None, + common_variables: bool = True, + inner_terms_scaling: float = 0.3, + powers_probability: float = 0.33, + optional_var_probability: float = 0.8, + noise_probability: float = 0.8, + shuffle_probability: float = 0.66, + share_var_probability: float = 0.5, + grouping_noise_probability: float = 0.66, + noise_terms: Optional[int] = None, +) -> Tuple[str, int] +``` +Generate a polynomial problem with like terms that need to be combined and +simplified. + +__Example__ + + +``` +2a + 3j - 7b + 17.2a + j +``` + +`mathy:2a + 3j - 7b + 17.2a + j` + +## get_blocker +```python +get_blocker( + num_blockers: int = 1, + exclude_vars: Optional[List[str]] = None, +) -> str +``` +Get a string of terms to place between target simplification terms +in order to challenge the agent's ability to use commutative/associative +rules to move terms around. +## get_rand_vars +```python +get_rand_vars( + num_vars: int, + exclude_vars: Optional[List[str]] = None, + common_variables: bool = False, +) -> List[str] +``` +Get a list of random variables, excluding the given list of hold-out variables +## MathyTermTemplate +```python +MathyTermTemplate( + self, + variable: Optional[str] = None, + exponent: Optional[float, int] = None, +) -> None +``` +MathyTermTemplate(variable: Optional[str] = None, exponent: Union[float, int, NoneType] = None) +## split_in_two_random +```python +split_in_two_random(value: int) -> Tuple[int, int] +``` +Split a given number into two smaller numbers that sum to it. +Returns: a tuple of (lower, higher) numbers that sum to the input + +## use_pretty_numbers +```python +use_pretty_numbers(enabled: bool = True) -> None +``` +Determine if problems should include only pretty numbers or +a whole range of integers and floats. Using pretty numbers will +restrict the numbers that are generated to integers between 1 and +12. When not using pretty numbers, floats and large integers will +be included in the output from `rand_number` diff --git a/website/docs/api/rule.md b/website/docs/api/rule.md new file mode 100644 index 0000000..231ba98 --- /dev/null +++ b/website/docs/api/rule.md @@ -0,0 +1,80 @@ +# mathy_core.rule + +## BaseRule +```python +BaseRule(self, args, kwargs) +``` +Basic rule class that visits a tree with a specified visit order. +### apply_to +```python +BaseRule.apply_to( + self, + node: mathy_core.expressions.MathExpression, +) -> 'ExpressionChangeRule' +``` +Apply the rule transformation to the given node, and return a +ExpressionChangeRule object that captures the input/output states +for the change. +### can_apply_to +```python +BaseRule.can_apply_to(self, node: mathy_core.expressions.MathExpression) -> bool +``` +User-specified function that returns True/False if a rule can be +applied to a given node. + +!!!warning "Performance Point" + + `can_apply_to` is called very frequently during normal operation + and should be implemented as efficiently as possible. + +### code +Short code for debug rendering. Should be two letters. +### find_node +```python +BaseRule.find_node( + self, + expression: mathy_core.expressions.MathExpression, +) -> Optional[mathy_core.expressions.MathExpression] +``` +Find the first node that can have this rule applied to it. 
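For example, a short sketch (assuming `mathy_core` is installed) that locates the first applicable node for a concrete rule:

```python
from mathy_core.parser import ExpressionParser
from mathy_core.rules.commutative_swap import CommutativeSwapRule

expression = ExpressionParser().parse("4 + 2x")
rule = CommutativeSwapRule()

# The first node, in visit order, that the rule can be applied to (or None)
node = rule.find_node(expression)
print(node)
```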
+### find_nodes +```python +BaseRule.find_nodes( + self, + expression: mathy_core.expressions.MathExpression, +) -> List[mathy_core.expressions.MathExpression] +``` +Find all nodes in an expression that can have this rule applied to them. +Each node is marked with it's token index in the expression, according to +the visit strategy, and stored as `node.r_index` starting with index 0 + +### name +Readable rule name used for debug rendering and description outputs +## ExpressionChangeRule +```python +ExpressionChangeRule( + self, + rule: mathy_core.rule.BaseRule, + node: Optional[mathy_core.expressions.MathExpression] = None, +) +``` +Object describing the change to an expression tree from a rule transformation +### done +```python +ExpressionChangeRule.done( + self, + node: mathy_core.expressions.MathExpression, +) -> 'ExpressionChangeRule' +``` +Set the result of a change to the given node. Restore the parent +if `save_parent` was called. +### save_parent +```python +ExpressionChangeRule.save_parent( + self, + parent: Optional[mathy_core.expressions.MathExpression] = None, + side: Optional[Literal['left', 'right']] = None, +) -> 'ExpressionChangeRule' +``` +Note the parent of the node being modified, and set it as the parent of the +rule output automatically. diff --git a/website/docs/api/rules/associative_swap.md b/website/docs/api/rules/associative_swap.md new file mode 100644 index 0000000..0dcd6fe --- /dev/null +++ b/website/docs/api/rules/associative_swap.md @@ -0,0 +1,78 @@ +The `Associative Property` of numbers says that we can re-group two `addition` or `multiplication` terms so that one is evaluated before the other without changing the value of the expression. + +The formulation of this property is the same for addition and multiplication: + +- Addition `(a + b) + c = a + (b + c)` +- Multiplication `(a * b) * c = a * (b * c)` + +!!! note + + Interestingly, applying the associative property of numbers to a binary expression tree is a standard tree operation called a "node rotation." + +### Transformations + +#### Addition + +``` +(a + b) + c = a + (b + c) + + (y) + + (x) + / \ / \ + / \ / \ + (x) + c -> a + (y) + / \ / \ + / \ / \ + a b b c +``` + +#### Multiplication + +``` +(a * b) * c = a * (b * c) + + (x) * * (y) + / \ / \ + / \ / \ + (y) * c <- a * (x) + / \ / \ + / \ / \ + a b b c +``` + +### Examples + +`rule_tests:associative_swap` + +# API + +```python + +import mathy_core.rules.associative_swap +``` + + +## AssociativeSwapRule +```python +AssociativeSwapRule(self, args, kwargs) +``` +Associative Property +Addition: `(a + b) + c = a + (b + c)` + + (y) + + (x) + / \ / \ + / \ / \ + (x) + c -> a + (y) + / \ / \ + / \ / \ + a b b c + + Multiplication: `(ab)c = a(bc)` + + (x) * * (y) + / \ / \ + / \ / \ + (y) * c <- a * (x) + / \ / \ + / \ / \ + a b b c + diff --git a/website/docs/api/rules/balanced_move.md b/website/docs/api/rules/balanced_move.md new file mode 100644 index 0000000..3cadd21 --- /dev/null +++ b/website/docs/api/rules/balanced_move.md @@ -0,0 +1,28 @@ +# mathy_core.rules.balanced_move + +## BalancedMoveRule +```python +BalancedMoveRule(self, args, kwargs) +``` +Balanced rewrite rule moves nodes from one side of an equation +to the other by performing the same operation on both sides. 
+ +Addition: `a + 2 = 3` -> `a + 2 - 2 = 3 - 2` +Multiplication: `3a = 3` -> `3a / 3 = 3 / 3` + +### get_type +```python +BalancedMoveRule.get_type( + self, + node: mathy_core.expressions.MathExpression, +) -> Optional[str] +``` +Determine the configuration of the tree for this transformation. + +Supports the following configurations: + - Addition is a term connected by an addition to the side of an equation + or inequality. It generates two subtractions to move from one side to the + other. + - Multiply is a coefficient of a term that must be divided on both sides of + the equation or inequality. + diff --git a/website/docs/api/rules/commutative_swap.md b/website/docs/api/rules/commutative_swap.md new file mode 100644 index 0000000..485fee5 --- /dev/null +++ b/website/docs/api/rules/commutative_swap.md @@ -0,0 +1,70 @@ +The `Commutative Property` of numbers says that we can re-order two `addition` or `multiplication` terms so that one occurs before the other in the expression without changing the value of the expression. + +The formulation of this property is the same for addition and multiplication: + +- Addition `a + b = b + a` +- Multiplication `a * b = b * a` + +The commutative property is used for re-arranging the order of parts of an expression and is, as such, very important for working with mathematical expressions. + +### Transformations + +Given a common parent node, this rule switches the order of the children of that node. It can only be applied to addition or multiplication nodes. + +#### Addition + +`a + b = b + a` + +``` + + + + / \ / \ + / \ -> / \ + / \ / \ + a b b a +``` + +#### Multiplication + +`a * b = b * a` + +``` + * * + / \ / \ + / \ -> / \ + / \ / \ + a b b a +``` + +### Examples + +`rule_tests:commutative_swap` + +# API + +```python + +import mathy_core.rules.commutative_swap +``` + + +## CommutativeSwapRule +```python +CommutativeSwapRule(self, preferred: bool = True) +``` +Commutative Property +For Addition: `a + b = b + a` + + + + + / \ / \ + / \ -> / \ + / \ / \ + a b b a + +For Multiplication: `a * b = b * a` + + * * + / \ / \ + / \ -> / \ + / \ / \ + a b b a + diff --git a/website/docs/api/rules/constants_simplify.md b/website/docs/api/rules/constants_simplify.md new file mode 100644 index 0000000..7ada489 --- /dev/null +++ b/website/docs/api/rules/constants_simplify.md @@ -0,0 +1,76 @@ +The `Constant Arithmetic` rule transforms an expression tree by combining two constant values separated by a binary operation like `addition` or `division`. + +### Transformations + +#### Two Constants + +The most uncomplicated transform is to evaluate two constants that are siblings. + +- `(4 * 2) + 3` = `8 + 3` + +#### Sibling Skipping + +The constant simplify rule can simplify constants across a sibling when the sibling is a variable chunk, and the constants are commutatively connected. + +For example, `2x * 8` can be transformed into `16x` because the constants are connected through a multiplication chain that allows [commuting](./commutative_property). + +We can see this by taking a look at the trees for `2x * 8` and `2 * 8 * x` and recalling that the commutative property says `a * b = b * a`: + +| Satisfying the Commutative Property | | +| :---------------------------------: | :---------------- | +| `mathy:2x * 8` | `mathy:2 * 8 * x` | + +We can see that the tree structure has been flipped but that multiplication nodes still connect the same variables and constants, so the value of the expression remains unchanged. 
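A short sketch of applying the rule to the sibling-skipping case above (assuming `mathy_core` is installed; reading the transformed tree from `change.result` is an assumption not covered by the documented `ExpressionChangeRule` API):

```python
from mathy_core.parser import ExpressionParser
from mathy_core.rules.constants_simplify import ConstantsSimplifyRule

expression = ExpressionParser().parse("2x * 8")
rule = ConstantsSimplifyRule()
node = rule.find_node(expression)
if node is not None:
    change = rule.apply_to(node)
    # NOTE: `change.result` is assumed here; the change object is documented
    # only as capturing the input/output states of the transformation.
    print(change.result)  # expected to combine the constants, e.g. 16x
```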
+ +#### Alternate Tree Forms + +Math trees can be represented in many different equivalent forms, so mathy supports these unnatural groupings to make this rule applicable to more nodes in the tree. + +- `5 * (8h * t)` = `40h * t` +- `(7 * 10y^3) * x` = `70y^3 * x` +- `(7q * 10y^3) * x` = `(70q * y^3) * x` +- `792z^4 * 490f * q^3` = `388080z^4 * f * q^3` +- `(u^3 * 36c^6) * 7u^3` = `u^3 * 252c^6 * u^3` + +### Examples + +`rule_tests:constants_simplify` + +# API + +```python + +import mathy_core.rules.constants_simplify +``` + + +## ConstantsSimplifyRule +```python +ConstantsSimplifyRule(self, args, kwargs) +``` +Given a binary operation on two constants, simplify to the resulting +constant expression +### get_type +```python +ConstantsSimplifyRule.get_type( + self, + node: mathy_core.expressions.MathExpression, +) -> Optional[Tuple[str, mathy_core.expressions.ConstantExpression, mathy_core.expressions.ConstantExpression]] +``` +Determine the configuration of the tree for this transformation. + +Support the three types of tree configurations: + - Simple is where the node's left and right children are exactly + constants linked by an add operation. + - Chained Right is where the node's left child is a constant, but the right + child is another binary operation of the same type. In this case the left + child of the next binary node is the target. + +Structure: + - Simple + * node(add),node.left(const),node.right(const) + - Chained Right + * node(add),node.left(const),node.right(add),node.right.left(const) + - Chained Right Deep + * node(add),node.left(const),node.right(add),node.right.left(const) + diff --git a/website/docs/api/rules/distributive_factor_out.md b/website/docs/api/rules/distributive_factor_out.md new file mode 100644 index 0000000..3fba2f1 --- /dev/null +++ b/website/docs/api/rules/distributive_factor_out.md @@ -0,0 +1,91 @@ +The `Distributive Property` of numbers says that we can factor out common values from terms connected with an addition operator. + +This rule is expressed by the equation `ab + ac = a(b + c)` + +!!! note + + This is a core transformation used in combining like terms, though we usually skip over it mentally because humans are pretty intelligent. + + Consider that the `9y + 9y` example from above becomes `(9 + 9) * y`. If you apply a constant simplification rule, you end up with `18y`, which results from combining the two like `y` terms. + +### Transformations + +Given a common parent node, this rule extracts the common value from both sides, leaving an addition and a multiplication. + +#### Addition + +`ab + ac = a(b + c)` + +``` + + * + / \ / \ + / \ / \ + / \ -> / \ + * * a + + / \ / \ / \ + a b a c b c +``` + +### Examples + +`rule_tests:distributive_factor_out` + +# API + +```python + +import mathy_core.rules.distributive_factor_out +``` + + +## DistributiveFactorOutRule +```python +DistributiveFactorOutRule(self, constants: bool = False) +``` +Distributive Property +`ab + ac = a(b + c)` + + The distributive property can be used to expand out expressions + to allow for simplification, as well as to factor out common properties + of terms. + + **Factor out a common term** + + This handles the `ab + ac` conversion of the distributive property, which + factors out a common term from the given two addition operands. 
+ + + * + / \ / \ + / \ / \ + / \ -> / \ + * * a + + / \ / \ / \ + a b a c b c + +### get_type +```python +DistributiveFactorOutRule.get_type( + self, + node: mathy_core.expressions.MathExpression, +) -> Optional[Tuple[str, mathy_core.util.TermEx, mathy_core.util.TermEx]] +``` +Determine the configuration of the tree for this transformation. + +Support the three types of tree configurations: + - Simple is where the node's left and right children are exactly + terms linked by an add operation. + - Chained Left is where the node's left child is a term, but the right + child is another add operation. In this case the left child + of the next add node is the target. + - Chained Right is where the node's right child is a term, but the left + child is another add operation. In this case the right child + of the child add node is the target. + +Structure: + - Simple + * node(add),node.left(term),node.right(term) + - Chained Left + * node(add),node.left(term),node.right(add),node.right.left(term) + - Chained Right + * node(add),node.right(term),node.left(add),node.left.right(term) + diff --git a/website/docs/api/rules/distributive_multiply_across.md b/website/docs/api/rules/distributive_multiply_across.md new file mode 100644 index 0000000..24ed220 --- /dev/null +++ b/website/docs/api/rules/distributive_multiply_across.md @@ -0,0 +1,64 @@ +The `Distributive Property` can distribute multiplication across grouped terms. This has the effect of removing a grouping and can expose the terms that were inside for further simplification depending on the problem type. + +This rule is expressed by the equation `a(b + c) = ab + ac` + +### Transformations + +Given a multiplication of `a` and `(b + c)`, this rule distributes `a` across `b` and `c`, leaving only the simpler form of `ab` and `ac`. + +#### Addition + +`a(b + c) = ab + ac` + +``` + + + * / \ + / \ / \ + / \ / \ + a + -> * * + / \ / \ / \ + / \ / \ / \ + b c a b a c +``` + +### Examples + +`rule_tests:distributive_multiply_across` + +# API + +```python + +import mathy_core.rules.distributive_multiply_across +``` + + +## DistributiveMultiplyRule +```python +DistributiveMultiplyRule(self, args, kwargs) +``` + +Distributive Property +`a(b + c) = ab + ac` + +The distributive property can be used to expand out expressions +to allow for simplification, as well as to factor out common properties of terms. + +**Distribute across a group** + +This handles the `a(b + c)` conversion of the distributive property, which +distributes `a` across both `b` and `c`. + +*note: this is useful because it takes a complex Multiply expression and +replaces it with two simpler ones. This can expose terms that can be +combined for further expression simplification.* + + + + * / \ + / \ / \ + / \ / \ + a + -> * * + / \ / \ / \ + / \ / \ / \ + b c a b a c + diff --git a/website/docs/api/rules/restate_subtraction.md b/website/docs/api/rules/restate_subtraction.md new file mode 100644 index 0000000..263f139 --- /dev/null +++ b/website/docs/api/rules/restate_subtraction.md @@ -0,0 +1,20 @@ +# mathy_core.rules.restate_subtraction + +## RestateSubtractionRule +```python +RestateSubtractionRule(self, args, kwargs) +``` +Convert subtract operators to plus negative to allow commuting +### get_type +```python +RestateSubtractionRule.get_type( + self, + node: mathy_core.expressions.MathExpression, +) -> Optional[str] +``` +Determine the configuration of the tree for this transformation. 
+ +Support two types of tree configurations: + - Subtraction is a subtract to be restate as a plus negation + - PlusNegative is a plus negative const to be restated as subtraction + diff --git a/website/docs/api/rules/variable_multiply.md b/website/docs/api/rules/variable_multiply.md new file mode 100644 index 0000000..f0d0d2c --- /dev/null +++ b/website/docs/api/rules/variable_multiply.md @@ -0,0 +1,63 @@ +# mathy_core.rules.variable_multiply + +## VariableMultiplyRule +```python +VariableMultiplyRule(self, args, kwargs) +``` + +This restates `x^b * x^d` as `x^(b + d)` which has the effect of isolating +the exponents attached to the variables, so they can be combined. + + 1. When there are two terms with the same base being multiplied together, their + exponents are added together. "x * x^3" = "x^4" because "x = x^1" so + "x^1 * x^3 = x^(1 + 3) = x^4" + + TODO: 2. When there is a power raised to another power, they can be combined by + multiplying the exponents together. "x^(2^2) = x^4" + +The rule identifies terms with explicit and implicit powers, so the following +transformations are all valid: + +Explicit powers: x^b * x^d = x^(b+d) + + * + / \ + / \ ^ + / \ = / \ + ^ ^ x + + / \ / \ / \ + x b x d b d + + +Implicit powers: x * x^d = x^(1 + d) + + * + / \ + / \ ^ + / \ = / \ + x ^ x + + / \ / \ + x d 1 d + + +### get_type +```python +VariableMultiplyRule.get_type( + self, + node: mathy_core.expressions.MathExpression, +) -> Optional[Tuple[str, mathy_core.util.TermEx, mathy_core.util.TermEx]] +``` +Determine the configuration of the tree for this transformation. + +Support two types of tree configurations: + - Simple is where the node's left and right children are exactly + terms that can be multiplied together. + - Chained is where the node's left child is a term, but the right + child is a continuation of a more complex term, as indicated by + the presence of another Multiply node. In this case the left child + of the next multiply node is the target. + +Structure: + - Simple node(mult),node.left(term),node.right(term) + - Chained node(mult),node.left(term),node.right(mult),node.right.left(term) + diff --git a/website/docs/api/testing.md b/website/docs/api/testing.md new file mode 100644 index 0000000..40f86ea --- /dev/null +++ b/website/docs/api/testing.md @@ -0,0 +1,54 @@ +# mathy_core.testing + +## get_rule_tests +```python +get_rule_tests(name: str) -> Dict[str, Any] +``` +Load a set of JSON rule test assertions. + +__Arguments__ + +- __name (str)__: The name of the test JSON file to open, e.g. "commutative_property" + +__Returns__ + +`(dict)`: A dictionary with "valid" and "invalid" keys that contain pairs of +expected inputs and outputs. + +## init_rule_for_test +```python +init_rule_for_test( + example: Dict[str, Any], + rule_class: Type[mathy_core.rule.BaseRule], +) -> mathy_core.rule.BaseRule +``` +Initialize a given rule_class from a test example. + +This handles optionally passing the test example constructor arguments +to the Rule. + +__Arguments:__ + +example (dict): The example assertion loaded from a call to `get_rule_tests` +rule_class (Type[BaseRule]): The + +__Returns__ + +`(BaseRule)`: The rule instance. + +## run_rule_tests +```python +run_rule_tests( + name: str, + rule_class: Type[mathy_core.rule.BaseRule], + callback: Optional[Callable[[Dict[str, Any]], NoneType]] = None, +) -> None +``` +Load and assert about the transformations and validity of rules +based on given input examples. 
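A typical use is a one-line test per rule. This is a minimal sketch; the fixture name `"constants_simplify"` is assumed to match a JSON test file shipped with the package:

```python
from mathy_core.rules.constants_simplify import ConstantsSimplifyRule
from mathy_core.testing import run_rule_tests

def test_constants_simplify_rule() -> None:
    # Loads the JSON assertions and applies the rule to each valid/invalid example
    run_rule_tests("constants_simplify", ConstantsSimplifyRule)
```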
+
+When debugging a problem, it can be useful to provide a "callback" function
+and add a `"debug": true` value to the example you want to debug in the rules
+JSON file. Then set a breakpoint in your callback function and step out of it
+into the parsing/evaluation of the debug example.
+
diff --git a/website/docs/api/tokenizer.md b/website/docs/api/tokenizer.md
new file mode 100644
index 0000000..242d8e2
--- /dev/null
+++ b/website/docs/api/tokenizer.md
@@ -0,0 +1,326 @@
+## Motivation
+
+We first need an intermediate representation to parse math text into tree structures that encode the Order of Operations of the input. Specifically, we want to build a list of text characters corresponding to relevant `tokens` for a math expression. That is what the tokenizer does.
+
+The tokenization process treats the input string as an array of characters, iterating over them to produce a list of tokens with `type`/`value` properties. While building the collection, the tokenizer also optionally discards extra whitespace characters.
+
+## Visual Example
+
+For example, consider the input text `8 - (2 + 4)` and its token representation.
+
+`tokens:8 - (2 + 4)`
+
+- The top row contains the token value.
+- The bottom row shows the integer type of the token represented by the value.
+
+## Code Example
+
+Simple tokenization requires only a few lines of code:
+
+```Python
+
+{!./snippets/cas/tokenizer_tokenize.py!}
+
+```
+
+## Conceptual Example
+
+To better understand the tokenizer, let's build a token array manually and then compare it to the tokenizer's output:
+
+```Python
+{!./snippets/cas/tokenizer_manual.py!}
+```
+
+# API
+
+```python
+
+import mathy_core.tokenizer
+```
+
+
+## TOKEN_TYPES
+```python
+TOKEN_TYPES(self, args, kwargs)
+```
+Integer constants for each token type the tokenizer can produce:
+
+- `CloseParen`
+- `Constant`
+- `Divide`
+- `EOF`
+- `Equal`
+- `Exponent`
+- `Factorial`
+- `Function`
+- `Invalid`
+- `Minus`
+- `Multiply`
+- `OpenParen`
+- `Pad`
+- `Plus`
+- `Variable`
+## Tokenizer
+```python
+Tokenizer(self, exclude_padding: bool = True)
+```
+The Tokenizer produces a list of tokens from an input string.
+### eat_token
+```python
+Tokenizer.eat_token(
+    self,
+    context: mathy_core.tokenizer.TokenContext,
+    typeFn: Callable[[str], bool],
+) -> str
+```
+Eat all of the tokens of a given type from the front of the stream
+until a different type is hit, and return the text.
+### identify_alphas
+```python
+Tokenizer.identify_alphas(
+    self,
+    context: mathy_core.tokenizer.TokenContext,
+) -> int
+```
+Identify and tokenize functions and variables.
+### identify_constants
+```python
+Tokenizer.identify_constants(
+    self,
+    context: mathy_core.tokenizer.TokenContext,
+) -> int
+```
+Identify and tokenize a constant number.
+### identify_operators
+```python
+Tokenizer.identify_operators(
+    self,
+    context: mathy_core.tokenizer.TokenContext,
+) -> bool
+```
+Identify and tokenize operators.
+### is_alpha
+```python
+Tokenizer.is_alpha(self, c: str) -> bool
+```
+Is this character a letter
+### is_number
+```python
+Tokenizer.is_number(self, c: str) -> bool
+```
+Is this character a number
+### tokenize
+```python
+Tokenizer.tokenize(self, buffer: str) -> List[mathy_core.tokenizer.Token]
+```
+Return an array of `Token`s from a given string input.
+This throws an exception if an unknown token type is found in the input.
diff --git a/website/docs/api/tree.md b/website/docs/api/tree.md
new file mode 100644
index 0000000..dfd7d65
--- /dev/null
+++ b/website/docs/api/tree.md
@@ -0,0 +1,162 @@
+# mathy_core.tree
+
+## BinaryTreeNode
+```python
+BinaryTreeNode(
+    self,
+    left: Optional[BinaryTreeNode] = None,
+    right: Optional[BinaryTreeNode] = None,
+    parent: Optional[BinaryTreeNode] = None,
+    id: Optional[str] = None,
+)
+```
+
+The binary tree node is the base node for all of our trees, and provides a
+rich set of methods for constructing, inspecting, and modifying them.
+The node itself defines the structure of the binary tree, having left and right
+children, and a parent.
+
+### clone
+```python
+BinaryTreeNode.clone(self: ~NodeType) -> ~NodeType
+```
+Create a clone of this tree
+### get_children
+```python
+BinaryTreeNode.get_children(self: ~NodeType) -> List[~NodeType]
+```
+Get children as an array. If there are two children, the first object will
+always represent the left child, and the second will represent the right.
+### get_root
+```python
+BinaryTreeNode.get_root(self: ~NodeType) -> ~NodeType
+```
+Return the root element of this tree
+### get_root_side
+```python
+BinaryTreeNode.get_root_side(self: 'BinaryTreeNode') -> Literal['left', 'right']
+```
+Return the side of the tree that this node lives on
+### get_sibling
+```python
+BinaryTreeNode.get_sibling(self: ~NodeType) -> Optional[~NodeType]
+```
+Get the sibling node of this node.
If there is no parent, or the node +has no sibling, the return value will be None. +### get_side +```python +BinaryTreeNode.get_side( + self, + child: Optional[BinaryTreeNode], +) -> Literal['left', 'right'] +``` +Determine whether the given `child` is the left or right child of this +node +### is_leaf +```python +BinaryTreeNode.is_leaf(self) -> bool +``` +Is this node a leaf? A node is a leaf if it has no children. +### rotate +```python +BinaryTreeNode.rotate(self: ~NodeType) -> ~NodeType +``` + +Rotate a node, changing the structure of the tree, without modifying +the order of the nodes in the tree. + +### set_left +```python +BinaryTreeNode.set_left( + self: ~NodeType, + child: Optional[BinaryTreeNode] = None, + clear_old_child_parent: bool = False, +) -> ~NodeType +``` +Set the left node to the passed `child` +### set_right +```python +BinaryTreeNode.set_right( + self: ~NodeType, + child: Optional[BinaryTreeNode] = None, + clear_old_child_parent: bool = False, +) -> ~NodeType +``` +Set the right node to the passed `child` +### set_side +```python +BinaryTreeNode.set_side( + self, + child: ~NodeType, + side: Literal['left', 'right'], +) -> ~NodeType +``` +Set a new `child` on the given `side` +### visit_inorder +```python +BinaryTreeNode.visit_inorder( + self, + visit_fn: Callable[[Any, int, Optional[Any]], Optional[Literal['stop']]], + depth: int = 0, + data: Optional[Any] = None, +) -> Optional[Literal['stop']] +``` +Visit the tree inorder, which visits the left child, then the current node, +and then its right child. + +*Left -> Visit -> Right* + +This method accepts a function that will be invoked for each node in the +tree. The callback function is passed three arguments: the node being +visited, the current depth in the tree, and a user specified data parameter. + +!!! info + + Traversals may be canceled by returning `STOP` from any visit function. + +### visit_postorder +```python +BinaryTreeNode.visit_postorder( + self, + visit_fn: Callable[[Any, int, Optional[Any]], Optional[Literal['stop']]], + depth: int = 0, + data: Optional[Any] = None, +) -> Optional[Literal['stop']] +``` +Visit the tree postorder, which visits its left child, then its right child, +and finally the current node. + +*Left -> Right -> Visit* + +This method accepts a function that will be invoked for each node in the +tree. The callback function is passed three arguments: the node being +visited, the current depth in the tree, and a user specified data parameter. + +!!! info + + Traversals may be canceled by returning `STOP` from any visit function. + +### visit_preorder +```python +BinaryTreeNode.visit_preorder( + self, + visit_fn: Callable[[Any, int, Optional[Any]], Optional[Literal['stop']]], + depth: int = 0, + data: Optional[Any] = None, +) -> Optional[Literal['stop']] +``` +Visit the tree preorder, which visits the current node, then its left +child, and then its right child. + +*Visit -> Left -> Right* + +This method accepts a function that will be invoked for each node in the +tree. The callback function is passed three arguments: the node being +visited, the current depth in the tree, and a user specified data parameter. + +!!! info + + Traversals may be canceled by returning `STOP` from any visit function. + +## VisitDataType +Template type of user data passed to visit functions. 
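+
+The methods above are enough to build and walk a small tree by hand. The
+following is a minimal sketch; it is not part of the generated reference and
+assumes only the constructor and methods documented on this page:
+
+```Python
+from typing import Any, List, Optional
+
+from mathy_core.tree import BinaryTreeNode
+
+# Build a tiny three-node tree by hand.
+root = BinaryTreeNode()
+root.set_left(BinaryTreeNode())
+root.set_right(BinaryTreeNode())
+
+depths: List[int] = []
+
+
+def visit_fn(node: Any, depth: int, data: Optional[Any]) -> None:
+    # Record the depth of every node the traversal touches.
+    depths.append(depth)
+
+
+root.visit_inorder(visit_fn)  # Left -> Visit -> Right
+assert len(depths) == 3  # left child, root, right child
+assert not root.is_leaf()
+assert root.get_children()[0].get_root() is root
+```
+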
diff --git a/website/docs/api/util.md b/website/docs/api/util.md new file mode 100644 index 0000000..f248ada --- /dev/null +++ b/website/docs/api/util.md @@ -0,0 +1,195 @@ +# mathy_core.util + +## compare_equation_values +```python +compare_equation_values( + from_expression: mathy_core.expressions.MathExpression, + to_expression: mathy_core.expressions.MathExpression, + eval_context: Dict[str, Union[float, int]], +) -> None +``` +Evaluate two equations with some context. + +Raises ValueError if the equations do not hold when evaluated with the given +context. +## compare_expression_string_values +```python +compare_expression_string_values( + from_expression: str, + to_expression: str, + history: Optional[List[Any]] = None, +) -> None +``` +Compare and evaluate two expressions strings to verify they have the +same value +## compare_expression_values +```python +compare_expression_values( + from_expression: mathy_core.expressions.MathExpression, + to_expression: mathy_core.expressions.MathExpression, + history: Optional[List[Any]] = None, +) -> None +``` +Compare and evaluate two expressions to verify they have the same value +## factor +```python +factor(value: Union[float, int]) -> Dict[Union[float, int], Union[float, int]] +``` +Build a verbose factor dictionary. + +This builds a dictionary of factors for a given value that +contains both arrangements of terms so that all factors are +accessible by key. That is, factoring 2 would return + result = { + 1 : 2 + 2 : 1 + } + +## get_term_ex +```python +get_term_ex( + node: Optional[mathy_core.expressions.MathExpression], +) -> Optional[mathy_core.util.TermEx] +``` +Extract the 3 components of a naturally ordered term. + +!!! info Important + + This doesn't care about whether the node is part of a larger term, + it only looks at its children. + +__Example__ + + +`mathy:4x^7` + +```python +TermEx(coefficient=4, variable="x", exponent=7) +``` + +## get_terms +```python +get_terms( + expression: mathy_core.expressions.MathExpression, +) -> List[mathy_core.expressions.MathExpression] +``` +Walk the given expression tree and return a list of nodes +representing the distinct terms it contains. + +__Arguments__ + +- __expression (MathExpression)__: the expression to find term nodes in + +__Returns__ + +`(List[MathExpression])`: a list of term nodes + +## has_like_terms +```python +has_like_terms(expression: mathy_core.expressions.MathExpression) -> bool +``` +Return True if a given expression has more than one of any type of term. + +__Examples__ + + +- `x + y + z` = `False` +- `x^2 + x` = `False` +- `y + 2x` = `True` +- `x^2 + 4x^3 + 2y` = `True` + +## is_add_or_sub +```python +is_add_or_sub(node: mathy_core.expressions.MathExpression) -> bool +``` +Return True if a node is an Add or Subtract expression +## is_debug_mode +```python +is_debug_mode() -> bool +``` +Debug mode enables extra logging and assertions, but is slower. +## is_preferred_term_form +```python +is_preferred_term_form(expression: mathy_core.expressions.MathExpression) -> bool +``` + +Return True if a given term has been simplified so that it only has +a max of one coefficient and variable, with the variable on the right +and the coefficient on the left side + +Examples + + - Complex = 2 * 2x^2 + - Simple = x^2 * 4 + - Preferred = 4x^2 + +## is_simple_term +```python +is_simple_term(node: mathy_core.expressions.MathExpression) -> bool +``` +Return True if a given term has been simplified so it only has at +most one of each variable and a constant. 
+ +__Examples__ + + - Simple = 2x^2 * 2y + - Complex = 2x * 2x * 2y + - Simple = x^2 * 4 + - Complex = 2 * 2x^2 + +## pad_array +```python +pad_array(in_list: List[Any], max_length: int, value: Any = 0) -> List[Any] +``` +Pad a list to the given size with the given padding value. + +__Arguments:__ + +in_list (List[Any]): List of values to pad to the given length +max_length (int): The desired length of the array +value (Any): a value to insert in order to pad the array to max length + +__Returns__ + +`(List[Any])`: An array padded to `max_length` size + +## TermEx +```python +TermEx(self, args, kwargs) +``` +TermEx(coefficient, variable, exponent) +### coefficient +An optional integer or float coefficient +### exponent +An optional integer or float exponent +### variable +An optional variable +## terms_are_like +```python +terms_are_like( + one: Union[mathy_core.util.TermResult, mathy_core.expressions.MathExpression, Literal[False]], + two: Union[mathy_core.util.TermResult, mathy_core.expressions.MathExpression, Literal[False]], +) -> bool +``` +Determine if two math expression nodes are **like terms**. + +__Arguments__ + +- __one (MathExpression)__: A math expression that represents a term +- __two (MathExpression)__: Another math expression that represents a term + +__Returns__ + +`(bool)`: Whether the terms are like or not. + +## unlink +```python +unlink( + node: Optional[mathy_core.expressions.MathExpression] = None, +) -> Optional[mathy_core.expressions.MathExpression] +``` +Unlink an expression from it's parent. + +1. Clear expression references in `parent` +2. Clear `parent` in expression + diff --git a/website/docs/changelog.md b/website/docs/changelog.md new file mode 100644 index 0000000..e69de29 diff --git a/website/docs/examples/predicting_like_terms.ipynb b/website/docs/examples/predicting_like_terms.ipynb new file mode 100644 index 0000000..f4b1720 --- /dev/null +++ b/website/docs/examples/predicting_like_terms.ipynb @@ -0,0 +1,761 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting Like Terms [![Open Example In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/justindujardin/mathy/blob/master/libraries/website/docs/examples/predicting_like_terms.ipynb)\n", + "\n", + "> This notebook is built using [thinc](https://thinc.ai){target=\\_blank} and [Mathy](https://mathy.ai). \n", + "\n", + "\n", + "Remember in Algebra how you had to combine \"like terms\" to simplify problems? \n", + "\n", + "You'd see expressions like `60 + 2x^3 - 6x + x^3 + 17x` that have **5** total terms but only **4** \"like terms\". \n", + "\n", + "That's because `2x^3` and `x^3` are like and `-6x` and `17x` are like, while `60` doesn't have any other terms that are like it.\n", + "\n", + "Can we teach a model to predict that there are `4` like terms in the above expression?\n", + "\n", + "Let's give it a shot using [Mathy](https://mathy.ai) to generate math problems and [thinc](https://thinc.ai) to build a regression model that outputs the number of like terms in each input problem." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install \"thinc>=8.0.0a0\" mathy_core" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sketch a Model\n", + "\n", + "Before we get started it can be good to have an idea of what input/output shapes we want for our model.\n", + "\n", + "We'll convert text math problems into lists of lists of integers, so our example (X) type can be represented using thinc's `Ints2d` type.\n", + "\n", + "The model will predict how many like terms there are in each sequence, so our output (Y) type can represented with the `Floats2d` type.\n", + "\n", + "Knowing the thinc types we want enables us to create an alias for our model, so we only have to type out the verbose generic signature once." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from thinc.api import Model\n", + "from thinc.types import Ints2d, Floats1d\n", + "\n", + "ModelX = Ints2d\n", + "ModelY = Floats1d\n", + "ModelT = Model[List[ModelX], ModelY]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Encode Text Inputs\n", + "\n", + "Mathy generates ascii-math problems and we have to encode them into integers that the model can process. \n", + "\n", + "To do this we'll build a vocabulary of all the possible characters we'll see, and map each input character to its index in the list.\n", + "\n", + "For math problems our vocabulary will include all the characters of the alphabet, numbers 0-9, and special characters like `*`, `-`, `.`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from thinc.api import Model\n", + "from thinc.types import Ints2d, Floats1d\n", + "from thinc.api import Ops, get_current_ops\n", + "\n", + "vocab = \" .+-/^*()[]-01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n", + "\n", + "def encode_input(text: str) -> ModelX:\n", + " ops: Ops = get_current_ops()\n", + " indices: List[List[int]] = []\n", + " for c in text:\n", + " if c not in vocab:\n", + " raise ValueError(f\"'{c}' missing from vocabulary in text: {text}\")\n", + " indices.append([vocab.index(c)])\n", + " return ops.asarray2i(indices)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**\n", + "\n", + "Let's try it out on some fixed data to be sure it works. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[16]\n", + " [ 2]\n", + " [14]]\n" + ] + } + ], + "source": [ + "outputs = encode_input(\"4+2\")\n", + "assert outputs[0][0] == vocab.index(\"4\")\n", + "assert outputs[1][0] == vocab.index(\"+\")\n", + "assert outputs[2][0] == vocab.index(\"2\")\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate Math Problems\n", + "\n", + "We'll use Mathy to generate random polynomial problems with a variable number of like terms. The generated problems will act as training data for our model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Optional, Set\n", + "import random\n", + "from mathy_core.problems import gen_simplify_multiple_terms\n", + "\n", + "def generate_problems(number: int, exclude: Optional[Set[str]] = None) -> List[str]:\n", + " if exclude is None:\n", + " exclude = set()\n", + " problems: List[str] = []\n", + " while len(problems) < number:\n", + " text, complexity = gen_simplify_multiple_terms(\n", + " random.randint(2, 6),\n", + " noise_probability=1.0,\n", + " noise_terms=random.randint(2, 10),\n", + " op=[\"+\", \"-\"],\n", + " )\n", + " assert text not in exclude, \"duplicate problem generated!\"\n", + " exclude.add(text)\n", + " problems.append(text)\n", + " return problems" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['-7743l^3 + 3130r + -5826.8u - 4394r + 3g^4 - 1y - 1485u + 11w',\n", + " '4d - -1525.5m^3 + 1w + 12l^4 + 3069.9w + -3559s - 1.8r + 6737.3l^4 - -2119l^4 - 3w + 1128.9a + -5600v - -2315b + 8.1u - -6832z',\n", + " '-4868y^2 - 4548k + 9.6m + 3s - -7128d^4 + 6j^4 - 12v - 8.1t^4 + 1o^3 + 4c^4 - 2579o^3 - -4237.7q',\n", + " '-4553l^2 - 11.7j + 10j - 8.3g - -5184m',\n", + " '4o^2 + 2886u^3 + 5813q - 1u^3 + 4s - -6991u^3 + -9560a - -4774f + -1479z - 8.0f + 7x + 6.5h + -4397.2y + 12b',\n", + " '1247m^4 + 3833q^2 + 1n - 11.7s - 1.3p - 618y^2 + -3821n + 2a - 2.4a - 11r - 4764w^3 + 4.5n - 2.2t + 572.9a - 3c^3',\n", + " '1214.7f^4 + 11s - 2151k^4 - -7732q - 9q - 4l + -3697.3h + 3z + 5l - 7813.0p^3',\n", + " '4m + 6x - 4u + 1f - 11m - 11.5d + 4.0z - 2n + 4386c^4 + 2.1q',\n", + " '5h + 5.0h - 10.9a - 1517h + 2940o - -4178k^2 + -1748k^2',\n", + " '1.5l - 8.1d - 7.4m^2 - 0.9a - -4580w - -8290.8k + 8.3j + 8g + -5722d - 5455s^2 + -5355r^2 + 11u^4']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generate_problems(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Count Like Terms\n", + "\n", + "Now that we can generate input problems, we'll need a function that can count the like terms in each one and return the value for use as a label.\n", + "\n", + "To accomplish this we'll use a few helpers from mathy to enumerate the terms and compare them to see if they're like." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Optional, List, Dict\n", + "from mathy_core.problems import mathy_term_string\n", + "from mathy_core import MathExpression, ExpressionParser, get_terms, get_term_ex, TermEx\n", + "\n", + "parser = ExpressionParser()\n", + "\n", + "def count_like_terms(input_problem: str) -> int:\n", + " expression: MathExpression = parser.parse(input_problem)\n", + " term_nodes: List[MathExpression] = get_terms(expression)\n", + " node_groups: Dict[str, List[MathExpression]] = {}\n", + " for term_node in term_nodes:\n", + " ex: Optional[TermEx] = get_term_ex(term_node)\n", + " assert ex is not None, f\"invalid expression {term_node}\"\n", + " key = mathy_term_string(variable=ex.variable, exponent=ex.exponent)\n", + " if key == \"\":\n", + " key = \"const\"\n", + " if key not in node_groups:\n", + " node_groups[key] = [term_node]\n", + " else:\n", + " node_groups[key].append(term_node)\n", + " like_terms = 0\n", + " for k, v in node_groups.items():\n", + " if len(v) <= 1:\n", + " continue\n", + " like_terms += len(v)\n", + " return like_terms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "assert count_like_terms(\"4x - 2y + q\") == 0\n", + "assert count_like_terms(\"x + x + z\") == 2\n", + "assert count_like_terms(\"4x + 2x - x + 7\") == 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate Problem/Answer pairs\n", + "\n", + "Now that we can generate problems, count the number of like terms in them, and encode their text into integers, we have the pieces required to generate random problems and answers that we can train a neural network with.\n", + "\n", + "Let's write a function that will return a tuple of: the problem text, its encoded example form, and the output label." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "from thinc.api import Ops, get_current_ops\n", + "\n", + "def to_example(input_problem: str) -> Tuple[str, ModelX, ModelY]:\n", + " ops: Ops = get_current_ops()\n", + " encoded_input = encode_input(input_problem)\n", + " like_terms = count_like_terms(input_problem)\n", + " return input_problem, encoded_input, ops.asarray1f([like_terms])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x+2x [[46]\n", + " [ 2]\n", + " [14]\n", + " [46]] [2.]\n" + ] + } + ], + "source": [ + "text, X, Y = to_example(\"x+2x\")\n", + "assert text == \"x+2x\"\n", + "assert X[0] == vocab.index(\"x\")\n", + "assert Y[0] == 2\n", + "print(text, X, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build a Model\n", + "\n", + "Now that we can generate X/Y values, let's define our model and verify that it can process a single input/output.\n", + "\n", + "For this we'll use Thinc and the `define_operators` context manager to connect the pieces together using overloaded operators for `chain` and `clone` operations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from thinc.model import Model\n", + "from thinc.api import concatenate, chain, clone, list2ragged\n", + "from thinc.api import reduce_sum, Mish, with_array, Embed, residual\n", + "\n", + "def build_model(n_hidden: int, dropout: float = 0.1) -> ModelT:\n", + " with Model.define_operators({\">>\": chain, \"|\": concatenate, \"**\": clone}):\n", + " model = (\n", + " # Iterate over each element in the batch\n", + " with_array(\n", + " # Embed the vocab indices\n", + " Embed(n_hidden, len(vocab), column=0)\n", + " # Activate each batch of embedding sequences separately first\n", + " >> Mish(n_hidden, dropout=dropout)\n", + " )\n", + " # Convert to ragged so we can use the reduction layers\n", + " >> list2ragged()\n", + " # Sum the features for each batch input\n", + " >> reduce_sum()\n", + " # Process with a small resnet\n", + " >> residual(Mish(n_hidden, normalize=True)) ** 4\n", + " # Convert (batch_size, n_hidden) to (batch_size, 1)\n", + " >> Mish(1)\n", + " )\n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**\n", + "\n", + "Let's pass an example through the model to make sure we have all the sizes right." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 1)\n" + ] + } + ], + "source": [ + "text, X, Y = to_example(\"14x + 2y - 3x + 7x\")\n", + "m = build_model(12)\n", + "m.initialize([X], m.ops.asarray(Y, dtype=\"f\"))\n", + "mY = m.predict([X])\n", + "print(mY.shape)\n", + "assert mY.shape == (1, 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate Training Datasets\n", + "\n", + "Now that we can generate examples and we have a model that can process them, let's generate random unique training and evaluation datasets.\n", + "\n", + "For this we'll write another helper function that can generate (n) training examples and respects an exclude list to avoid letting examples from the training/test sets overlap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Tuple, Optional, Set, List\n", + "\n", + "DatasetTuple = Tuple[List[str], List[ModelX], List[ModelY]]\n", + "\n", + "def generate_dataset(\n", + " size: int,\n", + " exclude: Optional[Set[str]] = None,\n", + ") -> DatasetTuple:\n", + " ops: Ops = get_current_ops()\n", + " texts: List[str] = generate_problems(size, exclude=exclude)\n", + " examples: List[ModelX] = []\n", + " labels: List[ModelY] = []\n", + " for i, text in enumerate(texts):\n", + " text, x, y = to_example(text)\n", + " examples.append(x)\n", + " labels.append(y)\n", + "\n", + " return texts, examples, labels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**\n", + "\n", + "Generate a small dataset to be sure everything is working as expected" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "texts, x, y = generate_dataset(10)\n", + "assert len(texts) == 10\n", + "assert len(x) == 10\n", + "assert len(y) == 10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluate Model Performance\n", + "\n", + "We're almost ready to train our model, we just need to write a function that will check a given trained model against a given dataset and return a 0-1 score of how accurate it was.\n", + "\n", + "We'll use this function to print the score as training progresses and print final test predictions at the end of training." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from wasabi import msg\n", + "\n", + "def evaluate_model(\n", + " model: ModelT,\n", + " *,\n", + " print_problems: bool = False,\n", + " texts: List[str],\n", + " X: List[ModelX],\n", + " Y: List[ModelY],\n", + "):\n", + " Yeval = model.predict(X)\n", + " correct_count = 0\n", + " print_n = 12\n", + " if print_problems:\n", + " msg.divider(f\"eval samples max({print_n})\")\n", + " for text, y_answer, y_guess in zip(texts, Y, Yeval):\n", + " y_guess = round(float(y_guess))\n", + " correct = y_guess == int(y_answer)\n", + " print_fn = msg.fail\n", + " if correct:\n", + " correct_count += 1\n", + " print_fn = msg.good\n", + " if print_problems and print_n > 0:\n", + " print_n -= 1\n", + " print_fn(f\"Answer[{int(y_answer[0])}] Guess[{y_guess}] Text: {text}\")\n", + " if print_problems:\n", + " print(f\"Model predicted {correct_count} out of {len(X)} correctly.\")\n", + " return correct_count / len(X)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Try It**\n", + "\n", + "Let's try it out with an untrained model and expect to see a really sad score." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "texts, X, Y = generate_dataset(128)\n", + "m = build_model(12)\n", + "m.initialize(X, m.ops.asarray(Y, dtype=\"f\"))\n", + "# Assume the model should do so poorly as to round down to 0\n", + "assert round(evaluate_model(m, texts=texts, X=X, Y=Y)) == 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train/Evaluate a Model\n", + "\n", + "The final helper function we need is one to train and evaluate a model given two input datasets. \n", + "\n", + "This function does a few things:\n", + "\n", + " 1. Create an Adam optimizer we can use for minimizing the model's prediction error.\n", + " 2. 
Loop over the given training dataset (epoch) number of times.\n", + " 3. For each epoch, make batches of (batch_size) examples. For each batch(X), predict the number of like terms (Yh) and subtract the known answers (Y) to get the prediction error. Update the model using the optimizer with the calculated error.\n", + " 5. After each epoch, check the model performance against the evaluation dataset.\n", + " 6. Save the model weights for the best score out of all the training epochs.\n", + " 7. After all training is done, restore the best model and print results from the evaluation set." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from thinc.api import Adam\n", + "from wasabi import msg\n", + "import numpy\n", + "\n", + "def train_and_evaluate(\n", + " model: ModelT,\n", + " train_tuple: DatasetTuple,\n", + " eval_tuple: DatasetTuple,\n", + " *,\n", + " lr: float = 3e-3,\n", + " batch_size: int = 64,\n", + " epochs: int = 48,\n", + ") -> float:\n", + " (train_texts, train_X, train_y) = train_tuple\n", + " (eval_texts, eval_X, eval_y) = eval_tuple\n", + " msg.divider(\"Train and Evaluate Model\")\n", + " msg.info(f\"Batch size = {batch_size}\\tEpochs = {epochs}\\tLearning Rate = {lr}\")\n", + "\n", + " optimizer = Adam(lr)\n", + " best_score: float = 0.0\n", + " best_model: Optional[bytes] = None\n", + " for n in range(epochs):\n", + " loss = 0.0\n", + " batches = model.ops.multibatch(batch_size, train_X, train_y, shuffle=True)\n", + " for X, Y in batches:\n", + " Y = model.ops.asarray(Y, dtype=\"float32\")\n", + " Yh, backprop = model.begin_update(X)\n", + " err = Yh - Y\n", + " backprop(err)\n", + " loss += (err ** 2).sum()\n", + " model.finish_update(optimizer)\n", + " score = evaluate_model(model, texts=eval_texts, X=eval_X, Y=eval_y)\n", + " if score > best_score:\n", + " best_model = model.to_bytes()\n", + " best_score = score\n", + " print(f\"{n}\\t{score:.2f}\\t{loss:.2f}\")\n", + "\n", + " if best_model is not None:\n", + " model.from_bytes(best_model)\n", + " print(f\"Evaluating with best model\")\n", + " score = evaluate_model(\n", + " model, texts=eval_texts, print_problems=True, X=eval_X, Y=eval_y\n", + " )\n", + " print(f\"Final Score: {score}\")\n", + " return score\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll generate the dataset first, so we can iterate on the model without having to spend time generating examples for each run. This also ensures we have the same dataset across different model runs, to make it easier to compare performance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2Knerating train dataset with 8192 examples...\u001b[38;5;2m✔ Train set created with 8192 examples.\u001b[0m\n", + "\u001b[2Knerating eval dataset with 2048 examples...\u001b[38;5;2m✔ Eval set created with 2048 examples.\u001b[0m\n" + ] + } + ], + "source": [ + "train_size = 1024 * 8\n", + "test_size = 2048\n", + "seen_texts: Set[str] = set()\n", + "msg.text(f\"Generating train dataset with {train_size} examples...\")\n", + "train_dataset = generate_dataset(train_size, seen_texts)\n", + "msg.text(f\"Train set created with {train_size} examples.\")\n", + "msg.text(f\"Generating eval dataset with {test_size} examples...\")\n", + "eval_dataset = generate_dataset(test_size, seen_texts)\n", + "msg.text(f\"Eval set created with {test_size} examples.\")\n", + "init_x = train_dataset[1][:2]\n", + "init_y = train_dataset[2][:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can build, train, and evaluate our model!" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m\n", + "========================== Train and Evaluate Model ==========================\u001b[0m\n", + "\u001b[38;5;4mℹ Batch size = 64 Epochs = 32 Learning Rate = 0.002\u001b[0m\n", + "0\t0.23\t25283.75\n", + "1\t0.21\t17535.96\n", + "2\t0.23\t17329.38\n", + "3\t0.24\t15842.63\n", + "4\t0.22\t15396.75\n", + "5\t0.28\t14760.94\n", + "6\t0.24\t14000.68\n", + "7\t0.25\t13252.41\n", + "8\t0.26\t12263.78\n", + "9\t0.28\t12130.36\n", + "10\t0.28\t11368.95\n", + "11\t0.29\t10993.20\n", + "12\t0.28\t10709.44\n", + "13\t0.30\t10305.06\n", + "14\t0.33\t10134.89\n", + "15\t0.34\t9738.52\n", + "16\t0.33\t9579.92\n", + "17\t0.32\t9091.71\n", + "18\t0.34\t8950.21\n", + "19\t0.34\t8553.15\n", + "20\t0.34\t8320.76\n", + "21\t0.34\t7905.48\n", + "22\t0.38\t7880.38\n", + "23\t0.36\t7484.32\n", + "24\t0.37\t7348.58\n", + "25\t0.35\t7158.16\n", + "26\t0.36\t6754.80\n", + "27\t0.37\t6588.11\n", + "28\t0.38\t6534.72\n", + "29\t0.37\t6266.50\n", + "30\t0.40\t6176.08\n", + "31\t0.42\t5852.44\n", + "Evaluating with best model\n", + "\u001b[1m\n", + "============================ eval samples max(12) ============================\u001b[0m\n", + "\u001b[38;5;2m✔ Answer[6] Guess[6] Text: -7268s + 9c^4 - -3346u + -4891m + 12q^4 +\n", + "3.8a + 8h + 10x - 1n^3 - 2.2k + 10b - -8598k - 5499b + 8496k + -5230b - 2r\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[3] Text: 11.0t - 11t + 2202f + -581a^3 - 10u\u001b[0m\n", + "\u001b[38;5;2m✔ Answer[2] Guess[2] Text: 4085q^3 - 6667m - 9c + 2c + 3y\u001b[0m\n", + "\u001b[38;5;2m✔ Answer[2] Guess[2] Text: 10h^3 + 11.0f^2 + 10r - 8091t^4 + 11b -\n", + "1114x + 1r\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[4] Text: 0.9k + 575t^3 + 5975x - 4147l - 4.6j + 7r +\n", + "0.6h^3 + 10b^2 + 1.8w - 4y + 7584.2w + 1q\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[4] Text: 1t^4 - 11z + 10.2h - 12s + 7374m + 954s +\n", + "9q\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[4] Text: 10r + 12l - 7.8q + 2.9g - 8.3f + 3868.1a +\n", + "7870p + -182p + 8.9z\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[3] Text: 3653x^4 - -8734m + 6418d - 12h^3 - 1069o -\n", + "12p - -5812b - 7b - 9.2t - 2.1q + 7a^3\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[4] Text: 8x - 1a^2 - 6.4o + 1.2s^3 - -266y + 12f -\n", + "-5511p + -3956n + 
10.3v^4 + 0.5j + 3.5q - 3m - 8o\u001b[0m\n", + "\u001b[38;5;1m✘ Answer[2] Guess[4] Text: 9335k + 6c^3 + 9.8p - 3.6u^4 - 3503f - 2.1h\n", + "- -6713g^2 - 6.7d - -1433g^2 - 1j^4 - 10.9a^2 - 4.2s^2 + 4336n^3\u001b[0m\n", + "\u001b[38;5;2m✔ Answer[6] Guess[6] Text: 0.9v^3 - 3x + 3g + 2k - 2926w^3 + 6b +\n", + "-5993u^3 - 10.3t^2 + 0.5s^2 + 12z - 3585d - 1239z + 0.1d + 7980z - 761d -\n", + "-1721f\u001b[0m\n", + "\u001b[38;5;2m✔ Answer[2] Guess[2] Text: -713o - -6348.5x^4 - -4531z + 10.9j - 4o +\n", + "-4799q - 11u^4 + -8290a - 9.5v\u001b[0m\n", + "Model predicted 852 out of 2048 correctly.\n", + "Final Score: 0.416015625\n" + ] + }, + { + "data": { + "text/plain": [ + "0.416015625" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = build_model(64)\n", + "model.initialize(init_x, init_y)\n", + "train_and_evaluate(\n", + " model, train_dataset, eval_dataset, lr=2e-3, batch_size=64, epochs=32\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.6.8 64-bit ('.env': virtualenv)", + "language": "python", + "name": "python36864bitenvvirtualenvbcc3528d06af44ca802a113b53f7d700" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/website/docs/examples/predicting_like_terms.md b/website/docs/examples/predicting_like_terms.md new file mode 100644 index 0000000..58c10ad --- /dev/null +++ b/website/docs/examples/predicting_like_terms.md @@ -0,0 +1,533 @@ + +# Predicting Like Terms [![Open Example In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/justindujardin/mathy/blob/master/libraries/website/docs/examples/predicting_like_terms.ipynb) + +> This notebook is built using [thinc](https://thinc.ai){target=\_blank} and [Mathy](https://mathy.ai). + + +Remember in Algebra how you had to combine "like terms" to simplify problems? + +You'd see expressions like `60 + 2x^3 - 6x + x^3 + 17x` that have **5** total terms but only **4** "like terms". + +That's because `2x^3` and `x^3` are like and `-6x` and `17x` are like, while `60` doesn't have any other terms that are like it. + +Can we teach a model to predict that there are `4` like terms in the above expression? + +Let's give it a shot using [Mathy](https://mathy.ai) to generate math problems and [thinc](https://thinc.ai) to build a regression model that outputs the number of like terms in each input problem. + + +```python +!pip install "thinc>=8.0.0a0" mathy_core +``` + +### Sketch a Model + +Before we get started it can be good to have an idea of what input/output shapes we want for our model. + +We'll convert text math problems into lists of lists of integers, so our example (X) type can be represented using thinc's `Ints2d` type. + +The model will predict how many like terms there are in each sequence, so our output (Y) type can represented with the `Floats2d` type. + +Knowing the thinc types we want enables us to create an alias for our model, so we only have to type out the verbose generic signature once. 
+ + +```python +from typing import List +from thinc.api import Model +from thinc.types import Ints2d, Floats1d + +ModelX = Ints2d +ModelY = Floats1d +ModelT = Model[List[ModelX], ModelY] +``` + +### Encode Text Inputs + +Mathy generates ascii-math problems and we have to encode them into integers that the model can process. + +To do this we'll build a vocabulary of all the possible characters we'll see, and map each input character to its index in the list. + +For math problems our vocabulary will include all the characters of the alphabet, numbers 0-9, and special characters like `*`, `-`, `.`, etc. + + +```python +from typing import List +from thinc.api import Model +from thinc.types import Ints2d, Floats1d +from thinc.api import Ops, get_current_ops + +vocab = " .+-/^*()[]-01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +def encode_input(text: str) -> ModelX: + ops: Ops = get_current_ops() + indices: List[List[int]] = [] + for c in text: + if c not in vocab: + raise ValueError(f"'{c}' missing from vocabulary in text: {text}") + indices.append([vocab.index(c)]) + return ops.asarray2i(indices) +``` + +**Try It** + +Let's try it out on some fixed data to be sure it works. + + +```python +outputs = encode_input("4+2") +assert outputs[0][0] == vocab.index("4") +assert outputs[1][0] == vocab.index("+") +assert outputs[2][0] == vocab.index("2") +print(outputs) +``` + + [[16] + [ 2] + [14]] + + +### Generate Math Problems + +We'll use Mathy to generate random polynomial problems with a variable number of like terms. The generated problems will act as training data for our model. + + +```python +from typing import List, Optional, Set +import random +from mathy_core.problems import gen_simplify_multiple_terms + +def generate_problems(number: int, exclude: Optional[Set[str]] = None) -> List[str]: + if exclude is None: + exclude = set() + problems: List[str] = [] + while len(problems) < number: + text, complexity = gen_simplify_multiple_terms( + random.randint(2, 6), + noise_probability=1.0, + noise_terms=random.randint(2, 10), + op=["+", "-"], + ) + assert text not in exclude, "duplicate problem generated!" + exclude.add(text) + problems.append(text) + return problems +``` + +**Try It** + + +```python +generate_problems(10) +``` + + + + + ['-7743l^3 + 3130r + -5826.8u - 4394r + 3g^4 - 1y - 1485u + 11w', + '4d - -1525.5m^3 + 1w + 12l^4 + 3069.9w + -3559s - 1.8r + 6737.3l^4 - -2119l^4 - 3w + 1128.9a + -5600v - -2315b + 8.1u - -6832z', + '-4868y^2 - 4548k + 9.6m + 3s - -7128d^4 + 6j^4 - 12v - 8.1t^4 + 1o^3 + 4c^4 - 2579o^3 - -4237.7q', + '-4553l^2 - 11.7j + 10j - 8.3g - -5184m', + '4o^2 + 2886u^3 + 5813q - 1u^3 + 4s - -6991u^3 + -9560a - -4774f + -1479z - 8.0f + 7x + 6.5h + -4397.2y + 12b', + '1247m^4 + 3833q^2 + 1n - 11.7s - 1.3p - 618y^2 + -3821n + 2a - 2.4a - 11r - 4764w^3 + 4.5n - 2.2t + 572.9a - 3c^3', + '1214.7f^4 + 11s - 2151k^4 - -7732q - 9q - 4l + -3697.3h + 3z + 5l - 7813.0p^3', + '4m + 6x - 4u + 1f - 11m - 11.5d + 4.0z - 2n + 4386c^4 + 2.1q', + '5h + 5.0h - 10.9a - 1517h + 2940o - -4178k^2 + -1748k^2', + '1.5l - 8.1d - 7.4m^2 - 0.9a - -4580w - -8290.8k + 8.3j + 8g + -5722d - 5455s^2 + -5355r^2 + 11u^4'] + + + +### Count Like Terms + +Now that we can generate input problems, we'll need a function that can count the like terms in each one and return the value for use as a label. + +To accomplish this we'll use a few helpers from mathy to enumerate the terms and compare them to see if they're like. 
+ + +```python +from typing import Optional, List, Dict +from mathy_core.problems import mathy_term_string +from mathy_core import MathExpression, ExpressionParser, get_terms, get_term_ex, TermEx + +parser = ExpressionParser() + +def count_like_terms(input_problem: str) -> int: + expression: MathExpression = parser.parse(input_problem) + term_nodes: List[MathExpression] = get_terms(expression) + node_groups: Dict[str, List[MathExpression]] = {} + for term_node in term_nodes: + ex: Optional[TermEx] = get_term_ex(term_node) + assert ex is not None, f"invalid expression {term_node}" + key = mathy_term_string(variable=ex.variable, exponent=ex.exponent) + if key == "": + key = "const" + if key not in node_groups: + node_groups[key] = [term_node] + else: + node_groups[key].append(term_node) + like_terms = 0 + for k, v in node_groups.items(): + if len(v) <= 1: + continue + like_terms += len(v) + return like_terms +``` + +**Try It** + + +```python +assert count_like_terms("4x - 2y + q") == 0 +assert count_like_terms("x + x + z") == 2 +assert count_like_terms("4x + 2x - x + 7") == 3 +``` + +### Generate Problem/Answer pairs + +Now that we can generate problems, count the number of like terms in them, and encode their text into integers, we have the pieces required to generate random problems and answers that we can train a neural network with. + +Let's write a function that will return a tuple of: the problem text, its encoded example form, and the output label. + + +```python +from typing import Tuple +from thinc.api import Ops, get_current_ops + +def to_example(input_problem: str) -> Tuple[str, ModelX, ModelY]: + ops: Ops = get_current_ops() + encoded_input = encode_input(input_problem) + like_terms = count_like_terms(input_problem) + return input_problem, encoded_input, ops.asarray1f([like_terms]) +``` + +**Try It** + + +```python +text, X, Y = to_example("x+2x") +assert text == "x+2x" +assert X[0] == vocab.index("x") +assert Y[0] == 2 +print(text, X, Y) +``` + + x+2x [[46] + [ 2] + [14] + [46]] [2.] + + +### Build a Model + +Now that we can generate X/Y values, let's define our model and verify that it can process a single input/output. + +For this we'll use Thinc and the `define_operators` context manager to connect the pieces together using overloaded operators for `chain` and `clone` operations. + + +```python +from typing import List +from thinc.model import Model +from thinc.api import concatenate, chain, clone, list2ragged +from thinc.api import reduce_sum, Mish, with_array, Embed, residual + +def build_model(n_hidden: int, dropout: float = 0.1) -> ModelT: + with Model.define_operators({">>": chain, "|": concatenate, "**": clone}): + model = ( + # Iterate over each element in the batch + with_array( + # Embed the vocab indices + Embed(n_hidden, len(vocab), column=0) + # Activate each batch of embedding sequences separately first + >> Mish(n_hidden, dropout=dropout) + ) + # Convert to ragged so we can use the reduction layers + >> list2ragged() + # Sum the features for each batch input + >> reduce_sum() + # Process with a small resnet + >> residual(Mish(n_hidden, normalize=True)) ** 4 + # Convert (batch_size, n_hidden) to (batch_size, 1) + >> Mish(1) + ) + return model +``` + +**Try It** + +Let's pass an example through the model to make sure we have all the sizes right. 
+ + +```python +text, X, Y = to_example("14x + 2y - 3x + 7x") +m = build_model(12) +m.initialize([X], m.ops.asarray(Y, dtype="f")) +mY = m.predict([X]) +print(mY.shape) +assert mY.shape == (1, 1) +``` + + (1, 1) + + +### Generate Training Datasets + +Now that we can generate examples and we have a model that can process them, let's generate random unique training and evaluation datasets. + +For this we'll write another helper function that can generate (n) training examples and respects an exclude list to avoid letting examples from the training/test sets overlap. + + +```python +from typing import Tuple, Optional, Set, List + +DatasetTuple = Tuple[List[str], List[ModelX], List[ModelY]] + +def generate_dataset( + size: int, + exclude: Optional[Set[str]] = None, +) -> DatasetTuple: + ops: Ops = get_current_ops() + texts: List[str] = generate_problems(size, exclude=exclude) + examples: List[ModelX] = [] + labels: List[ModelY] = [] + for i, text in enumerate(texts): + text, x, y = to_example(text) + examples.append(x) + labels.append(y) + + return texts, examples, labels +``` + +**Try It** + +Generate a small dataset to be sure everything is working as expected + + +```python +texts, x, y = generate_dataset(10) +assert len(texts) == 10 +assert len(x) == 10 +assert len(y) == 10 +``` + +### Evaluate Model Performance + +We're almost ready to train our model, we just need to write a function that will check a given trained model against a given dataset and return a 0-1 score of how accurate it was. + +We'll use this function to print the score as training progresses and print final test predictions at the end of training. + + +```python +from typing import List +from wasabi import msg + +def evaluate_model( + model: ModelT, + *, + print_problems: bool = False, + texts: List[str], + X: List[ModelX], + Y: List[ModelY], +): + Yeval = model.predict(X) + correct_count = 0 + print_n = 12 + if print_problems: + msg.divider(f"eval samples max({print_n})") + for text, y_answer, y_guess in zip(texts, Y, Yeval): + y_guess = round(float(y_guess)) + correct = y_guess == int(y_answer) + print_fn = msg.fail + if correct: + correct_count += 1 + print_fn = msg.good + if print_problems and print_n > 0: + print_n -= 1 + print_fn(f"Answer[{int(y_answer[0])}] Guess[{y_guess}] Text: {text}") + if print_problems: + print(f"Model predicted {correct_count} out of {len(X)} correctly.") + return correct_count / len(X) + +``` + +**Try It** + +Let's try it out with an untrained model and expect to see a really sad score. + + +```python +texts, X, Y = generate_dataset(128) +m = build_model(12) +m.initialize(X, m.ops.asarray(Y, dtype="f")) +# Assume the model should do so poorly as to round down to 0 +assert round(evaluate_model(m, texts=texts, X=X, Y=Y)) == 0 +``` + +### Train/Evaluate a Model + +The final helper function we need is one to train and evaluate a model given two input datasets. + +This function does a few things: + + 1. Create an Adam optimizer we can use for minimizing the model's prediction error. + 2. Loop over the given training dataset (epoch) number of times. + 3. For each epoch, make batches of (batch_size) examples. For each batch(X), predict the number of like terms (Yh) and subtract the known answers (Y) to get the prediction error. Update the model using the optimizer with the calculated error. + 5. After each epoch, check the model performance against the evaluation dataset. + 6. Save the model weights for the best score out of all the training epochs. + 7. 
After all training is done, restore the best model and print results from the evaluation set. + + +```python +from thinc.api import Adam +from wasabi import msg +import numpy + +def train_and_evaluate( + model: ModelT, + train_tuple: DatasetTuple, + eval_tuple: DatasetTuple, + *, + lr: float = 3e-3, + batch_size: int = 64, + epochs: int = 48, +) -> float: + (train_texts, train_X, train_y) = train_tuple + (eval_texts, eval_X, eval_y) = eval_tuple + msg.divider("Train and Evaluate Model") + msg.info(f"Batch size = {batch_size}\tEpochs = {epochs}\tLearning Rate = {lr}") + + optimizer = Adam(lr) + best_score: float = 0.0 + best_model: Optional[bytes] = None + for n in range(epochs): + loss = 0.0 + batches = model.ops.multibatch(batch_size, train_X, train_y, shuffle=True) + for X, Y in batches: + Y = model.ops.asarray(Y, dtype="float32") + Yh, backprop = model.begin_update(X) + err = Yh - Y + backprop(err) + loss += (err ** 2).sum() + model.finish_update(optimizer) + score = evaluate_model(model, texts=eval_texts, X=eval_X, Y=eval_y) + if score > best_score: + best_model = model.to_bytes() + best_score = score + print(f"{n}\t{score:.2f}\t{loss:.2f}") + + if best_model is not None: + model.from_bytes(best_model) + print(f"Evaluating with best model") + score = evaluate_model( + model, texts=eval_texts, print_problems=True, X=eval_X, Y=eval_y + ) + print(f"Final Score: {score}") + return score + +``` + +We'll generate the dataset first, so we can iterate on the model without having to spend time generating examples for each run. This also ensures we have the same dataset across different model runs, to make it easier to compare performance. + + +```python +train_size = 1024 * 8 +test_size = 2048 +seen_texts: Set[str] = set() +msg.text(f"Generating train dataset with {train_size} examples...") +train_dataset = generate_dataset(train_size, seen_texts) +msg.text(f"Train set created with {train_size} examples.") +msg.text(f"Generating eval dataset with {test_size} examples...") +eval_dataset = generate_dataset(test_size, seen_texts) +msg.text(f"Eval set created with {test_size} examples.") +init_x = train_dataset[1][:2] +init_y = train_dataset[2][:2] +``` + + nerating train dataset with 8192 examples...✔ Train set created with 8192 examples. + nerating eval dataset with 2048 examples...✔ Eval set created with 2048 examples. + + +Finally, we can build, train, and evaluate our model! 
+ + +```python +model = build_model(64) +model.initialize(init_x, init_y) +train_and_evaluate( + model, train_dataset, eval_dataset, lr=2e-3, batch_size=64, epochs=32 +) +``` + +  + ========================== Train and Evaluate Model ========================== + ℹ Batch size = 64 Epochs = 32 Learning Rate = 0.002 + 0 0.23 25283.75 + 1 0.21 17535.96 + 2 0.23 17329.38 + 3 0.24 15842.63 + 4 0.22 15396.75 + 5 0.28 14760.94 + 6 0.24 14000.68 + 7 0.25 13252.41 + 8 0.26 12263.78 + 9 0.28 12130.36 + 10 0.28 11368.95 + 11 0.29 10993.20 + 12 0.28 10709.44 + 13 0.30 10305.06 + 14 0.33 10134.89 + 15 0.34 9738.52 + 16 0.33 9579.92 + 17 0.32 9091.71 + 18 0.34 8950.21 + 19 0.34 8553.15 + 20 0.34 8320.76 + 21 0.34 7905.48 + 22 0.38 7880.38 + 23 0.36 7484.32 + 24 0.37 7348.58 + 25 0.35 7158.16 + 26 0.36 6754.80 + 27 0.37 6588.11 + 28 0.38 6534.72 + 29 0.37 6266.50 + 30 0.40 6176.08 + 31 0.42 5852.44 + Evaluating with best model +  + ============================ eval samples max(12) ============================ + ✔ Answer[6] Guess[6] Text: -7268s + 9c^4 - -3346u + -4891m + 12q^4 + + 3.8a + 8h + 10x - 1n^3 - 2.2k + 10b - -8598k - 5499b + 8496k + -5230b - 2r + ✘ Answer[2] Guess[3] Text: 11.0t - 11t + 2202f + -581a^3 - 10u + ✔ Answer[2] Guess[2] Text: 4085q^3 - 6667m - 9c + 2c + 3y + ✔ Answer[2] Guess[2] Text: 10h^3 + 11.0f^2 + 10r - 8091t^4 + 11b - + 1114x + 1r + ✘ Answer[2] Guess[4] Text: 0.9k + 575t^3 + 5975x - 4147l - 4.6j + 7r + + 0.6h^3 + 10b^2 + 1.8w - 4y + 7584.2w + 1q + ✘ Answer[2] Guess[4] Text: 1t^4 - 11z + 10.2h - 12s + 7374m + 954s + + 9q + ✘ Answer[2] Guess[4] Text: 10r + 12l - 7.8q + 2.9g - 8.3f + 3868.1a + + 7870p + -182p + 8.9z + ✘ Answer[2] Guess[3] Text: 3653x^4 - -8734m + 6418d - 12h^3 - 1069o - + 12p - -5812b - 7b - 9.2t - 2.1q + 7a^3 + ✘ Answer[2] Guess[4] Text: 8x - 1a^2 - 6.4o + 1.2s^3 - -266y + 12f - + -5511p + -3956n + 10.3v^4 + 0.5j + 3.5q - 3m - 8o + ✘ Answer[2] Guess[4] Text: 9335k + 6c^3 + 9.8p - 3.6u^4 - 3503f - 2.1h + - -6713g^2 - 6.7d - -1433g^2 - 1j^4 - 10.9a^2 - 4.2s^2 + 4336n^3 + ✔ Answer[6] Guess[6] Text: 0.9v^3 - 3x + 3g + 2k - 2926w^3 + 6b + + -5993u^3 - 10.3t^2 + 0.5s^2 + 12z - 3585d - 1239z + 0.1d + 7980z - 761d - + -1721f + ✔ Answer[2] Guess[2] Text: -713o - -6348.5x^4 - -4531z + 10.9j - 4o + + -4799q - 11u^4 + -8290a - 9.5v + Model predicted 852 out of 2048 correctly. 
+ Final Score: 0.416015625 + + + + + + 0.416015625 + + + + +```python + +``` diff --git a/website/docs/img/favicon/android-chrome-192x192.png b/website/docs/img/favicon/android-chrome-192x192.png new file mode 100755 index 0000000..fb135e2 Binary files /dev/null and b/website/docs/img/favicon/android-chrome-192x192.png differ diff --git a/website/docs/img/favicon/android-chrome-512x512.png b/website/docs/img/favicon/android-chrome-512x512.png new file mode 100755 index 0000000..a469f2b Binary files /dev/null and b/website/docs/img/favicon/android-chrome-512x512.png differ diff --git a/website/docs/img/favicon/apple-touch-icon.png b/website/docs/img/favicon/apple-touch-icon.png new file mode 100755 index 0000000..2316a7b Binary files /dev/null and b/website/docs/img/favicon/apple-touch-icon.png differ diff --git a/website/docs/img/favicon/favicon-16x16.png b/website/docs/img/favicon/favicon-16x16.png new file mode 100755 index 0000000..ec9847d Binary files /dev/null and b/website/docs/img/favicon/favicon-16x16.png differ diff --git a/website/docs/img/favicon/favicon-32x32.png b/website/docs/img/favicon/favicon-32x32.png new file mode 100755 index 0000000..536c914 Binary files /dev/null and b/website/docs/img/favicon/favicon-32x32.png differ diff --git a/website/docs/img/favicon/favicon.ico b/website/docs/img/favicon/favicon.ico new file mode 100755 index 0000000..eb149b3 Binary files /dev/null and b/website/docs/img/favicon/favicon.ico differ diff --git a/website/docs/img/favicon/site.webmanifest b/website/docs/img/favicon/site.webmanifest new file mode 100755 index 0000000..5771696 --- /dev/null +++ b/website/docs/img/favicon/site.webmanifest @@ -0,0 +1,19 @@ +{ + "name": "", + "short_name": "", + "icons": [ + { + "src": "/android-chrome-192x192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "src": "/android-chrome-512x512.png", + "sizes": "512x512", + "type": "image/png" + } + ], + "theme_color": "#ff7043", + "background_color": "#ffffff", + "display": "standalone" +} diff --git a/website/docs/img/mathy_core_logo.png b/website/docs/img/mathy_core_logo.png new file mode 100644 index 0000000..7ee18a6 Binary files /dev/null and b/website/docs/img/mathy_core_logo.png differ diff --git a/website/docs/img/school-24px.png b/website/docs/img/school-24px.png new file mode 100644 index 0000000..2595b98 Binary files /dev/null and b/website/docs/img/school-24px.png differ diff --git a/website/docs/img/school-24px.svg b/website/docs/img/school-24px.svg new file mode 100644 index 0000000..2e52f34 --- /dev/null +++ b/website/docs/img/school-24px.svg @@ -0,0 +1 @@ + diff --git a/website/docs/index.md b/website/docs/index.md new file mode 100644 index 0000000..7e2f681 --- /dev/null +++ b/website/docs/index.md @@ -0,0 +1,65 @@ +# mathy_core + +

+ <!-- logo image: Mathy Core -->
+
+ Parse text into trees, visualize them, and make them dance by your rules.
+
+ <!-- badge images: Package version -->

+ +Mathy includes a Computer Algebra System (or CAS). Its job is to turn text into math trees that can be examined and manipulated by a multi-step process: + +1. [Tokenize](./api/tokenizer.md) the text into a list of `type`/`value` pairs +2. [Parse](./api/parser.md) the token list into an Expression tree +3. [Modify](./api/rule.md) the tree by applying a transformation rule to it. + +## Requirements + +- Python 3.6+ + +## Installation + +```bash +$ pip install mathy_envs +``` + +## Examples + +### Arithmetic + +To understand how Mathy's CAS components work, let's add some numbers and assert that the result is what we think it should be. + +```Python +{!./snippets/cas/overview/evaluate_expression.py!} +``` + +### Variables Evaluation + +Mathy can also deal with expressions that have variables. + +When an expression has variables in it, you can evaluate it by providing the "context" to use: + +```Python +{!./snippets/cas/overview/evaluate_expression_variables.py!} +``` + +### Tree Transformations + +Mathy can also transform the parsed Expression trees using rules that change the tree structure without altering the value it outputs when you call `evaluate()`. + +```python + +{!./snippets/cas/overview/rules_factor_out.py!} + +``` diff --git a/website/docs/lib/styles.css b/website/docs/lib/styles.css new file mode 100644 index 0000000..adceafc --- /dev/null +++ b/website/docs/lib/styles.css @@ -0,0 +1,139 @@ +/* https://github.com/squidfunk/mkdocs-material/issues/175 */ +.md-typeset__table { + min-width: 100%; +} +.md-typeset table:not([class]) { + display: table; +} + +.contributors-wrapper .md-typeset__table { + min-width: 100px; +} +.contributors-wrapper .md-typeset__table table { + border: none; + box-shadow: none; +} + +/** + * termynal.js + * + * @author Ines Montani + * @version 0.0.1 + * @license MIT + */ + +:root { + --color-bg: #252a33; + --color-text: #eee; + --color-text-subtle: #a2a2a2; +} + +[data-termynal] { + text-align: left; + width: 750px; + max-width: 100%; + background: var(--color-bg); + color: var(--color-text); + font-size: 18px; + font-family: "Roboto Mono", Consolas, Menlo, Monaco, "Courier New", Courier, + monospace; + border-radius: 4px; + padding: 75px 45px 35px; + position: relative; + -webkit-box-sizing: border-box; + box-sizing: border-box; +} + +[data-termynal]:before { + content: ""; + position: absolute; + top: 15px; + left: 15px; + display: inline-block; + width: 15px; + height: 15px; + border-radius: 50%; + /* A little hack to display the window buttons in one pseudo element. */ + background: #d9515d; + -webkit-box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930; + box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930; +} + +[data-ty]:not([data-termynal]) { + white-space: pre; +} +[data-ty] { + display: block; + line-height: 1.5; +} + +[data-ty]:before { + /* Set up defaults and ensure empty lines are displayed. 
*/ + content: ""; + display: inline-block; + vertical-align: middle; +} + +[data-ty="input"]:before, +[data-ty-prompt]:before { + margin-right: 0.75em; + color: var(--color-text-subtle); +} + +[data-ty="input"]:before { + content: "$"; +} + +[data-ty][data-ty-prompt]:before { + content: attr(data-ty-prompt); +} + +[data-ty-cursor]:after { + content: attr(data-ty-cursor); + font-family: monospace; + margin-left: 0.5em; + -webkit-animation: blink 1s infinite; + animation: blink 1s infinite; +} + +/* API Docs function type label */ +.md-typeset kbd { + font-size: 0.65rem; + box-shadow: none; + border-radius: 0.2rem; + float: right; +} + +/* Cursor animation */ + +@-webkit-keyframes blink { + 50% { + opacity: 0; + } +} + +@keyframes blink { + 50% { + opacity: 0; + } +} +@media only screen and (max-width: 650px) { + img[mathy-logo] { + max-width: 70%; + } + + [data-termynal-container] { + margin: 0 -0.8rem; + } + [data-termynal]:before { + background: none; + -webkit-box-shadow: none; + box-shadow: none; + } + [data-termynal] { + border-radius: 0; + padding: 25px 5px; + font-size: 11px; + margin: 0 -25px; + } +} diff --git a/website/docs/lib/termynal.js b/website/docs/lib/termynal.js new file mode 100644 index 0000000..4832914 --- /dev/null +++ b/website/docs/lib/termynal.js @@ -0,0 +1,252 @@ +/** + * termynal.js + * A lightweight, modern and extensible animated terminal window, using + * async/await. + * + * @author Ines Montani + * @version 0.0.1 + * @license MIT + */ + +"use strict" + +/** Generate a terminal widget. */ +class Termynal { + /** + * Construct the widget's settings. + * @param {(string|Node)=} container - Query selector or container element. + * @param {Object=} options - Custom settings. + * @param {string} options.prefix - Prefix to use for data attributes. + * @param {number} options.startDelay - Delay before animation, in ms. + * @param {number} options.typeDelay - Delay between each typed character, in ms. + * @param {number} options.lineDelay - Delay between each line, in ms. + * @param {number} options.progressLength - Number of characters displayed as progress bar. + * @param {string} options.progressChar – Character to use for progress bar, defaults to █. + * @param {number} options.progressPercent - Max percent of progress. + * @param {string} options.cursor – Character to use for cursor, defaults to ▋. + * @param {Object[]} lineData - Dynamically loaded line data objects. + * @param {boolean} options.noInit - Don't initialise the animation. + */ + constructor(container = "#termynal", options = {}) { + this.container = + typeof container === "string" + ? 
document.querySelector(container) + : container + this.pfx = `data-${options.prefix || "ty"}` + this.startDelay = + options.startDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-startDelay`)) || + 600 + this.typeDelay = + options.typeDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-typeDelay`)) || + 90 + this.lineDelay = + options.lineDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-lineDelay`)) || + 1500 + this.progressLength = + options.progressLength || + parseFloat(this.container.getAttribute(`${this.pfx}-progressLength`)) || + 30 + this.progressChar = + options.progressChar || + this.container.getAttribute(`${this.pfx}-progressChar`) || + "█" + this.progressPercent = + options.progressPercent || + parseFloat(this.container.getAttribute(`${this.pfx}-progressPercent`)) || + 100 + this.cursor = + options.cursor || this.container.getAttribute(`${this.pfx}-cursor`) || "▋" + this.lineData = this.lineDataToElements(options.lineData || []) + if (!options.noInit) this.init() + } + + /** + * Initialise the widget, get lines, clear container and start animation. + */ + init() { + // If any lines use a text attribute set innerText on the elements before + // calculating the height of the container. + const attr = `${this.pfx}-text` + this.container.querySelectorAll(`[${attr}]`).forEach(function(textLine) { + textLine.textContent = textLine.getAttribute(attr) + }) + + // Appends dynamically loaded lines to existing line elements. + this.lines = [...this.container.querySelectorAll(`[${this.pfx}]`)].concat( + this.lineData + ) + + /** + * Calculates width and height of Termynal container. + * If container is empty and lines are dynamically loaded, defaults to browser `auto` or CSS. + */ + + const containerStyle = getComputedStyle(this.container) + this.container.style.width = + containerStyle.width !== "0px" ? containerStyle.width : undefined + this.container.style.minHeight = + containerStyle.height !== "0px" ? containerStyle.height : undefined + + this.container.setAttribute("data-termynal", "") + this.container.innerHTML = "" + this.start() + } + + /** + * Start the animation and rener the lines depending on their data attributes. + */ + async start() { + await this._wait(this.startDelay) + + for (let line of this.lines) { + const type = line.getAttribute(this.pfx) + const delay = line.getAttribute(`${this.pfx}-delay`) || this.lineDelay + + if (type == "input") { + line.setAttribute(`${this.pfx}-cursor`, this.cursor) + await this.type(line) + await this._wait(delay) + } else if (type == "progress") { + await this.progress(line) + await this._wait(delay) + } else { + this.container.appendChild(line) + await this._wait(delay) + } + + line.removeAttribute(`${this.pfx}-cursor`) + } + } + + /** + * Animate a typed line. + * @param {Node} line - The line element to render. + */ + async type(line) { + const text = line.getAttribute(`${this.pfx}-text`) || line.textContent + const chars = [...text] + const delay = line.getAttribute(`${this.pfx}-typeDelay`) || this.typeDelay + line.textContent = "" + this.container.appendChild(line) + + for (let char of chars) { + await this._wait(delay) + line.textContent += char + } + } + + /** + * Animate a progress bar. + * @param {Node} line - The line element to render. 
+ */ + async progress(line) { + const progressLength = + line.getAttribute(`${this.pfx}-progressLength`) || this.progressLength + const progressChar = + line.getAttribute(`${this.pfx}-progressChar`) || this.progressChar + const chars = progressChar.repeat(progressLength) + const progressPercent = + line.getAttribute(`${this.pfx}-progressPercent`) || this.progressPercent + line.textContent = "" + this.container.appendChild(line) + + for (let i = 1; i < chars.length + 1; i++) { + await this._wait(this.typeDelay) + const percent = Math.round((i / chars.length) * 100) + line.textContent = `${chars.slice(0, i)} ${percent}%` + if (percent > progressPercent) { + break + } + } + } + + /** + * Helper function for animation delays, called with `await`. + * @param {number} time - Timeout, in ms. + */ + _wait(time) { + return new Promise(resolve => setTimeout(resolve, time)) + } + + /** + * Converts line data objects into line elements. + * + * @param {Object[]} lineData - Dynamically loaded lines. + * @param {Object} line - Line data object. + * @returns {Element[]} - Array of line elements. + */ + lineDataToElements(lineData) { + return lineData.map(line => { + let div = document.createElement("div") + div.innerHTML = `${line.value || + ""}` + + return div.firstElementChild + }) + } + + /** + * Helper function for generating attributes string. + * + * @param {Object} line - Line data object. + * @returns {string} - String of attributes. + */ + _attributes(line) { + let attrs = "" + for (let prop in line) { + attrs += this.pfx + + if (prop === "type") { + attrs += `="${line[prop]}" ` + } else if (prop !== "value") { + attrs += `-${prop}="${line[prop]}" ` + } + } + + return attrs + } +} + +let termynals = [] + +const all = document.querySelectorAll("[data-termynal-container]") +for (var i = 0, len = all.length; i < len; i++) { + let container = all[i] + termynals.push( + new Termynal(container, { noInit: true, typeDelay: 40, lineDelay: 750 }) + ) +} +// Returns a function, that, as long as it continues to be invoked, will not +// be triggered. The function will be called after it stops being called for +// N milliseconds. If `immediate` is passed, trigger the function on the +// leading edge, instead of the trailing. 
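+// Here it is used to debounce the scroll handler that initializes visible terminals.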
+function debounce(func, wait, immediate) { + var timeout + return function() { + var context = this, + args = arguments + var later = function() { + timeout = null + if (!immediate) func.apply(context, args) + } + var callNow = immediate && !timeout + clearTimeout(timeout) + timeout = setTimeout(later, wait) + if (callNow) func.apply(context, args) + } +} + +const loadVisibleTermynals = debounce(function loadVisibleTermynals() { + termynals = termynals.filter(termynal => { + if (termynal.container.getBoundingClientRect().top - innerHeight <= 0) { + termynal.init() + return false + } + return true + }) +}, 5) +window.addEventListener("scroll", loadVisibleTermynals) +loadVisibleTermynals() diff --git a/website/docs/license.md b/website/docs/license.md new file mode 100644 index 0000000..e80fab0 --- /dev/null +++ b/website/docs/license.md @@ -0,0 +1,23 @@ +# License + +**MIT License** + +Copyright © 2011 - 2023 Justin DuJardin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
diff --git a/website/docs/snippets/__init__.py b/website/docs/snippets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/docs/snippets/cas/overview/evaluate_expression.ipynb b/website/docs/snippets/cas/overview/evaluate_expression.ipynb new file mode 100644 index 0000000..937d32b --- /dev/null +++ b/website/docs/snippets/cas/overview/evaluate_expression.ipynb @@ -0,0 +1,24 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "badea81a", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from mathy_core import ExpressionParser\n", + "\n", + "expression = ExpressionParser().parse(\"4 + 2\")\n", + "assert expression.evaluate() == 6" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/cas/overview/evaluate_expression.py b/website/docs/snippets/cas/overview/evaluate_expression.py new file mode 100644 index 0000000..30a148f --- /dev/null +++ b/website/docs/snippets/cas/overview/evaluate_expression.py @@ -0,0 +1,4 @@ +from mathy_core import ExpressionParser + +expression = ExpressionParser().parse("4 + 2") +assert expression.evaluate() == 6 diff --git a/website/docs/snippets/cas/overview/evaluate_expression_variables.ipynb b/website/docs/snippets/cas/overview/evaluate_expression_variables.ipynb new file mode 100644 index 0000000..5394e29 --- /dev/null +++ b/website/docs/snippets/cas/overview/evaluate_expression_variables.ipynb @@ -0,0 +1,24 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "ba9bcd0d", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from mathy_core import ExpressionParser, MathExpression\n", + "\n", + "expression: MathExpression = ExpressionParser().parse(\"4x + 2y\")\n", + "assert expression.evaluate({\"x\": 2, \"y\": 5}) == 18" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/cas/overview/evaluate_expression_variables.py b/website/docs/snippets/cas/overview/evaluate_expression_variables.py new file mode 100644 index 0000000..12f1a23 --- /dev/null +++ b/website/docs/snippets/cas/overview/evaluate_expression_variables.py @@ -0,0 +1,4 @@ +from mathy_core import ExpressionParser, MathExpression + +expression: MathExpression = ExpressionParser().parse("4x + 2y") +assert expression.evaluate({"x": 2, "y": 5}) == 18 diff --git a/website/docs/snippets/cas/overview/rules_factor_out.ipynb b/website/docs/snippets/cas/overview/rules_factor_out.ipynb new file mode 100644 index 0000000..e583cb7 --- /dev/null +++ b/website/docs/snippets/cas/overview/rules_factor_out.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "965f3bbf", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from mathy_core import ExpressionParser\n", + "from mathy_core.rules import DistributiveFactorOutRule\n", + "\n", + "input = \"4x + 2x\"\n", + "output = \"(4 + 2) * x\"\n", + "parser = ExpressionParser()\n", + "\n", + "input_exp = parser.parse(input)\n", + "output_exp = parser.parse(output)\n", + "\n", + "# Verify that the rule transforms the tree as expected\n", 
+ "change = DistributiveFactorOutRule().apply_to(input_exp)\n", + "assert str(change.result) == output\n", + "\n", + "# Verify that both trees evaluate to the same value\n", + "ctx = {\"x\": 3}\n", + "assert input_exp.evaluate(ctx) == output_exp.evaluate(ctx)" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/cas/overview/rules_factor_out.py b/website/docs/snippets/cas/overview/rules_factor_out.py new file mode 100644 index 0000000..ab62616 --- /dev/null +++ b/website/docs/snippets/cas/overview/rules_factor_out.py @@ -0,0 +1,17 @@ +from mathy_core import ExpressionParser +from mathy_core.rules import DistributiveFactorOutRule + +input = "4x + 2x" +output = "(4 + 2) * x" +parser = ExpressionParser() + +input_exp = parser.parse(input) +output_exp = parser.parse(output) + +# Verify that the rule transforms the tree as expected +change = DistributiveFactorOutRule().apply_to(input_exp) +assert str(change.result) == output + +# Verify that both trees evaluate to the same value +ctx = {"x": 3} +assert input_exp.evaluate(ctx) == output_exp.evaluate(ctx) diff --git a/website/docs/snippets/cas/tokenizer_manual.ipynb b/website/docs/snippets/cas/tokenizer_manual.ipynb new file mode 100644 index 0000000..2a2c0f8 --- /dev/null +++ b/website/docs/snippets/cas/tokenizer_manual.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "764889e4", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from typing import List\n", + "\n", + "from mathy_core import Token, TOKEN_TYPES, Tokenizer\n", + "\n", + "manual_tokens: List[Token] = [\n", + " Token(\"4\", TOKEN_TYPES.Constant),\n", + " Token(\"x\", TOKEN_TYPES.Variable),\n", + " Token(\"+\", TOKEN_TYPES.Plus),\n", + " Token(\"2\", TOKEN_TYPES.Constant),\n", + " Token(\"\", TOKEN_TYPES.EOF),\n", + "]\n", + "auto_tokens: List[Token] = Tokenizer().tokenize(\"4x + 2\")\n", + "\n", + "for i, token in enumerate(manual_tokens):\n", + " assert auto_tokens[i].value == token.value\n", + " assert auto_tokens[i].type == token.type" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/cas/tokenizer_manual.py b/website/docs/snippets/cas/tokenizer_manual.py new file mode 100644 index 0000000..27d3772 --- /dev/null +++ b/website/docs/snippets/cas/tokenizer_manual.py @@ -0,0 +1,16 @@ +from typing import List + +from mathy_core import Token, TOKEN_TYPES, Tokenizer + +manual_tokens: List[Token] = [ + Token("4", TOKEN_TYPES.Constant), + Token("x", TOKEN_TYPES.Variable), + Token("+", TOKEN_TYPES.Plus), + Token("2", TOKEN_TYPES.Constant), + Token("", TOKEN_TYPES.EOF), +] +auto_tokens: List[Token] = Tokenizer().tokenize("4x + 2") + +for i, token in enumerate(manual_tokens): + assert auto_tokens[i].value == token.value + assert auto_tokens[i].type == token.type diff --git a/website/docs/snippets/cas/tokenizer_tokenize.ipynb b/website/docs/snippets/cas/tokenizer_tokenize.ipynb new file mode 100644 index 0000000..719369f --- /dev/null +++ b/website/docs/snippets/cas/tokenizer_tokenize.ipynb @@ -0,0 +1,30 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "595d59b2", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from typing 
import List\n", + "\n", + "from mathy_core import Token, Tokenizer\n", + "\n", + "text = \"4x + 2x^3 * 7x\"\n", + "tokenizer = Tokenizer()\n", + "tokens: List[Token] = tokenizer.tokenize(text)\n", + "\n", + "for token in tokens:\n", + " print(f\"type: {token.type}, value: {token.value}\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/cas/tokenizer_tokenize.py b/website/docs/snippets/cas/tokenizer_tokenize.py new file mode 100644 index 0000000..1ce5a47 --- /dev/null +++ b/website/docs/snippets/cas/tokenizer_tokenize.py @@ -0,0 +1,10 @@ +from typing import List + +from mathy_core import Token, Tokenizer + +text = "4x + 2x^3 * 7x" +tokenizer = Tokenizer() +tokens: List[Token] = tokenizer.tokenize(text) + +for token in tokens: + print(f"type: {token.type}, value: {token.value}") diff --git a/website/docs/snippets/examples/__init__.ipynb b/website/docs/snippets/examples/__init__.ipynb new file mode 100644 index 0000000..de57305 --- /dev/null +++ b/website/docs/snippets/examples/__init__.ipynb @@ -0,0 +1,19 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/website/docs/snippets/examples/__init__.py b/website/docs/snippets/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/docs/snippets/examples/swarm_random_task.ipynb b/website/docs/snippets/examples/swarm_random_task.ipynb new file mode 100644 index 0000000..097b312 --- /dev/null +++ b/website/docs/snippets/examples/swarm_random_task.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "26b3c4d8", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "!pip install gymnasium\n", + "\n", + "import random\n", + "\n", + "import gymnasium as gym\n", + "from mathy.solver import SwarmConfig, swarm_solve\n", + "from mathy_envs.gym import MathyGymEnv\n", + "\n", + "config = SwarmConfig(max_iters=10)\n", + "task = random.choice([\"poly\", \"binomial\", \"complex\"])\n", + "env: MathyGymEnv = gym.make(f\"mathy-{task}-easy-v0\")\n", + "_, problem = env.mathy.get_initial_state(env.env_problem_args)\n", + "swarm_solve(problem.text, config)" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/examples/swarm_random_task.py b/website/docs/snippets/examples/swarm_random_task.py new file mode 100644 index 0000000..9ff416f --- /dev/null +++ b/website/docs/snippets/examples/swarm_random_task.py @@ -0,0 +1,13 @@ +#!pip install gymnasium + +import random + +import gymnasium as gym +from mathy.solver import SwarmConfig, swarm_solve +from mathy_envs.gym import MathyGymEnv + +config = SwarmConfig(max_iters=10) +task = random.choice(["poly", "binomial", "complex"]) +env: MathyGymEnv = gym.make(f"mathy-{task}-easy-v0") +_, problem = env.mathy.get_initial_state(env.env_problem_args) +swarm_solve(problem.text, config) diff --git a/website/docs/snippets/rules/commutative_swap.ipynb b/website/docs/snippets/rules/commutative_swap.ipynb new file mode 100644 index 0000000..506417b --- /dev/null +++ b/website/docs/snippets/rules/commutative_swap.ipynb @@ -0,0 
+1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d589941f", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# This file is generated from a Mathy (https://mathy.ai) code example.\n", + "!pip install mathy --upgrade\n", + "from mathy_core import ExpressionParser\n", + "from mathy_core.rules import CommutativeSwapRule\n", + "\n", + "input = \"x + y + x\"\n", + "output = \"x + x + y\"\n", + "parser = ExpressionParser()\n", + "\n", + "input_exp = parser.parse(input)\n", + "output_exp = parser.parse(output)\n", + "\n", + "# Verify that the rule transforms the tree as expected\n", + "change = CommutativeSwapRule().apply_to(input_exp)\n", + "assert str(change.result) == output" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/website/docs/snippets/rules/commutative_swap.py b/website/docs/snippets/rules/commutative_swap.py new file mode 100644 index 0000000..00b8cc1 --- /dev/null +++ b/website/docs/snippets/rules/commutative_swap.py @@ -0,0 +1,13 @@ +from mathy_core import ExpressionParser +from mathy_core.rules import CommutativeSwapRule + +input = "x + y + x" +output = "x + x + y" +parser = ExpressionParser() + +input_exp = parser.parse(input) +output_exp = parser.parse(output) + +# Verify that the rule transforms the tree as expected +change = CommutativeSwapRule().apply_to(input_exp) +assert str(change.result) == output diff --git a/website/mkdocs.yml b/website/mkdocs.yml new file mode 100644 index 0000000..20db18a --- /dev/null +++ b/website/mkdocs.yml @@ -0,0 +1,97 @@ +# Project information +site_name: Mathy Core +site_description: Parse and manipulate math trees with interpretable rules +site_author: Justin DuJardin +site_url: https://core.mathy.ai + +# Copyright +copyright: Copyright © 2011 - 2023 Justin DuJardin + +repo_name: mathy/mathy_core +repo_url: https://github.com/mathy/mathy_core +edit_uri: edit/main/website/docs/ + +google_analytics: +- UA-8701293-12 +- auto + +nav: +- Home: https://mathy.ai +- Core: + - Home: index.md + - API: + - expressions: api/expressions.md + - layout: api/layout.md + - parser: api/parser.md + - problems: api/problems.md + - rule: api/rule.md + - rules: + - associative_swap: api/rules/associative_swap.md + - balanced_move: api/rules/balanced_move.md + - commutative_swap: api/rules/commutative_swap.md + - constants_simplify: api/rules/constants_simplify.md + - distributive_factor_out: api/rules/distributive_factor_out.md + - distributive_multiply_across: api/rules/distributive_multiply_across.md + - restate_subtraction: api/rules/restate_subtraction.md + - variable_multiply: api/rules/variable_multiply.md + - testing: api/testing.md + - tokenizer: api/tokenizer.md + - tree: api/tree.md + - util: api/util.md + - Releases: changelog.md + - License: license.md +- Environments: https://envs.mathy.ai +extra: + social: + - icon: fontawesome/brands/github-alt + link: https://github.com/justindujardin + - icon: fontawesome/brands/twitter + link: https://twitter.com/justindujardin + - icon: fontawesome/brands/linkedin + link: https://linkedin.com/in/justindujardin + +extra_css: +- lib/styles.css + +extra_javascript: +- lib/termynal.js + +theme: + name: material + icon: + logo: material/hub + features: + - instant + - navigation.tabs + - navigation.tabs.sticky + - navigation.footer + favicon: img/favicon/favicon-16x16.png + palette: + primary: purple + accent: deep-purple + highlightjs: true + hljs_languages: + - python + - typescript + - json +markdown_extensions: 
+- markdown.extensions.codehilite: + guess_lang: false +- toc: + permalink: true +- markdown_include.include: + base_path: docs +- admonition +- codehilite +- extra +- pymdownx.superfences +- pymdownx.details + +plugins: +- mathy +- search +- social +- git-revision-date-localized +- git-committers +- minify: + minify_html: true diff --git a/website/requirements.txt b/website/requirements.txt new file mode 100644 index 0000000..b9b4443 --- /dev/null +++ b/website/requirements.txt @@ -0,0 +1,20 @@ +graphviz +snakeviz +pydot +pydotplus +# Docs +mkdocs +mkdocs-material>=9.5.2,<10.0.0 +mkdocs-git-revision-date-localized-plugin>=1.2.1,<2.0.0 +mkdocs-material[imaging] +mkdocs-git-committers-plugin +markdown-include +mkdocs-minify-plugin +ruamel.yaml +# for converting snippets to ipynb notebooks +nbformat +mathy_pydoc>=0.7.18 +mathy_core +../ +git+https://github.com/mathy/mathy_mkdocs.git # TODO: remove this when published +gym \ No newline at end of file diff --git a/website/runtime.txt b/website/runtime.txt new file mode 100644 index 0000000..cc1923a --- /dev/null +++ b/website/runtime.txt @@ -0,0 +1 @@ +3.8 diff --git a/website/tests/__init__.py b/website/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/tests/cas/__init__.py b/website/tests/cas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/tests/cas/test_overview.py b/website/tests/cas/test_overview.py new file mode 100644 index 0000000..20ef7f1 --- /dev/null +++ b/website/tests/cas/test_overview.py @@ -0,0 +1,10 @@ +def test_cas_overview_evaluate_expression_variables(): + from ...docs.snippets.cas.overview import evaluate_expression_variables # noqa + + +def test_cas_overview_rules_factor_out(): + from ...docs.snippets.cas.overview import rules_factor_out # noqa + + +def test_cas_overview_evaluate_expression(): + from ...docs.snippets.cas.overview import evaluate_expression # noqa diff --git a/website/tests/cas/test_tokenizer_manual.py b/website/tests/cas/test_tokenizer_manual.py new file mode 100644 index 0000000..8ad3c57 --- /dev/null +++ b/website/tests/cas/test_tokenizer_manual.py @@ -0,0 +1,2 @@ +def test_tokenizer_manual(): + from ...docs.snippets.cas import tokenizer_manual # noqa diff --git a/website/tests/cas/test_tokenizer_tokenize.py b/website/tests/cas/test_tokenizer_tokenize.py new file mode 100644 index 0000000..09ffba9 --- /dev/null +++ b/website/tests/cas/test_tokenizer_tokenize.py @@ -0,0 +1,2 @@ +def test_tokenizer_tokenize(): + from ...docs.snippets.cas import tokenizer_tokenize # noqa diff --git a/website/tests/rules/__init__.py b/website/tests/rules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/tests/rules/test_rules_overview.py b/website/tests/rules/test_rules_overview.py new file mode 100644 index 0000000..c155f22 --- /dev/null +++ b/website/tests/rules/test_rules_overview.py @@ -0,0 +1,2 @@ +def test_rules_overview_custom_problem_text(): + from ...docs.snippets.rules import commutative_swap # noqa diff --git a/website/tools/__init__.py b/website/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/website/tools/build.sh b/website/tools/build.sh new file mode 100644 index 0000000..5f12770 --- /dev/null +++ b/website/tools/build.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +. ../../.env/bin/activate +echo "Build python package..." 
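+# Build the static documentation site with mkdocs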
+mkdocs build diff --git a/website/tools/clean.sh b/website/tools/clean.sh new file mode 100644 index 0000000..935d3c7 --- /dev/null +++ b/website/tools/clean.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e +rm -rf .env/ +rm -rf .pytest_cache/ diff --git a/website/tools/develop.sh b/website/tools/develop.sh new file mode 100644 index 0000000..5c58e4f --- /dev/null +++ b/website/tools/develop.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +../../.env/bin/mkdocs serve diff --git a/website/tools/docs.py b/website/tools/docs.py new file mode 100644 index 0000000..838b5fd --- /dev/null +++ b/website/tools/docs.py @@ -0,0 +1,127 @@ +from pathlib import Path +from subprocess import check_output +from ruamel.yaml import YAML + +# Constants +EXCLUDE_FILES = { + ".DS_Store", + "__init__.py", + "README.md", + "types.py", + "cli.py", + "py.typed", + "about.py", + "conrastive.py", +} +INCLUDE_FOLDERS = [".", "rules"] +DOCS_KEY = "API" + +# Paths +parent_folder_path = Path(__file__).parent.parent +yaml_path = parent_folder_path / "mkdocs.yml" +source_path = parent_folder_path.parent / "mathy_core" +dest_dir = parent_folder_path / "docs" / "api" + +# YAML +yaml = YAML() + + +def prepend_md_content(original_md, prepending_md): + with open(prepending_md, "r") as file: + prepending_content = file.read() + + with open(original_md, "r+") as file: + original_content = file.readlines() + # First line is a heading 1 with the full module name that is described. + # Make it explode if that changes so we notice. + assert original_content[0].startswith( + "# " + ), "Expected heading 1 at beginning of mathy_pydoc API doc file" + + # Change to + ticks = "```" + new_content = ( + f"# API\n\n{ticks}python\n\nimport {original_content[0][2:]}{ticks}\n\n" + ) + original_content[0] = new_content + # flatten the list of lines into a single string + original_content = "".join(original_content) + + file.seek(0, 0) + file.write(prepending_content + "\n" + original_content) + + +def render_docs(src_rel_path, src_file, to_file, modifier="++"): + insert = "." 
+ src_rel_path if src_rel_path not in ["", "."] else "" + namespace = f"mathy_core{insert}.{src_file.stem}{modifier}" + args = ["mathy_pydoc", "--plain", namespace] + if not to_file.parent.exists(): + to_file.parent.mkdir(parents=True) + call_result = check_output(args, cwd=parent_folder_path).decode("utf-8") + with open(to_file, "w") as file: + file.write(call_result) + + +def process_directory(directory): + nav_entries = [] + for file_path in directory.iterdir(): + if file_path.name in EXCLUDE_FILES or not file_path.suffix == ".py": + continue + + print(f"\t{file_path.name}") + rel_out_md = file_path.with_suffix(".md").name + to_file = dest_dir / directory.relative_to(source_path) / rel_out_md + render_docs(directory.relative_to(source_path).as_posix(), file_path, to_file) + + # Prepend existing md file content if present + existing_md = file_path.with_suffix(".md") + if existing_md.exists(): + prepend_md_content(to_file, existing_md) + + nav_item = { + file_path.stem: to_file.relative_to(parent_folder_path / "docs").as_posix() + } + nav_entries.append(nav_item) + return nav_entries + + +def update_yaml_nav(nav_entries): + mkdocs_yaml = yaml.load(yaml_path) + updated = False + site_nav = mkdocs_yaml["nav"] + for nav_obj in site_nav: + site_keys = list(nav_obj.keys()) + for key in site_keys: + if isinstance(nav_obj[key], str): + continue + for nav_sub in nav_obj[key]: + if DOCS_KEY in nav_sub: + nav_sub[DOCS_KEY] = nav_entries + updated = True + break + if not updated: + raise Exception(f"Could not find {DOCS_KEY} in mkdocs.yml") + with open(yaml_path, "w") as file: + yaml.dump(mkdocs_yaml, file) + + +def main(): + print("Building API docs...") + nav_entries = [] + for src_folder in INCLUDE_FOLDERS: + folder = source_path / src_folder + if folder.is_dir(): + print(f"Found directory: {folder.relative_to(source_path)}") + if src_folder not in [".", ""]: + new_entries = process_directory(folder) + new_entries.sort(key=lambda x: list(x)[0]) + nav_entries.append({folder.name: new_entries}) + else: + nav_entries += process_directory(folder) + nav_entries.sort(key=lambda x: list(x)[0]) + update_yaml_nav(nav_entries) + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/website/tools/docs.sh b/website/tools/docs.sh new file mode 100644 index 0000000..63e2f5f --- /dev/null +++ b/website/tools/docs.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +. ../../.env/bin/activate + +python -m tools.docs diff --git a/website/tools/netlify.sh b/website/tools/netlify.sh new file mode 100644 index 0000000..c82e636 --- /dev/null +++ b/website/tools/netlify.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +# Default Python path +PYTHON_PATH="python3" + +# Check if a custom Python path is provided as the first argument +if [ -n "$1" ]; then + PYTHON_PATH="$1" +fi + +echo "Using Python at: $PYTHON_PATH" +$PYTHON_PATH --version + +# Make the virtualenv only if the folder doesn't exist +DIR=.env +if [ ! -d "${DIR}" ]; then + pip install virtualenv --upgrade + $PYTHON_PATH -m virtualenv .env -p $PYTHON_PATH || virtualenv .env -p $PYTHON_PATH +fi + +echo "Installing/updating requirements..." +.env/bin/pip install -r requirements.txt +.env/bin/mkdocs build diff --git a/website/tools/setup.sh b/website/tools/setup.sh new file mode 100644 index 0000000..3000f07 --- /dev/null +++ b/website/tools/setup.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Make the virtualenv only if the folder doesn't exist +DIR=../../.env +if [ ! -d "${DIR}" ]; then + sh ../../tools/setup.sh +fi + +# Use root env +. 
../../.env/bin/activate
+echo "Installing/updating requirements..."
+pip install -r requirements.txt
diff --git a/website/tools/test.sh b/website/tools/test.sh
new file mode 100644
index 0000000..639001a
--- /dev/null
+++ b/website/tools/test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -e
+echo "Using the virtualenv Python... (if this fails you may need to run setup.sh first)"
+echo "Running tests..."
+../.env/bin/python3 -m pytest --cov=mathy_core