From 9332e987aa6fff9c98df72a58ef8bba69ba0ad19 Mon Sep 17 00:00:00 2001 From: Steven Schaefer Date: Fri, 8 Nov 2024 01:39:00 -0500 Subject: [PATCH] sync paper changes --- .gitignore | 1 + paper/logo-agda.pdf | Bin 0 -> 1445 bytes paper/paper.tex | 5281 +++++++++++++++++++++++++++---------------- 3 files changed, 3343 insertions(+), 1939 deletions(-) create mode 100644 paper/logo-agda.pdf diff --git a/.gitignore b/.gitignore index f677b26..a686b8d 100644 --- a/.gitignore +++ b/.gitignore @@ -281,3 +281,4 @@ TSWLatexianTemp* /code/cubical/_build/ /code/cubical/Everything.agda /code/Cubical/env/ +!logo-agda.pdf diff --git a/paper/logo-agda.pdf b/paper/logo-agda.pdf new file mode 100644 index 0000000000000000000000000000000000000000..612bac3e231f151b57347ee49658f211183e846a GIT binary patch literal 1445 zcmY!laBQya85@16nAov$KuxTmOUdM&x8$H-LTVI(HY zCGHVu;(SYSz5FltNxy476K^n_w|P~e&eru@`S@|0#i9}OE)_50{r8z;{yt%wf3~lF zOD8l=Uy=9t@6>&D%uY@6o)5WgnDc#qn=kX|p1(}tYQU_6!aPSfk8*dI>SRg3GmOkS z8kBm4+v8+}&*hwpJs}l+FTNI@Fq*h%M))ziq{C;uXDxqo!0V+IU&@O&%6evz51vck zuKmYQetE}BiN#Y4-kzF&hh^*4%PoyEr&H5&w)Q?PF3nk|USG8G^6WLNS3fQJUUfq5 z#KwpV$1ffKQ1U5t&i&|J`~3FbNcj4+YV}$1-cxsTN}C@4Du|h{?LNnJ(#i>}JF5ar z7YS_?aEdtb(A?(8n(TSpX2uzUIoz`*E)ITS)+PN{&(}HYb7fT8{r3`~XMV}8-tu9> z6O|yPhcEuy>$-cWUYVPramQ~pC;R@7(jG3`Sgj_oY}MN3YW%iy*0u|Ai>FR#J?M3Z z^J>SEw4)!6apkj2Zki%3@;%gJmcxu=d)jUYin@1MRNZZjy~bUUt&#FFp8wq??HHe1 zN4Zk-QlK#gWP#!u6u}@G7{w+=#vpkR&lsK(z;W+dQQ{t40*rSB8yk=S5a=fw067Z! z&W;L(3i`>3KoXc5fD%9wh@j?}!_86952-9jRnYfO$_g&Y1)B;@Vixcu=BHo?mT=1~ zDlSnlfwJ)@JL6!32VKv)o-#48Sun<8`VV9})PII%FlSj3;VeTVBzM8w0&*8HltBI` zlpjJTI%YQ;h#Y@k*HtdLn#ZJbSD9q zhACc@sFi9lF%XWC0<)NXAllsP3cWw*zS*pk(dH^N~xwLetENsipxvPX(Bn8E$W4)4=p( YaYTuKsRZ0Dv$wq5uE@ literal 0 HcmV?d00001 diff --git a/paper/paper.tex b/paper/paper.tex index 9dca2cd..d2790df 100644 --- a/paper/paper.tex +++ b/paper/paper.tex @@ -1,6 +1,5 @@ - % -*- fill-column: 80; -*- -\documentclass[sigconf,review,anonymous,screen,acmsmall]{acmart} +\documentclass[review,anonymous,screen,acmsmall,nonacm]{acmart} \usepackage{mathpartir} \usepackage{tikz-cd} \usepackage{lipsum} @@ -8,8 +7,52 @@ \usepackage{wrapfig} \usepackage{fancyvrb} \usepackage{xspace} + +\definecolor{codegreen}{rgb}{0,0.6,0} +\definecolor{codegray}{rgb}{0.5,0.5,0.5} +\definecolor{codepurple}{rgb}{0.58,0,0.82} +\definecolor{backcolour}{rgb}{0.95,0.95,0.92} + +\usepackage{listings} +\lstdefinelanguage{Agda}{ +keywords={data, where, module, import, open, public, +record, field, let, in, if, then, else, case, of, with, +do, postulate, primitive, mutual, abstract, private, +forall, exists, cong, set, prop, sort, Level, Data, Type, +Renamer}, +morekeywords=[2]{Set, Type, Prop, moduleFuncs, instance, Named}, +sensitive=true, +comment=[l]{--}, +morecomment=[s]{\{-}{-\}}, +morestring=[b]", +mathescape=true, +escapeinside={(*@}{@*)} +} +\lstset{ +language=Agda, +basicstyle=\ttfamily\small, +keywordstyle=\color{blue}, +keywordstyle=[2]\color{teal}, +identifierstyle=\color{black}, +commentstyle=\color{gray}\textit, +stringstyle=\color{orange}, +numbers=none, +numberstyle=\tiny\color{gray}, +stepnumber=1, +numbersep=5pt, +showstringspaces=false, +tabsize=4, +captionpos=b, +breaklines=true, +breakatwhitespace=false, +rulecolor=\color{black}, +texcl=true, +backgroundcolor=\color{backcolour}, +frame=single +} + \usepackage{hyperref} -\usepackage{cleveref} +\usepackage[capitalise]{cleveref} \usepackage{stmaryrd} \usepackage{scalerel} \usepackage{tikz} @@ -17,23 +60,44 @@ \usepackage{pdfpages} +\newcommand{\SPF}{\mathsf{SPF}} 
+\newcommand{\Var}{\mathsf{Var}} +\newcommand{\K}{\mathsf{K}} +\newcommand{\map}{\mathsf{map}} +\newcommand{\roll}{\mathsf{roll}} +\newcommand{\fold}{\mathsf{fold}} +\newcommand{\inl}{\mathsf{inl}} +\newcommand{\inr}{\mathsf{inr}} \newcommand{\sem}[1]{\llbracket{#1}\rrbracket} \newcommand{\cat}[1]{\mathbf{#1}} \newcommand{\lto}{\multimap} \newcommand{\tol}{\mathrel{\rotatebox[origin=c]{180}{$\lto$}}} -\newcommand{\String}{\Sigma^{*}} +\newcommand{\String}{\textbf{String}} +\newcommand{\stringg}{\mathtt{String}} +\newcommand{\charg}{\mathtt{Char}} \newcommand{\Set}{\mathbf{Set}} +\newcommand{\Syn}{\mathbf{Synx}} \newcommand{\SemAct}{\mathbf{SemAct}} \newcommand{\Gr}{\mathbf{Gr}} \newcommand{\Grammar}{\mathbf{Gr}} \newcommand{\Type}{\mathbf{Type}} \newcommand{\Prop}{\mathbf{Prop}} -\newcommand{\Bool}{\mathbf{Bool}} -\newcommand{\true}{\mathbf{true}} -\newcommand{\false}{\mathbf{false}} +\newcommand{\Bool}{\mathtt{Bool}} +\newcommand{\true}{\mathtt{true}} +\newcommand{\false}{\mathtt{false}} \newcommand{\nat}{\mathbb{N}} -\newcommand{\theoryname}{Grammar Type Theory\xspace} -\newcommand{\theoryabbv}{GramTT\xspace} +\newcommand{\theoryname}{Dependent Lambek Calculus\xspace} +\newcommand{\theoryabbv}{$\textrm{Lambek}^D$} +\newcommand{\lnld}{$\textrm{LNL}_D$} + +\newcommand{\isTy}{\textrm{ type}} +\newcommand{\isCtx}{\textrm{ ctx}} +\newcommand{\isSmall}{\textrm{ small}} +\newcommand{\isLinTy}{\textrm{ lin. type}} +\newcommand{\isLinCtx}{\textrm{ lin. ctx.}} + +\newcommand{\quoteTy}[1]{\lceil{#1}\rceil} +\newcommand{\unquoteTy}[1]{\lfloor{#1}\rfloor} \newcommand{\gluedNL}{{\mathcal G}_S} \newcommand{\gluedNLUniv}{{\mathcal G}_{S,i}} @@ -42,33 +106,71 @@ \newcommand{\amp}{\mathrel{\&}} \newcommand{\pair}{\amp} \DeclareMathOperator*{\bigamp}{\scalerel*{\&}{\bigoplus}} +\DeclareMathOperator*{\bigwith}{\scalerel*{\&}{\bigoplus}} + + +\newcommand{\bang}{~\textbf{!}~} +% Tentatively using uparrow for linear to non-linear to be in line with +% Pfennings adjoint functional programming +\newcommand{\ltonl}[1]{~\uparrow #1} +\newcommand{\nil}{\texttt{nil}} +\newcommand{\ident}{\texttt{id}} +\newcommand{\cons}{\texttt{cons}} +\newcommand{\epscons}{\varepsilon\texttt{cons}} +\newcommand{\data}{\mathsf{data}} +\newcommand{\where}{\mathsf{where}} +\newcommand{\Trace}{\texttt{Trace}} +\newcommand{\stringquote}[1]{\texttt{\textquotesingle#1\textquotesingle}} +\newcommand{\internalize}[1]{\lceil#1\rceil} + +\newcommand{\oplusinj}[2]{\sigma\,#1\,#2} +\newcommand{\withprj}[2]{\pi\,#1\,#2} \newcommand{\simulsubst}[2]{#1\{#2\}} \newcommand{\subst}[3]{\simulsubst {#1} {#2/#3}} +\newcommand{\el}{\mathsf{el}} \newcommand{\letin}[3]{\mathsf{let}\, #1 = #2 \, \mathsf{in}\, #3} \newcommand{\lamb}[2]{\lambda #1.\, #2} \newcommand{\lamblto}[2]{\lambda^{{\lto}} #1.\, #2} \newcommand{\lambtol}[2]{\lambda^{{\tol}} #1.\, #2} -\newcommand{\dlamb}[2]{\overline{\lambda} #1.\, #2} +\newcommand{\dlamb}[2]{{\lambda}^{{\&}} #1.\, #2} +\newcommand{\withlamb}[2]{{\lambda}^{{\&}} #1.\, #2} \newcommand{\app}[2]{#1 \, #2} -\newcommand{\applto}[2]{#1 \mathop{{}^{\lto}} #2} +\newcommand{\applto}[2]{#1 \, #2} \newcommand{\apptol}[2]{#1 \mathop{{}^{\tol}} #2} \newcommand{\PiTy}[3]{\textstyle\prod (#1 : #2). #3} \newcommand{\SigTy}[3]{\textstyle\sum (#1 : #2). #3} \newcommand{\LinPiTy}[3]{\textstyle\bigamp (#1 : #2). #3} +\newcommand{\LinPiTyLimit}[3]{\bigwith\limits_{#1 : #2} #3} \newcommand{\LinSigTy}[3]{\textstyle\bigoplus (#1 : #2). 
#3} +\newcommand{\LinSigTyLimit}[3]{\bigoplus\limits_{#1 : #2} #3} +%% \newcommand{\DepWith}[2]{{\textstyle\bigamp}\limits_{#1}{#2} +%% \newcommand{\DepPlus}[2]{{\textstyle\bigoplus}\limits_{#1}{#2} \newcommand{\GrTy}{\mathsf{Gr}} -\newcommand{\ctxwff}[1]{#1 \,\, \mathsf{ok}} -\newcommand{\ctxwffjdg}[2]{#1 \vdash #2 \,\, \mathsf{type}} -\newcommand{\linctxwff}[2]{#1 \vdash #2 \,\, \mathsf{ok}} -\newcommand{\linctxwffjdg}[2]{#1 \vdash #2 \,\, \mathsf{linear}} +\newcommand{\equalizer}[3]{\{#1\,|\,\applto {#2}{#1} = \applto{#3}{#1} \}} +\newcommand{\equalizerin}[1]{\langle #1 \rangle} +\newcommand{\equalizerpi}[1]{#1.\pi} + +\newcommand{\ctxwff}[1]{#1 \isCtx} +\newcommand{\ctxwffjdg}[2]{#1 \vdash #2 \isTy} +\newcommand{\linctxwff}[2]{#1 \vdash #2 \isLinCtx} +\newcommand{\linctxwffjdg}[2]{#1 \vdash #2 \isLinTy} + +\newsavebox{\logoagdabox} +\sbox{\logoagdabox}{% + % + \raisebox{-2pt}{\includegraphics[height=1em]{logo-agda.pdf}}% +} +\newcommand{\agdalogo}{% + \usebox{\logoagdabox}}% \newif\ifdraft \drafttrue \newcommand{\steven}[1]{\ifdraft{\color{orange}[{\bf Steven says}: #1]}\fi} \renewcommand{\max}[1]{\ifdraft{\color{blue}[{\bf Max says}: #1]}\fi} \newcommand{\pedro}[1]{\ifdraft{\color{red}[{\bf Pedro says}: #1]}\fi} +\newcommand{\nathan}[1]{\ifdraft{\color{green}[{\bf Nathan says}: #1]}\fi} \newcommand{\pipe}{\,|\,} \begin{document} @@ -77,7 +179,7 @@ \pagebreak -\title{Formal Grammars as Types in Non-Commutative Linear-Non-Linear Type Theory} +\title{Intrinsic Verification of Parsers and Formal Grammar Theory in Dependent Lambek Calculus} \author{Steven Schaefer} \affiliation{\department{Electrical Engineering and Computer Science} @@ -86,6 +188,13 @@ } \email{stschaef@umich.edu} +\author{Nathan Varner} +\affiliation{\department{Electrical Engineering and Computer Science} + \institution{University of Michigan} + \country{USA} +} +\email{nvarner@umich.edu} + \author{Max S. New} \affiliation{ \department{Electrical Engineering and Computer Science} @@ -107,41 +216,32 @@ \makeatother \begin{abstract} - We propose \theoryname~(\theoryabbv) a new type-theoretic calculus - for the specification of formal grammars and intrinsic verification - of parsers. \theoryabbv is a dependent non-commutative - linear/non-linear type theory where the linear types can be viewed - as formal grammars, and linear terms can be viewed as parse - transformers, of which parsers are a special case. We formalize this - interpretation by giving a denotational semantics of the theory in a - novel categorical semantics of formal grammars in a category of - families indexed by strings. - - To demonstrate the usefulness of the calculus for formal grammar - theory, we show that existing grammar formalisms --- e.g.\ regular - expressions, context free expressions, and their associated automata \max{which ones do - we actually show?} \steven{regular expressions, $\mu$-regular expressions - (or context free expressions), and different automata classes (finite, - pushdown, Turing)} --- can be expressed as linear types in - \theoryabbv. We give simple characterizations of the regular and - context-free grammars as expressible by restricting which - connectives of \theoryabbv are allowed, a form of type theoretic - characterization of these grammar classes. Further, we demonstrate - some central proofs of formal grammar theory can be intrinsically - verified: \emph{strong} equivalence between regular expressions and - NFAs is expressed by a syntactic \emph{isomorphism} definable in - \theoryabbv. 
- - Finally, since \theoryabbv is a syntactic calculus, in addition to - the ``standard'' semantics of linear types as grammars, we - demonstrate two non-standard semantic interpretations. One is a - semantics where linear types are instead interpreted as - \emph{semantic actions}, rather than grammars, which are more common - in application than formal grammars proper. Lastly we use a - non-standard semantics to prove a \emph{canonicity} metatheorem that - shows that the interpretation of types as grammars is canonical: the - type-theoretic notion of a parse agrees with the expected semantic - notion. + We propose \theoryname~(\theoryabbv), a domain-specific dependent type theory + for the definition of intrinsically verified parsers and verification of weak + and strong equivalences of formal grammars. The linear types in \theoryabbv + have a simple interpretation as formal grammars, with values of the types + interpreted as parse trees and general terms as a form of intrinsically + verified parse transformers, of which parsers being a special case. We + demonstrate the expressivity of this system by showing that the indexed + inductive linear types in this calculus can be used to encode many different + familiar grammar formalisms, including regular and context-free grammars but + also traces of various types of automata. We can then construct strong + equivalences between grammar and automata formalisms and construct verified + parsers as simple linear functional programs. All of our examples have been + implemented in a prototype implementation of \theoryabbv as a shallow + embedding in Agda. + + Our shallow embedding is based on a denotational semantics of \theoryabbv + where non-linear types denote ordinary sets and linear types denote formal + grammars. To develop this semantics compositionally, we define a notion of + general notion of model we call a \emph{Chomsky hyperdoctrine}, a + generalization of the Kleene algebra approach to formal language theory. In + addition to our ``standard model'' where linear types denote formal grammars, + we demonstrate two non-standard models. The first interprets linear types as + semantic actions over formal grammars, suggesting future extensions of our + calculus. The second is used to structure a logical relations argument to + prove a canonicity metatheorem for the type theory, proving that all closed + terms of context-free linear types normalize to a parse tree. \end{abstract} \maketitle @@ -149,6 +249,15 @@ \section{Introduction} \label{sec:intro} +\steven{TODO adapt section 1 and the abstract to reflect the rest of the paper} +%% Outline +%% - Why a framework for parser verification +%% - what intrinsic verification gets us +%% - the secret sauce: indexed linear inductive types +%% - prototype implementation +%% - overview of "abstract" formal grammars? + + Parsing data from a serialized format is one of the most common tasks in all of computing. Accordingly, this is a quite well-studied problem spanning theoretical computer science and linguistics with such @@ -163,21 +272,21 @@ \section{Introduction} thoroughly developed area of theoretical computer science. The central object of study is a \emph{formal language} $L$ over an alphabet $\Sigma$, which is classically defined as a -\emph{subset} of strings $L \subseteq \String$. The expressivity of formal +\emph{subset} of strings $L \subseteq \String = \Sigma^{*}$. 
The expressivity of formal languages is sufficient for defining a \emph{recognizer} of a language $L$: a function $r : \String \to \Prop$ such that $r$ maps each string $w$ to the proposition that $w$ is in the language $w \in L$. This technique of language recognition can concisely specify lexical analysis, and even lead to verified lexer implementations \cite{egolfVerbatim,Ouedraogo_2023}. -While there are ample thoery and tooling to handle the first stage of a language frontend, the final +While there are ample theory and tooling to handle the first stage of a language frontend, the final frontier for parsing research is the development of \emph{formally verified} parser implementations. Due to the ubiquitous nature of parsing, these would useful components of many formally verified software systems: compilers and servers most especially. However, language recognition is insufficient for specifying a parser, and for this task much of the structured foundations from the theory of -formal languages is rendered moot. Rather than returning a propositon-valued result, a parser produces a +formal languages is rendered moot. Rather than returning a proposition-valued result, a parser produces a structured \emph{parse tree} adherent to a \emph{formal grammar}. The research on such grammars is often performed atop an ad-hoc choice of grammar formalism, and results are proven with respect to that formalism. Critically, @@ -232,7 +341,7 @@ \section{Introduction} \item An implementation of the language, which is embedded shallowly in Agda \end{itemize} -\section{A Syntax-Independent Notion of Syntax} +\subsection{A Syntax-Independent Notion of Syntax} \label{sec:synindnotion} The notion of formal language is central to the theory of parsing. @@ -257,6 +366,8 @@ \section{A Syntax-Independent Notion of Syntax} $G : \String \to \Set$. \end{definition} +\steven{TODO need to go through and ensure that $\Sigma^{*}$ and $\String$ are each used appropriately} + We say that a grammar $G$ associates to every string $w$ the set\footnote{We make little comment as to which foundations are used to encode $\Set$. Our construction is polymorphic in choice of a proper class of small sets, a @@ -273,35 +384,18 @@ \section{A Syntax-Independent Notion of Syntax} \[ L_{G} = \{ w \in \String ~|~ G w~\text{is inhabited} \} \] As they are simply sets, formal languages are naturally endowed with a partial -order via subset inclusion. Similarly, formal grammars naturally coalesce into a -\emph{category}. Define $\Grammar$ to be the category of formal grammars. Whose +order via subset inclusion. Just, formal grammars naturally coalesce into a +\emph{category}. + +Define $\Grammar$ to be the category of formal grammars. Whose objects are grammars, and whose morphisms are functions $f : G \to H$ between two -grammars presented by a family of functions - -\[ f^{w} : G w \to H w \] - -for every string $w \in \String$. Intuitively, we read a morphism of grammars as -a \emph{parse transformer} --- i.e. $f$ translates a $G$-parse of $w$ into an $H$-parse -of $w$. The induced notion of isomorphism in $\Grammar$ encodes that two grammars are equivalent if there is -a bijective translation of parses. In other words, two grammars are isomorphic -if they capture the same structural descriptions of strings, which is -precisely Chomsky's notion of \emph{strong equivalence} of -grammars \cite{chom1963}. 
In works like -\cite{yoshinaga2002formal}, this notion of strong equivalence is often treated -like the appropriate choice of ``sameness'' for two grammars. When abstracting over the latent -structure of formal grammars and describing it in the the language of -categories, it is encouraging to see the same definition of equivalence arise -naturally. - -Upon inspection of the above definition, $\Grammar$ may equivalently -be described as the functor category $\Set^{\String}$ --- where the -set $\String$ is viewed as a discrete\footnote{Objects in $\String$ -comprise the set of all strings, and the only morphisms are the -identity morphisms.} category. In fact, in this paper we will take -$\Grammar$ to be defined as the functor category $\Set^{\String}$ --- -also called presheaf category. Such presheaf categories carry a -remarkable amount of structure. Indeed they are a monoidal topos and, -as such, are a model to a substructural dependently typed theory. +grammars presented by a family of functions $f^{w} : G w \to H w$ for +$w : \String$. Intuitively, we interpret these morphisms as parse +\emph{transformers} --- as constructive translations of $G$-parses into $H$-parses. Throughout this paper, we will make frequent reference to this categorical +structure on $\Grammar$. + +\steven{Above sounds awkward. I want to additionally say something here that signposts that we will give more + information about that structure without just listing it all here. } \paragraph{From Grammars to Formal Grammars} The category-theoretic framework can be further used to describe and compare @@ -314,259 +408,571 @@ \section{A Syntax-Independent Notion of Syntax} this sense, $\Grammar$ provides an all-encompassing semantic domain for parsing. -\max{this section kind of trails off at the end with a very vague aside about a $2$-category of grammar formalisms. Maybe this should go in future work/discussion? What is the message of this section? How does it tie in with the message of the paper? What even is the message of this paper?} -\pedro{Would building on this proposed structure make the message of the section clearer? In this case - we would have to be a bit clearer in terms of how familiar notions of grammar can be seen as categories with functor into $\Grammar$.} +Upon further inspection, $\Grammar$ may equivalently +be described as the functor category $\Set^{\String}$ --- where the +set $\String$ is viewed as a discrete\footnote{Objects in $\String$ +comprise the set of all strings, and the only morphisms are the +identity morphisms.} category. In fact, in this paper we will take +$\Grammar$ to be defined as the functor category $\Set^{\String}$. Such functor +categories in $\Set$, called presheaf categories, carry a +remarkable amount of structure. Indeed they are a monoidal topos and, +as such, are a model to a substructural dependently typed theory. +Thus we +present \theoryname (\theoryabbv) as a domain-specific type theory that +abstracts over the structure of $\Grammar$. To begin, let us look at what some +programs in \theoryabbv look like. + +\steven{TODO Reword this transition into the examples} + +% \max{this section kind of trails off at the end with a very vague aside about a $2$-category of grammar formalisms. Maybe this should go in future work/discussion? What is the message of this section? How does it tie in with the message of the paper? What even is the message of this paper?} + +% \pedro{Would building on this proposed structure make the message of the section clearer? 
In this case +% we would have to be a bit clearer in terms of how familiar notions of grammar can be seen as categories with functor into $\Grammar$.} + + +\section{\theoryname by Example} +\label{sec:type-theory-examples} +To gain intuition for working in \theoryabbv, we begin with some illustrative +examples drawn from the theory of formal languages. In particular, we +demonstrate how to encode finite grammars, regular expressions, and finite +automata in \theoryabbv. Each of our examples will be defined for strings over the three character alphabet +$\Sigma = \{ \texttt{a} , \texttt{b}, \texttt{c} \}$. + +\newcommand{\A}{\texttt{A}} +\newcommand{\B}{\texttt{B}} +\newcommand{\I}{\texttt{I}} +\newcommand{\f}{\texttt{f}} +\newcommand{\g}{\texttt{g}} +\renewcommand{\L}{\texttt{L}} +\renewcommand{\a}{\texttt{a}} +\renewcommand{\b}{\texttt{b}} +\renewcommand{\c}{\texttt{c}} +\newcommand{\w}{\texttt{w}} + +\paragraph{Finite Grammars} +First consider finite grammars --- those built from base types via disjunctions and +concatenations. The base types comprise characters drawn from the alphabet, the +empty string, and the empty grammar. +For instance, the grammar for the single character $\a$ has a single parse tree for the string +\stringquote{a} and no parse trees at any other strings. The grammar $\I$ recognizes has a single +parse tree for the empty string $\epsilon = \stringquote{}$ and no parses for any other strings. +The final base type, the empty grammar $\bot$, has no parses for any string. We +use type-theoretic syntax to represent disjunction $\oplus$ and concatenation +$\otimes$ of +grammars. Over an input string $\w$, a parse of the disjunction $\A \oplus \B$ is either +a parse of $\A$ over the string $\w$ or a +parse of $\B$ over the string $\w$. Similarly, $\w$ matches $\A \otimes \B$ if +$\w$ can be split into two strings $\w_{\A}$ and $\w_{\B}$ that match $\A$ +and $\B$, respectively. + +For a grammar $\A$, a parse tree of a string $\w$ can be represented as +a term of type $\A$ in the context $\internalize \w$, where +$\internalize \w$ is a context with one variable for each character of +$\w$. For example, to define a parse tree for \stringquote{ab}, we use +the context $\internalize{\stringquote{ab}} = a : \a , b : \b$. Then the left of +\cref{fig:fingram} gives a typing derivation that shows that $\inl(a +\otimes b)$ defines a valid parse tree for the finite grammar $(\a \otimes \b) +\oplus \c$. On the right of \cref{fig:fingram}, we give a linear $\lambda$-term +that implements the program given in the derivation. -\section{\theoryname as a Syntax for Formal Grammars} -\label{sec:tt} +\begin{figure} +\begin{minipage}{0.5\textwidth} +\begin{mathpar} + \inferrule + { + \inferrule + { + \inferrule + {~} + {a : \a \vdash a : \a} + \\ + \inferrule + {~} + {b : \b \vdash b : \b} + } + {a : \a , b : \b \vdash a \otimes b : \a \otimes \b} + } + {a : \a , b : \b \vdash \texttt{f} := \inl(a \otimes b) : (\a \otimes \b) \oplus \c} +\end{mathpar} +\end{minipage}% +\begin{minipage}{0.5\textwidth} +\begin{lstlisting} +f : (*@$\uparrow$@*)(a (*@$\otimes$@*) b (*@$\lto$@*) (a (*@$\otimes$@*) b) (*@$\oplus$@*) c) +f (a' , b') = inl (a' , b') +\end{lstlisting} +\end{minipage} +\caption{\stringquote{ac} matches $(a \oplus b) \otimes c$} +\label{fig:fingram} +\end{figure} +For this interpretation of terms as parse trees to make sense, our +calculus cannot allow for \emph{any} of the usual structural rules of +a type theory: weakening, contraction and exchange. 
Weakening allows +for variables to go unused, while contraction allows for the same +variable to be used twice, but in a parse tree, every character must +be accounted for exactly once. That is, we want to prevent the following +erroneous derivations, +\[ + a : \a , b : \b \not \vdash a : \a \qquad a : \a , b : \b \not \vdash (a, a) : \a \otimes \a +\] -Omission of the structural rules of a deductive system, such as in -linear logic \cite{GIRARD19871}, offers precise control over how a value is used -in a derivation. Linear logic omits the weakening and contraction rules -to ensure that every value in context is used exactly once. This control enables -\emph{resource-sensitive} reasoning, where we may treat a resource as -\emph{consumed} after usage. This viewpoint is amenable to parsing applications, -as we may treat characters of a string as finite resources that are consumed at -parse-time. That is, when reading a string, the occurrence of any character is read once. Freely duplicating or dropping characters from a string changes the meaning of that string. \max{what about a backtracking parser?} \steven{Does the sentence sound better now? I've tried to avoid suggesting any particular parser design. Also, I think backtracking is a very interesting question to talk about in future work}. One may then envision a linear type system where the types comprise -formal grammars generated over some alphabet $\Sigma$, and the type constructors -correspond precisely to inductive constructions on grammars --- such as -conjunction, disjunction, concatenation, etc. - -Unfortunately, programming in a linear term language is often -cumbersome and unintuitive for users who are unaccustomed to the usage -restrictions.\max{You are arguing against our own type theory here...} \steven{I - want to argue against a \emph{purely linear} calculus to motivate the - convenience of LNL, but perhaps this isn't appropriate here, and I don't want to - make our own type theory sound bad.Should probably just cut the beginning of - this paragraph and jump into LNL, but then there's not an immediate reason for - why one might want/need it over a pure linear system} -\pedro{I think that it's reasonable to simply say something like ``programming in a purely linear term language -is limiting due to variable usage restrictions''. In this case we're not throwing our own TT under the bus} -Code in such a language can -become unnecessarily verbose when ensuring every function written is indeed linear. It would be more ergonomic to allow users using Girard encodings to translate -high-level reasoning into low-level linear terms.\pedro{What encodings are these? -We should add a citation here} -To alleviate this -concern, in 1995 Benton et al.\ proposed an alternative categorical -semantics of linear logic based on adjoint interactions between linear -and non-linear logics \cite{bentonMixedLinearNonlinear1995} --- -appropriately referred to as a \emph{linear-non-linear} (LNL) -system. This work is simply typed, so the boundary between linear and -non-linear subtheories is entirely characterized via a monoidal -adjunction $F \dashv G$ between linear terms and non-linear terms. - -Inside of an LNL system, linearity may be thought of as an option that users can -choose to deliberately invoke at deliberate points in their developments in an -otherwise intuitionistic system. 
However, if we are wishing to treat parsers as -linear terms over input strings, the non-linear fragment of an LNL theory does -not really assist in the development of parsers. It is instead the case that -parsers may benefit from a \emph{dependence} on non-linear terms. -Through the approach described by Krishnaswami et al.\ in -\cite{krishnaswami_integrating_2015}, -we may define a restricted form of dependent types. In particular, dependence -on linear terms -is disallowed; however, through dependence of a linear term on a non-linear -index, we recover the definition of Aho's indexed grammars \cite{AhoIndexed} -internal to \theoryabbv. - -\steven{The above is meant to walk the reader into accepting LNL as a natural - choice, but it doesn't really flow well to the beginning of the next subsection } - - -In this section we go on a tour of \theoryabbv. We start by presenting its syntax. -In order to avoid a notion of raw-term, we present it in an intrinsically-typed fashion, -where only well-typed terms are meaningful. Next, we show how it admits a model in $\Set^{\String}$ -where the linear types have natural interpretation in terms of formal languages --- for instance, -the tensor corresponds to grammar concatenation. We conclude the section by showing how -classes of grammars correspond to certain sub-type-theories inside \theoryabbv. - -\pedro{Hopefully this serves as a segue between this and the syntax subsection} -\subsection{Syntax} -\label{subsec:syntax} - -Below we describe \theoryname~(\theoryabbv), an instance of an LNL theory with dependent types -to serve as a deductive -setting for formal grammar theory. \theoryabbv axiomatizes the necessary -structure underlying $\Grammar$ to -specify and construct parsers. - -The structural judgments are shown in \cref{fig:structjdg}, the typing -well-formedness in \cref{fig:typewf}, and the intuitionistic typing -rules in \cref{fig:inttyping}. These are mostly just as they appear in -\cite{krishnaswami_integrating_2015}. It has two main differences. The -first one it the presence of two-distinct arrow types, one for adding -variables to the beginning of contexts and another one for adding -variables to the end of contexts. This is an adequate change in the -context of grammars, which are inherently non-symmetric. The second -change is the introduction of inductive types, which allows for the definition -of recursive grammars. The treatment of the non-linear -types is standard, so below we focus on the linear syntax. +% +Finally, the ordering of characters in a string cannot be ignored while +parsing, so we omit the exchange rule because it would allow +for variables in the context to be reordered, +\[ + a : \a , b : \b \not \vdash (b , a) : \b \otimes \a +\] +% otherwise we would have that $\inl(x\otimes y)$ is a valid +% term in context $y:b,x:a$, but there should be no parses of $ba$ in +% this grammar. + +\paragraph{Regular Expressions} +Regular expressions can be encoded as types generated by base types, +$\oplus$, and $\otimes$, and the Kleene star $(\cdot)^{\ast}$. For a +grammar $\A$, we can define the Kleene star $\A^{*}$ as a particular +\emph{inductive linear type} of linear lists, as shown in +\cref{fig:kleenestarinductive}. Here $\A^{*} : \L$ means we are defining +a \emph{linear} type. $\A^{*}$ has two constructors: $\nil$, which +builds a parse of type $\A^{*}$ from nothing; and $\cons$, which +linearly consumes a parse of $\A$ and a parse of $\A^{*}$ and builds a +parse of $\A^{*}$. 
This linear consumption is defined by the linear +function type $\lto$. The linear function type $\A \lto \B$ defines functions that +take in parses of $\A$ as input, \emph{consume} the input, and return a parse of +$\B$ as output. The arrow, $\uparrow$, wrapping these +constructors means that the constructors by themselves are not consumed +upon usage, and so are \emph{non-linear} values themselves. That is, the +names $\nil$ and $\cons$ are function symbols that may be reused as many times +as we wish. \begin{figure} - \begin{mathpar} - \boxed{\ctxwff \Gamma} +\begin{lstlisting} +data A(*@$^*$@*) : L where + nil : (*@$\uparrow$@*)(A(*@$^*$@*)) + cons : (*@$\uparrow$@*)(A (*@$\lto$@*) A(*@$^*$@*) (*@$\lto$@*) A(*@$^*$@*)) +\end{lstlisting} +% \begin{align*} +% \data &~A^{*} : L~\where\\ +% & \nil : \ltonl {A^{*}} \\ +% & \cons : \ltonl {(A \lto A^{*} \lto A^{*})} +% \end{align*} +\caption{Kleene Star as an inductive type} +\label{fig:kleenestarinductive} +\end{figure} - \inferrule{~}{\ctxwff \cdot} - \and - \inferrule{\ctxwff \Gamma \\ \ctxwffjdg \Gamma X}{\ctxwff {\Gamma, x : X}} +Through repeated application of the Kleene star constructors, \cref{fig:kleenestarderivation} gives a derivation that shows \stringquote{ab} matches the +regular expression $(\a^{*} \otimes \b) \oplus \c$. The leaves of the proof tree +that mention the arrow $\uparrow$ describe a cast from a non-linear type to a linear type. +For instance, the premise of the leaf involving $\nil$ +views $\nil : \ltonl {(\a^{*})}$ as the name of a +constructor, and a constructor should be nonlinearly valued because we may call +it several time. However, the conclusion of this leaf views $\nil : \a^{*}$ as a +linear value. After we call the constructor it returns a value that may only be +used a single time. In this paper such casts are often made implicitly, but this +does not introduce any ambiguity. +Even though we use $\nil$ to refer to both a constructor and the +linear value produced by it +, we differentiate between the two views by inspecting +the type. +\begin{figure} +\begin{mathpar} +\inferrule +{ + \inferrule + { + \inferrule + { + \inferrule + { + \inferrule + { + \inferrule + {~} + {\cdot \vdash \cons : \ltonl {(\a \lto \a^* \lto \a^*)}} + } + {\cdot \vdash \cons : \a \lto \a^* \lto \a^*} + \\ + \inferrule + {~} + {a : \a \vdash a : \a} + } + {a : a \vdash \cons~a : \a^* \lto \a^*} + \\ + \inferrule + { + \inferrule + {~} + {\cdot \vdash \nil : \ltonl {(\a^*)}} + } + {\cdot \vdash \nil : \a^*} + } + {a : \a \vdash \cons~a~\nil : \a^{*}} \\ + \inferrule + {~} + {b : \b \vdash b : \b} + } + {a : \a , b : \b \vdash (\cons~a~\nil) \otimes b : \a^{*} \otimes \b} +} +{a : \a , b : \b \vdash \texttt{g} := \inl ((\cons~a~\nil) \otimes b) : (\a^{*} \otimes \b) \oplus \c} +\end{mathpar} +\begin{lstlisting} +g : (*@$\uparrow$@*)((a (*@$\otimes$@*) b) (*@$\lto$@*) (a(*@$^*$@*) (*@$\otimes$@*) b) (*@$\oplus$@*) c) +g (a' , b') = inl (cons a' nil (*@$\otimes$@*) b') +\end{lstlisting} +\caption{\stringquote{ab} matches $(a^{*} \otimes b) \oplus c$} +\label{fig:kleenestarderivation} +\end{figure} - \boxed{\linctxwff \Gamma \Delta} - - \inferrule{~}{\linctxwff \Gamma \cdot} - \and - \inferrule{\linctxwff \Gamma \Delta \\ \linctxwffjdg \gamma A}{\linctxwff \Gamma {\Delta, a : A}} +We may also have derivations where the term in context is not simply a +string of base types. In \cref{fig:kleeneabstractproof} we show that every parse +of the grammar $(\A \otimes \A)^{*}$ induces a parse of $\A^{*}$ for an +arbitrary grammar $\A$. 
The context $(\A \otimes \A)^{*}$ does not correspond directly to a string, so it is +not quite appropriate +to treat linear terms here as parse \textit{trees}. +The +context $a : (\A \otimes \A)^{*}$ does not contain concrete data to be parsed; rather, there may be many choices of string underlying the parse tree captured +by the variable $a$. Thus, the term $\texttt{h}$ from +\cref{fig:kleeneabstractproof} is not a parse of a string, and +it is more +appropriate to think of it as a parse \textit{transformer} --- a function from +parses of $(\A \otimes \A)^{*}$ to parse of $\A^{*}$. + +$\texttt{h}$ is defined by recursion on terms of type $(\A \otimes \A)^{*}$. +This recursion is expressed in the derivation tree by invoking the +elimination principle for Kleene star, written +as $\fold$. $\texttt{h}$ is more compactly presented in the pseudocode +of \cref{fig:kleeneabstractproof} by pattern matching on the input and making an +explicit recursive call in the body of its definition. When appropriate we will favor this code-like +presentation of terms in our calculus, as it is more readable and more +faithful to the implementation of \theoryabbv in Agda. +\begin{figure} +\begin{mathpar} + \inferrule + { + \inferrule + {~} + {\cdot \vdash \nil : \A^*} \\ + \inferrule + { + \inferrule + { + \inferrule + {~} + {a_1 : \A, a_2 : \A , a_{*} : \A^* \vdash \cons (a_1 , \cons(a_2 , a_{*})) : \A^*} + } + {a' : \A \otimes \A , a_{*} : \A^* \vdash g := \letin {a_1 \otimes a_2} {a'} {\cons (a_1 , \cons(a_2 , a_{*})) : \A^*}} + } + {\cdot \vdash f := \lamblto a {\lamblto {a_{*}} {\app {\app g a} {a_{*}} }} : (\A \otimes \A) \lto \A^* \lto \A^*} + } + {a : (\A \otimes \A)^* \vdash \texttt{h} := \fold(\nil , f)(a) : \A^* } +\end{mathpar} +\begin{lstlisting} +h : (*@$\uparrow$@*)((A (*@$\otimes$@*) A)(*@$^*$@*) (*@$\lto$@*) A(*@$^*$@*)) +h nil = nil +h (cons (a (*@$\otimes$@*) a') as) = cons a (cons a' (h as)) +\end{lstlisting} +\caption{A parse transformer for abstract grammars} +\label{fig:kleeneabstractproof} +\end{figure} - \boxed{\ctxwffjdg \Gamma X} +\paragraph{Non-deterministic Finite Automata} +\newcommand{\s}{\texttt{s}} +\newcommand{\0}{\texttt{0}} +\newcommand{\1}{\texttt{1}} +\newcommand{\2}{\texttt{2}} +Regular expressions are a compact formalism for defining a formal grammar, +however simply defining a regular expression --- such as +$(\a^{*} \otimes \b) \oplus \c$ --- does not give any insight into how to parse +it. Many parsing algorithms are based on a notion of automaton, and to verify +these algorithms we need to represent their associated automata in +\theoryabbv. + +Finite automata are precisely the class of machines that recognize regular +expressions. \cref{fig:exampleNFA} shows a non-deterministic +finite automaton (NFA) for the regular expression $(\a^{*} \otimes b) \oplus c$. +\cref{fig:nfainductive} defines $\Trace$ an \emph{indexed} inductive linear type of +traces through this automaton. Defining an indexed inductive type can be thought +of like defining a family of mutually inductive types varying coherently over a +type of indexing data. That is, \cref{fig:nfainductive} defines the linear types +$\Trace~\0$, +$\Trace~\1$, and $\Trace~\2$ mutually inductively. The index $\s : \texttt{Fin 3}$ picks out which +state in the automaton a trace begins at --- where $\texttt{Fin 3}$ is the +finite type containing inhabitants $\{\0 , \1 , \2\}$. 
+There are three kinds of constructors for $\Trace$: +(1) those that terminate traces, (2) those that correspond to transitions +labeled by a character, and (3) those that correspond to transitions labeled by +the empty string $\epsilon$. The constructor $\texttt{stop}$ terminates a trace +in the accepting state $\2$. The constructors $\texttt{1to1}$, $\texttt{1to2}$, +$\texttt{0to2}$ each define a labeled transition through the NFA, and each of +these consumes a parse of the label's character and a trace beginning at the +destination of a transition to produce a trace beginning at the source of a +transition. The constructor $\texttt{0to1}$ behaves similarly, except its +transition is labeled with the empty string $\epsilon$. Therefore, +$\texttt{0to1}$ takes in a trace beginning a state $\1$ and returns a trace +beginning at state $\0$ corresponding to the same underlying string. + +Just as \cref{fig:kleenestarderivation} shows the \stringquote{ab} matches +$(a^{*} \otimes b) \oplus c$, \cref{fig:nfaderivation} demonstrates that \stringquote{ab} is +accepted by the NFA representation of this regular expression. In particular, we +show that \stringquote{ab} matches the grammar $\Trace~0$ because $\0$ is the +initial state of the NFA.\ We may imagine defining parse transformers from +$(\a^{*} \otimes b) \oplus c$ to $\Trace~\1$, and vice versa. The existence of +such maps between the two types would demonstrate that the regular expression +and the automaton capture the same language --- i.e.\ they recognize the same +set of strings. As we will see in LATER SECTION, we can define parse +transformers between regular expressions and their associated automaton. +Moreover, we prove that these parse transformers define a bijective +correspondence of parses over every input string. That is, in a strong sense the +parse of a regular expression and the traces of their associated automata +capture precisely the same data. - \inferrule{\Gamma \vdash X : U_i}{\ctxwffjdg \Gamma X} +\begin{figure} + \begin{tikzpicture}[node distance = 25mm ] + \node[state, initial] (0) {$\0$}; + \node[state, below left of=0] (1) {$\1$}; + \node[state, right of=1, accepting] (2) {$\2$}; + + \path[->] (0) edge[above] node{$\epsilon$} (1) + (0) edge[right] node{$\stringquote{c}$} (2) + (1) edge[loop left] node{$\stringquote{a}$} (1) + (1) edge[below] node{$\stringquote{b}$} (2); + \end{tikzpicture} + \caption{NFA for $(\a^{*} \otimes \b) \oplus \c$} + \label{fig:exampleNFA} +\end{figure} - \boxed{\linctxwffjdg \Gamma A} +\begin{figure} +\begin{lstlisting} +data Trace : (s : Fin 3) (*@$\to$@*) L where + stop : (*@$\uparrow$@*)(Trace 2) + 1to1 : (*@$\uparrow$@*)(a (*@$\lto$@*) Trace 1 (*@$\lto$@*) Trace 1) + 1to2 : (*@$\uparrow$@*)(b (*@$\lto$@*) Trace 2 (*@$\lto$@*) Trace 1) + 0to2 : (*@$\uparrow$@*)(c (*@$\lto$@*) Trace 2 (*@$\lto$@*) Trace 0) + 0to1 : (*@$\uparrow$@*)(Trace 1 (*@$\lto$@*) Trace 0) +\end{lstlisting} +\caption{$N$ as an indexed inductive type} +\label{fig:nfainductive} +\end{figure} - \inferrule{\Gamma \vdash A : L_i}{\linctxwffjdg \Gamma A} +\begin{figure} +\begin{lstlisting} +k : (*@$\uparrow$@*)((a (*@$\otimes$@*) b) (*@$\lto$@*) Trace 0) +k (a' , b') = 0to1 (1to1 a' (1to2 b' stop)) +\end{lstlisting} +\caption{\stringquote{ab} is accepted by the NFA for $(a^{*} \otimes b) \oplus c$} +\label{fig:nfaderivation} +\end{figure} - \\ +%% 3. 
Third example: NFA traces to show the indexed inductive types - \boxed{\ctxwffjdg \Gamma {X \equiv Y}} +\section{Syntax and Typing for \theoryname} +\label{sec:tt} - \inferrule{\Gamma \vdash X \equiv Y : U_i}{\ctxwffjdg \Gamma {X\equiv Y}} +%% OUTLINE +%% +%% 1. Overview of syntax: what are the judgments +%% 2. Cover the linear types except for inductives +%% 3. Cover non-linear types, mainly focus on the universes +%% 4. Cover indexed inductive linear types +%% 5. Cover the "axioms" we add + +\max{was this paragraph supposed to be deleted?} Next we present the +full details of the syntax and typing of \theoryabbv. The syntax of +\theoryabbv is based on the dependent linear-non-linear (\lnld) +calculus from \cite{dlnl}. As in \lnld, \theoryabbv includes both +non-linear dependent types, as well as linear types, which are allowed +to depend on the non-linear types, but not on other linear types. +% +The main point of departure from \lnld's design in that the linear +typing is \emph{non-commutative}, i.e., that exchange is not an +admissible structural rule. Furthermore, we add a general-purpose +indexed inductive linear type connective, as well as an +\emph{equalizer} type, which we will show allows us to perform +inductive proofs of equalities between linear terms. +% +Finally, we add base types and axioms to the system specifically to +model formal grammars and parsing. + +The formation rules for the judgments of \theoryabbv are shown in +\Cref{fig:formation}. $\Gamma$ stand for non-linear contexts, $X,Y,Z$ +stand for non-linear types, $M,N$ stand for non-linear terms, these +act as in an ordinary dependent type theory. $\Delta$ stands for +linear contexts, $A,B,C$ for linear types and $e,f,g$ for linear +terms. These contexts, types and terms are allowed to depend on an +ambient non-linear context $\Gamma$, but note that linear types $A$ +cannot depend on any \emph{linear} variables in $\Delta$. We include +definitional equality judgments for both kinds of type and term +judgments as well. Additionally, we have a judgment $\Gamma\vdash X +\isSmall$ which is used in the definition of universe types. 
- \boxed{\linctxwffjdg \Gamma {A \equiv B}} +\begin{figure} + \begin{mathpar} + \inferrule{~}{\ctxwff \Gamma} - \inferrule{\Gamma \vdash A \equiv B : L_i}{\linctxwffjdg \Gamma {A \equiv B}} + \inferrule{\ctxwff \Gamma}{\ctxwffjdg \Gamma X} - \end{mathpar} - \caption{Structural judgments} - \label{fig:structjdg} -\end{figure} + \inferrule{\ctxwffjdg \Gamma X}{\Gamma \vdash X \isSmall} -\begin{figure} - \begin{mathpar} - \boxed{\Gamma \vdash X : U_{i}} + \inferrule{\ctxwffjdg \Gamma X \and \ctxwffjdg \Gamma Y}{\ctxwffjdg \Gamma {X \equiv Y}} - \boxed{\Gamma \vdash A : L_{i}} + \inferrule{\ctxwffjdg \Gamma X}{\Gamma \vdash M : X} - \inferrule{~}{\Gamma \vdash U_i : U_{i+1}} - % - \and -% - \inferrule{~}{\Gamma \vdash L_i : U_{i+1}} -% - \and -% - \inferrule{\Gamma \vdash X : U_i \\ \hspace{-0.1cm} \Gamma, x : X \vdash Y : U_i}{\Gamma \vdash \PiTy x X Y : U_i }% -% - \and -% - \inferrule{\Gamma\vdash X : U_i \\ \hspace{-0.1cm} \Gamma, x : X \vdash Y : U_i}{\Gamma \vdash \SigTy x X Y : U_i} -% - \and -% - \inferrule{~}{\Gamma \vdash 1 : U_i} -% - \and -% - \inferrule{\Gamma \vdash A : L_i}{\Gamma \vdash G A : U_i} + \inferrule{\Gamma \vdash M : X \and \Gamma \vdash N : X}{\Gamma \vdash M \equiv N : X} - \and -% - \inferrule{~}{\Gamma \vdash I : L_i} - % - \and -% - \inferrule{\Gamma \vdash A : L_i \\ \hspace{-0.1cm}\Gamma \vdash B : L_i}{\Gamma \vdash A \otimes B : L_i} -% - \and -% - \inferrule{\Gamma \vdash A : L_i \\ \hspace{-0.1cm}\Gamma \vdash B : L_i}{\Gamma \vdash A \lto B : L_i} -% - \and -% - \inferrule{\Gamma \vdash A : L_i \\ \hspace{-0.1cm}\Gamma \vdash B : L_i}{\Gamma \vdash B \tol A : L_i} -% - \and -% - \inferrule{\Gamma \vdash X : U_i \\ \Gamma, x : X \vdash A : L_i}{\Gamma \vdash \LinPiTy x X A : L_i} -% - \and -% - \inferrule{\Gamma \vdash X : U_i \\ \Gamma, x : X \vdash A : L_i}{\Gamma \vdash \LinSigTy x X A : L_i} -% - \and -% - \inferrule{\Gamma \vdash X : U_i \quad \{\Gamma \vdash e_i : X\}_i}{\Gamma \vdash e_1 =_X e_2 : U_i} - % - \and - % - \inferrule{~}{\Gamma \vdash \top : L_i} -% - \and + \inferrule{\ctxwff \Gamma}{\linctxwff \Gamma \Delta} - \inferrule{~}{\Gamma \vdash \bot : L_i} + \inferrule{\ctxwff \Gamma}{\Gamma \vdash A \isLinTy} - \and - \inferrule{c \in \Sigma}{\Gamma \vdash c : L_0} - % - \and - % - \inferrule{\Gamma, x : L_i \vdash A : L_i \and A \textrm{ strictly positive}}{\Gamma \vdash \mu x.\, A : L_i} + \inferrule{\Gamma \vdash A \isLinTy \and \Gamma \vdash B \isLinTy}{\Gamma \vdash A \equiv B} + + \inferrule{\Gamma \vdash \Delta \isLinCtx \and \Gamma \vdash A \isLinTy}{\Gamma;\Delta\vdash e : A} + + \inferrule{\Gamma; \Delta \vdash e : A \and \Gamma;\Delta \vdash f : A}{\Gamma;\Delta\vdash e \equiv f : A} \end{mathpar} - \caption{Type well-formedness} - \label{fig:typewf} + \caption{Formation Rules} + \label{fig:formation} \end{figure} +\subsection{Non-linear Typing} + +The non-linear types of \theoryabbv include the standard dependent +types for $\Pi,\Sigma$, extensional equality, natural numbers, +booleans, unit and empty types. We include these rules in the +appendix. We present the other non-linear type constructions in +\Cref{fig:non-linear-stuff}. First, we include universe types $U$ of +small non-linear types and $L$ of linear types. These are defined as +``universes ala Coquand'' in that we define a judgment saying when a +non-linear type is small and define the universe to internalize +precisely this judgment. These universe types are needed so that we +can define types by recursion on natural numbers. 
Next, we include a +non-linear type $\ltonl A$ where $A$ is a linear type. The intuition +for this type is that its elements are the linear terms that are +``resource free'': its introduction rule says we can construct an +$\ltonl A$ when we have a linear term of type $A$ with no free linear +variables. This type is used extensively in our examples, playing a +similar role to the $!$ modality of ordinary linear logic. + +%% \begin{figure} +%% \[ +%% \begin{array}{rrcl} +%% \textrm{non-linear contexts} & \Gamma & ::= & \cdot \pipe \Gamma,x:X \\ +%% \textrm{non-linear types} & X , Y , Z & ::= & \mathsf{Nat} \pipe +%% \mathsf{Bool} \pipe +%% \mathsf{Empty} \pipe +%% \mathsf{Unit} \pipe +%% \mathsf{SPF}\,X\pipe U \pipe L +%% \pipe \ltonl A \pipe \sum\limits_{x:X} Y +%% \pipe \prod\limits_{x:X} Y\pipe M =_A +%% N +%% \end{array} +%% \] +%% \caption{Non-linear Types} +%% \end{figure} + \begin{figure} \begin{mathpar} - \boxed{\Gamma \vdash x : X} + \inferrule{}{\Gamma \vdash U \isTy}\and + \inferrule + {\Gamma \vdash M : U} + {\Gamma \vdash \unquoteTy M \isTy}\and + \inferrule + {\Gamma \vdash X \isTy} + {\Gamma \vdash \quoteTy X : U}\and + \unquoteTy{\quoteTy{X}} \equiv X \and \inferrule{\Gamma \vdash M : U}{\Gamma\vdash \quoteTy{\unquoteTy{M}}\equiv M : U} - \inferrule{~}{\Gamma, x : X, \Gamma' \vdash x : X} - % - \and - % - \inferrule{\Gamma \vdash e : Y \quad \ctxwffjdg \Gamma {X \equiv Y}}{\Gamma \vdash e : X} - % - \and - % - \inferrule{~}{\Gamma \vdash () : 1} - % - \and - % - \inferrule{\Gamma \vdash e : X \\ \Gamma \vdash e : \subst Y e x}{\Gamma \vdash (e, e') : \SigTy x X Y} - % - \and -% - \inferrule{\Gamma \vdash e : \SigTy x X Y}{\Gamma \vdash \pi_1\, e : X} - % - \and - % - \inferrule{\Gamma \vdash e : \SigTy x X Y}{\Gamma \vdash \pi_2\, e : \subst Y {\pi_1\, e} x} - \and - \inferrule{\Gamma, x : X \vdash e : Y}{\Gamma \vdash \lamb x e : \PiTy x X Y} - % - \and - % - \inferrule{\Gamma \vdash e : \PiTy x X Y \\ \Gamma \vdash e' : X}{\Gamma \vdash \app e {e'} : \subst Y {e'} x} - % - \and - % - \inferrule{\Gamma \vdash e \equiv e' : X}{\Gamma \vdash \mathsf{refl} : e =_X e'} - \and - \inferrule{\Gamma ; \cdot \vdash e : A}{\Gamma \vdash \mathsf G e : \mathsf G A} + \inferrule{}{\Gamma \vdash L \isTy}\and + \inferrule + {\Gamma \vdash M : L} + {\Gamma \vdash \unquoteTy M \isLinTy}\and + \inferrule + {\Gamma \vdash A \isLinTy} + {\Gamma \vdash \quoteTy A : L}\and + \unquoteTy{\quoteTy{A}} \equiv A \and \inferrule{\Gamma \vdash M : U}{\Gamma\vdash \quoteTy{\unquoteTy{M}}\equiv M : U} + + \inferrule + {\Gamma \vdash A \isLinTy} + {\Gamma \vdash \ltonl { A } \isTy} + \and + \inferrule{\Gamma ; \cdot \vdash e : A}{\Gamma \vdash e : \ltonl A}\and + \inferrule{\Gamma \vdash M : \ltonl A}{\Gamma ; \cdot \vdash M : A} \and \end{mathpar} - \caption{Intuitionistic typing} - \label{fig:inttyping} + \caption{Non-linear types (selection)} + \label{fig:non-linear-types} \end{figure} +\subsection{Linear Typing} + +We give an overview of the linear types in \Cref{fig:linear-types} and +terms in \Cref{fig:linear-terms}. The equational theory for these +types is straightforward and included in the appendix. First, the +linear variable rule says that a linear variable can be used if it is +the \emph{only} variable in the context. + +First, we cover the ``multiplicative'' connectives of non-commutative +linear logic. 
The linear unit and tensor product are standard for a +non-commutative linear logic: when we construct a linear unit we +cannot use any variables and when we construct a tensor product, the +two sides must use disjoint variables, and the variables the left side +of the product uses must be to the left in the context of the +variables used by the right side of the tensor product. Because we are +non-commutative, there are two function types: $A \lto B$ and $B \tol +A$. The difference between these is that the introduction rule for $A +\lto B$ adds a variable to the right side of the context, whereas the +introduction rule for $B \tol A$ adds a variable to the left side of +the context. In our experience, because by convention parsing +algorithms parse from left-to-right, we rarely need to use the $B \tol +A$ connective. One of the main uses of linear types is that, in +combination with the $\uparrow$ connective, we can abstract over +linear terms, in that the non-linear type $\ltonl{A \lto B}$ is a +non-linear type of linear terms with one linear input $A$ and output +$B$. + +Next, we have the ``additive'' connectives. For these, we use the +non-linear types to define \emph{indexed} versions of the additive +disjunction $\oplus$ and additive conjunction $\&$ of linear +logic. These are like a linear version of the $\Sigma$ and $\Pi$ +connectives of ordinary dependent type theory. The indexed $\&$ is +defined by a $\lambda$ and eliminated using projection. The indexed +$\oplus$ is like a \emph{weak} $\Sigma$ type: it has an injection +introduction rule $\sigma$, but its elimination rule is given by +\emph{pattern matching} rather than projections. Note that we can +define the more typical nullary and binary versions of these +connectives by using indexing over the empty and boolean type +respectively. We will freely use $\bot$ to refer to this empty +discunction and $\top$ to refer to the empty conjunction. Lastly, we +include a type $\equalizer {a}{f}{g}$ that we call the +\emph{equalizer} of $f$ and $g$. We think of this type as the +``subset'' of $A$ that satisfy the equation $f\,a\equiv g\,a$. If +these were non-linear types, such a type could be constructed using +the equality type as $\sum_{x:X} f\,x=_Y g\,x$, but this construction +can't be used because it uses a \emph{dependent} equality type. We +note that the equalizer type is not used in defining any of our +parsers or formal grammars, but instead will be used shortly to derive +an induction principle for proving equalities of functions whose +domain is an inductive type. 
+ \begin{figure} \begin{mathpar} - \boxed{\Gamma ; \Delta \vdash a : A} + \inferrule{}{\Gamma \vdash I \isLinTy}\and + \inferrule{\Gamma \vdash A \isLinTy \and \Gamma \vdash B \isLinTy}{\Gamma \vdash A \otimes B \isLinTy}\and + \inferrule{\Gamma \vdash A \isLinTy \and \Gamma \vdash B \isLinTy}{\Gamma \vdash A \lto B \isLinTy}\and + \inferrule{\Gamma \vdash A \isLinTy \and \Gamma \vdash B \isLinTy}{\Gamma \vdash A \tol B \isLinTy}\and + \inferrule{\Gamma,x:X \vdash A \isLinTy}{\Gamma \vdash \bigoplus\limits_{x:X} A}\and + \inferrule{\Gamma,x:X \vdash A \isLinTy}{\Gamma \vdash \bigamp\limits_{x:X} A}\and + \inferrule + {\Gamma \vdash f : \ltonl {(A \lto B)} \and \Gamma \vdash g : \ltonl { (A \lto B) }} + {\Gamma \vdash \equalizer {a}{f}{g} \isLinTy} + \end{mathpar} + \caption{Linear Types (selection)} +\end{figure} +\begin{figure} + \begin{mathpar} \inferrule{~}{\Gamma ; a : A \vdash a : A} \and \inferrule{\Gamma ; \Delta \vdash e : B \\ \linctxwffjdg \Gamma {A \equiv B}}{\Gamma ; \Delta \vdash e : A} @@ -586,1220 +992,1482 @@ \subsection{Syntax} \inferrule{\Gamma ; \Delta \vdash e : A \otimes B \\ \Gamma ; \Delta'_1, a : A, b : B, \Delta'_2 \vdash e'}{\Gamma ; \Delta_1', \Delta, \Delta'_2 \vdash \letin {a \otimes b} e {e'}} \\ % - \inferrule{\Gamma ; a : A , \Delta \vdash e : B}{\Gamma ; \Delta \vdash \lamblto a e : A\lto B} + \inferrule{\Gamma ; \Delta , a : A \vdash e : B}{\Gamma ; \Delta \vdash \lamblto a e : A\lto B} \and - \inferrule{\Gamma ; \Delta' \vdash e' : A \\ \Gamma ; \Delta \vdash e : A \lto B}{\Gamma ; \Delta', \Delta \vdash \applto {e'} {e} : B} - \\ - % - \inferrule{\Gamma ; \Delta , a : A \vdash e : B}{\Gamma ; \Delta \vdash \lambtol a e : B\tol A} - \and - \inferrule{\Gamma ; \Delta \vdash e : B \tol A \\ \Gamma ; \Delta' \vdash e' : A}{\Gamma ; \Delta, \Delta' \vdash \apptol e {e'} : B} - % + \inferrule{\Gamma ; \Delta' \vdash e' : A \\ \Gamma ; \Delta \vdash e : A \lto B}{\Gamma ; \Delta, \Delta' \vdash \applto {e'} {e} : B} \\ % - \inferrule{\Gamma, x : X ; \Delta \vdash e : A}{\Gamma ; \Delta \vdash \dlamb x e : \LinPiTy x X A} + \inferrule{\Gamma, x : X ; \Delta \vdash e : A} + {\Gamma ; \Delta \vdash \dlamb x e : \LinPiTy x X A} \and - \inferrule{\Gamma ; \Delta \vdash e : \LinPiTy x X A \\ \Gamma \vdash e' : X}{\Gamma ; \Delta \vdash \app e {e'} : \subst A {e'} x} + \inferrule{\Gamma ; \Delta \vdash e : \LinPiTy x X A \\ \Gamma \vdash M : X}{\Gamma ; \Delta \vdash e\,.\pi\,M : \subst A {e'} x} % \\ % - \inferrule{\Gamma \vdash e : X \quad \Gamma ; \Delta \vdash e' : \subst A e x}{\Gamma ; \Delta \vdash (e, e') : \LinSigTy x X A} + \inferrule{\Gamma \vdash M : X \quad \Gamma ; \Delta \vdash e : \subst A e x}{\Gamma ; \Delta \vdash \sigma\,M\,e : \bigoplus\limits_{x:X} A} % \and % - \inferrule{\Gamma ; \Delta \vdash e : \LinSigTy x X A \quad \Gamma, x : X ; \Delta'_1, a : A, \Delta'_2 \vdash e' : C}{\Gamma; \Delta'_1, \Delta, \Delta'_2 \vdash \letin {(x, a)} e {e'}: C} - % - \\ - % - \inferrule{~}{\Gamma ; \Delta \vdash () : \top} - - \and - - \inferrule{~}{\Gamma ; x : \bot \vdash \mathsf{absurd}_A : A} + \inferrule{\Gamma ; \Delta \vdash e : \bigoplus\limits_{x:X} A \quad \Gamma, x : X ; \Delta'_1, a : A, \Delta'_2 \vdash e' : C}{\Gamma; \Delta'_1, \Delta, \Delta'_2 \vdash \letin {\sigma\,x\,a} e {e'}: C} % \and % + \inferrule + { \Gamma ; \Delta \vdash e : A \\ + \Gamma ; \Delta \vdash \applto {f}{e} \equiv \applto {g}{e}} + {\Gamma ; \Delta \vdash \equalizerin{e} : \equalizer {e}{f}{g}} % - \inferrule{\Gamma \vdash e : \mathsf{G} A}{\Gamma ; \cdot \vdash 
\mathsf{G}^{-1}\, e : A} - % - \\ - % - \inferrule{\Gamma; \Delta \vdash e : \subst A {\mu x.\, A} x}{\Gamma; \Delta \vdash \mathsf{cons}\, e : \mu x.\, A} \and - \inferrule{\Gamma;\Delta\vdash e' : \mu x.\,A \and \Gamma; a:\subst A B x \vdash e : B}{\Gamma;\Delta\vdash \mathsf{fold}(a.e)(e') : B} + % + \inferrule + {\Gamma ; \Delta \vdash e : \equalizer{a}{f}{g}} + {\Gamma ; \Delta \vdash \equalizerpi {e} : A} \end{mathpar} - \caption{Linear typing} + \caption{Linear terms (selection)} \label{fig:linsyntax} \end{figure} +\subsection{Indexed Inductive Linear Types} + +Next, we introduce one of the most complex and important linear +type constructors of our development, \emph{indexed inductive linear +types} $\mu F$. To encode these, we first add a non-linear type +$\SPF\,X$ of \emph{strictly-positive functorial} linear type +expressions indexed by a non-linear type $X$. We give the rules for +strictly-positive functors in \Cref{fig:spf}. First, we add operations +for a recursive reference $\Var{x}$ the constant functor, $K$, tensor +products and additive conjunction and disjunction. As an example, +\max{TODO: make an example}. Next we add an operation $\el(F)$ which +interprets our syntactic codes as an actual type constructor, and +define it for each of the operations given above. Lastly, we give a +\emph{functorial action} $\map(F)$ for each of the codes.\max{TODO: + find a citation for this kind of approach. Container papers work + this way for instance. I think there is a linear container paper + that uses this appraoch} + +With this notion of strictly-positive functor so defined, in +\Cref{fig:iilt} we next add a linear type constructor $\mu F$, which +defines a family of mutually-recursive inductive types for the family +of type expressions $F$. The introduction rule for this is +$\mathsf{roll}$, which constructs an element of $\mu F\,x$ from its +one-level unfolding. The elimination principle is defined by a +$\mathsf{fold}$ operation. The equations ensure that this makes the +family an \emph{initial algebra} for the type expressions. That is the +$\beta$ rule says that a $\mathsf{fold}$ applied to a $\mathsf{roll}$ +is equivalent to $\mathsf{map}$ping the $\mathsf{fold}$ over all the +sub-expressions, which means that $\mathsf{fold}$ is an algebra +homomorphism from $\mathsf{roll}$ to $\mathsf{f}$. Then the $\eta$ +rule says that $\mathsf{fold}$ is the \emph{unique} such homomorphism. + +This definition as an initial algebra is well-understood semantically +but the $\eta$ principle in particular is somewhat cumbersome to use +directly in proofs. In dependent type theory, we would have a +dependent \emph{elimination} principle, which justifies +\emph{inductive proofs}. However since linear types do not support +dependency on linear types, we cannot directly adapt this +approach. 
However, if we are trying to prove that two morphisms out of
+a mutually recursive type are equal, we can use the \emph{equalizer}
+type to prove their equality by induction. In outline, if $f$ and $g$
+are both algebra homomorphisms out of $(\mu F, \roll)$, then the
+equalizer of $f$ and $g$ inherits an algebra structure, so $\fold$
+provides a map from $\mu F$ into the equalizer; composing with the
+projection $\equalizerpi{-}$ gives an algebra homomorphism from
+$\mu F$ to itself, which the $\eta$ rule identifies with the
+identity, and hence $f \equiv g$.
+
 \begin{figure}
+  \begin{align*}
+    \mathsf{Var} &: X \to \SPF\,X\\
+    \mathsf{K} &: L \to \SPF\,X\\
+    \mathsf{\bigoplus} &: (Y \to \SPF\,X) \to \SPF\,X\\
+    \mathsf{\bigamp} &: (Y \to \SPF\,X) \to \SPF\,X\\
+    \mathsf{\otimes} &: \SPF\,X \to \SPF\,X \to \SPF\,X\\
+    \el &: \prod_{X:U}\SPF\,X \to (X \to L) \to L\\
+    \map &: \prod_{X:U}\prod_{F : \SPF\,X}\prod_{A,B:X\to L}(\prod_{x:X}\uparrow(A\,x\lto B\,x)) \to \uparrow(\el(F)(A) \lto \el(F)(B))\\\\
+    \el(\Var\,M) B &= B\,M\\
+    \el(\mathsf{K}\,A) B &= A\\
+    \el(\bigoplus A) B &= \bigoplus_{y:Y}\el(A y)B\\
+    \el(\bigamp A) B &= \bigamp_{y:Y}\el(A y)B\\
+    \el(A \otimes A') B &= \el(A)B \otimes \el(A')B\\\\
+    \map(\Var\,M)\,f &= f\,M\\
+    \map(\K\,A)\,f &= \lambda a. a\\
+    \map(\bigoplus A)\,f &= \lambda a. \letin{\oplusinj y {a_y}}{a}{\oplusinj y {\map(A\,y)\,f\,a_y}}\\
+    \map(\bigwith\,A)\,f &= \lambda a. \withlamb{y}{\map(A\,y)\,f\,(\withprj y a)}\\
+    \map(A \otimes A')\,f &= \lambda b. \letin{(a,a')}{b}{(\map(A)\,f\,a,\map(A')\,f\,a')}
+  \end{align*}
  \begin{mathpar}
-    \boxed{\Gamma \vdash e \equiv e' : X}
+  \end{mathpar}
+  \caption{Strictly Positive Functors}
+  \label{fig:spf}
+\end{figure}

-    \inferrule{\Gamma \vdash p : e =_X e'}{\Gamma \vdash e \equiv e' : X}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash \app {(\lamb x e)} {e'} \equiv \subst x e {e'} : X}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash e \equiv \lamb x {\app e x} : \PiTy x X Y}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash \pi_1\, (e_1, e_2) \equiv e_1 : X}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash \pi_2\, (e_1, e_2) \equiv e_2 : \subst x {e_1} Y}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash e \equiv (\pi_1\, e, \pi_2\, e) : \SigTy x X Y}
-%
-    \and
-%
-    \inferrule{~}{}
-%
-    \inferrule{~}{\Gamma \vdash t \equiv t' : 1}
-%
-    \and
-%
-    \inferrule{~}{\Gamma \vdash G\, (G^{-1} \, t) \equiv t : G A}

+\begin{figure}
+  \begin{mathpar}
+    \mathsf{roll} : \prod_{X:U}\prod_{F:X \to \SPF\,X}\prod_{x:X}\ltonl{(\el(F\,x)(\mu F) \lto \mu F\,x)}

    %% \inferrule*[right=IndIntro]
    %% {\Gamma \vdash A : X \to \textrm{SPF}\,X\and
    %% \Gamma \vdash M : X\and
    %% \Gamma; \Delta \vdash e : \textrm{el}(A\,M) (\mu\,A)}
    %% {\Gamma; \Delta \vdash \mathsf{roll}\, e : \mu\,A\, M}

-    \boxed{\Gamma ; \Delta \vdash a \equiv a' : A}

+    \mathsf{fold} : \prod_{X:U}\prod_{F:X\to\SPF\,X}\prod_{A:X \to L}
+    (\prod_{x:X}\ltonl{(\el(F\,x)(A) \lto A\,x)})
+    \to \prod_{x:X}\ltonl{(\mu F\,x \lto A\,x)}

-    \inferrule{~}{\Gamma; \cdot \vdash G^{-1}\, (G \, t ) \equiv t: A}
-%
-    \\
-%
-    \inferrule{~}{\Gamma; \Delta \vdash \app {(\lamblto a e)} {e'} \equiv \subst e x {e'} : C}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \lamblto a {\app e a} : A \lto B}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash \app {(\lambtol a e)} {e'} \equiv \subst e x {e'} : C}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \lambtol a {\app e a} : A \tol B}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash \app {(\dlamb x e)} {e'} \equiv \subst x a {e'} : C}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \dlamb x {\app e x} : \LinPiTy x X A}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e \equiv e' : \top}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e_i \equiv \pi_i (e_1, e_2) : A_i}
-%
-    \and
-%
-    \inferrule{~}{\Gamma; \Delta \vdash e \equiv (\pi_1 e, \pi_2 e) : A\& B}
-%
-    \and
-%
-    
\inferrule{~}{\Gamma; \Delta \vdash \letin {()} {()} e \equiv e : C}
-  %
-  \and
-  %
-  \inferrule{~}{\Gamma; \Delta \vdash \letin {()} e {\subst {e'} {()} a} \equiv \subst {e'} a e : C}
-%
-  \and
-%
-  \inferrule{~}{\Gamma; \Delta \vdash \letin {e \otimes e'} {a \otimes a'} e'' \equiv \subst {e''} {a, a'} {e, e'} : C}
-%
-  \and
-  %
-  \inferrule{~}{\Gamma; \Delta \vdash \letin {a \otimes b} e {\subst {e'} {a \otimes b} c} \equiv \subst {e'} c e : C}
-%
-  \and
-%
-  \inferrule{~}{\Gamma;\Delta \vdash \letin {(x, a)} {(e, e')} {e''} \equiv \subst {e''} {x, a} {e, e'} : C}
-  %
-  \and
-  %
-  \inferrule{~}{\Gamma; \Delta \vdash \letin {(x, a)} e {\subst {e'} {(x, a)} y} \equiv \subst {e'} e y : C}
-\end{mathpar}
-  \caption{Judgmental equality}
-  \label{fig:jdgeq}
+    \inferrule*[right=Ind$\beta$]
+    {}
+    {\fold\,F\,f\,x\,(\roll e) \equiv f\,x\,(\map(F\,x)\,(\fold\,F\,f)\,e)}
+
+    \inferrule*[right=Ind$\eta$]
+    {x:X;a:\el(F\,x)(\mu F) \vdash e'\,x\,(\roll a) \equiv f\,x\,(\map(F\,x)\,e'\,a)
+    }
+    {\fold\,F\,f\equiv e'}
    %% {\Gamma,x:X; a:\mu\,A\,x \vdash e' : B\,x
    %% \and \Gamma;\Delta \vdash e : \mu A M\\\\
    %% \Gamma,x:X; a':\el(A x)(\mu A) \vdash
    %% e'[x/x][\roll(a')/a] = f[x/x][\map(A\,x)\,(y.a.e'[y/x][a/a])\,a/b] : B\,x
    %% }
    %% {\Gamma; \Delta \vdash \fold(A)(x.b.f)(M,e) = e'[M/x][e/a] : B\,M}
+  \end{mathpar}
+  \caption{Indexed Inductive Linear Types}
+  \label{fig:iilt}
\end{figure}
-\subsection{Formal Grammars as Linear Types}
-\label{sec:formaltype}
-
-The linear typing judgment in our syntax takes on the following schematic form
-$\Gamma ; \Delta \vdash a : A$. First, $A$ represents a \emph{linear type} in
-our syntax. The intended semantics of these linear types are formal grammars.
-That is, the linear typing system is designed to syntactically reflect the
-behavior of formal grammars. For this reason, we may often interchangeably use
-the terms ``linear type'' and ``grammar''.
-
-The term $a$ is an inhabitant of type $A$, which is thought of as a parse tree of
-the grammar $A$. The core idea of this entire paper follows precisely from this
-single correspondence: grammars are types, and the inhabitants of these types
-are parse trees for the grammars.
-
-$\Gamma$ represents a non-linear context, while $\Delta$ represents a
-\emph{linear context} dependent on $\Gamma$. These linear contexts behave
-substructurally. As stated earlier, they are linear --- they do not obey
-weakening or contraction --- because a character is exhausted once read by a
-parsing procedure. Moreover, the characters in strings appear in the order in
-which we read them. We do not have the freedom to freely permute characters,
-therefore any type theory that is used to reason about formal grammars ought to
-omit the structural rule of exchange as well. This means that every variable
-within $\Delta$ must be used \emph{exactly once} and \emph{in order of
-  occurrence}. Thus, we can think of the linear contexts as an ordered list of
-limited resources. Once a resource is consumed, we cannot make reference to it
-again. Variables in a linear context then act like building
-blocks for constructing patterns over strings.
-
-We give the base types and type constructors for linear terms. As the
-interpretation of types as grammars in $\Grammar$ serves as our intended
-semantics, we simultaneously give the interpretation $\sem \cdot$ of the
-semantics as grammars. 
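To make the machinery of \Cref{fig:spf} and \Cref{fig:iilt} concrete, the
following sketch presents the Kleene star of a fixed grammar $g$ as an indexed
inductive linear type over the trivial index, in the Agda-style surface syntax
of our figures. The names $\mathtt{starF}$, $\mathtt{nil}^{*}$, and
$\mathtt{cons}^{*}$ are illustrative rather than part of the formal artifact,
and we write $()$ for the canonical parse of the empty word.
\begin{lstlisting}
-- A sketch, not the artifact's definitions: the Kleene star of a
-- fixed grammar g as an indexed inductive linear type over the
-- one-element index type Unit.
starF : Unit (*@$\to$@*) SPF Unit
starF _ = (*@$\bigoplus$@*) ((*@$\lambda$@*) b (*@$\to$@*) if b then K I else (K g (*@$\otimes$@*) Var tt))

g* : L
g* = (*@$\mu$@*) starF tt

-- Both constructors arise as instances of roll, choosing a branch of
-- the disjunction in the one-level unfolding I (*@$\oplus$@*) (g (*@$\otimes$@*) g*).
nil* : (*@$\uparrow$@*) g*
nil* = roll ((*@$\sigma$@*) true ())

cons* : (*@$\uparrow$@*)(g (*@$\lto$@*) g* (*@$\lto$@*) g*)
cons* a as = roll ((*@$\sigma$@*) false (a , as))
\end{lstlisting}
A $\fold$ over $\mathtt{g}^{*}$ then recovers the usual right fold over a
sequence of $g$-parses.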
- -\pedro{The paragraphs above are a bit circular, what about the following alternative?} -\steven{I am in favor of cutting what's above, but we haven't introduced the - syntax in the main body of the paper yet, even if the figures appeared - earlier. So what you write needs to take that into account} -\pedro{In which case we should introduce it in the previous section. I suggest - adding this right after ``and the intuitionistic typing rules in fig 3''} - -Now that we have specified the syntactic aspect of \theoryabbv, we must justify -its connections to formal grammars in order to show that it is a sound methodology -for reasoning about parsers. We do this by showing that $\mathbf{Gr}$ is a model -to our type theory. - -Most of the non-linear semantics is standard \pedro{We should either cite something or add the non-linear semantics in the appendix}, so let us investigate in more -depth the linear semantics. +\subsection{Grammar-specific Additions} +So far, our calculus is a generic combination of dependent types with +non-commutative linear types. In order to carry out formal grammar +theory and define parsers, however, we add a few carefully chosen base +types, terms and axioms that allow us to reason about formal grammars. +\max{TODO: more prose, also are these all of our axioms?} \begin{enumerate} - \item A non-linear context $\Gamma$ denotes a set $\sem \Gamma$ - \item A non-linear type $\Gamma \vdash X : U_{i}$ denotes a family of sets - $\sem X : \sem \Gamma \to \Set_{i}$ - \item A non-linear term $\Gamma \vdash e : X$ denotes a section - $\sem e : \Pi(\gamma : \sem \Gamma)\sem{X} \gamma$ \pedro{we have not defined what sections are} - \item Linear contexts $\Gamma \vdash \Delta$ and types - $\Gamma \vdash A : L_{i}$ both denote families of grammars - $\sem \Gamma \to \Gr_{i}$ - \item A linear term $\Gamma ; \Delta \vdash M : A$ denotes a family of parse - transformers - $\sem M : \Pi(\gamma : \sem \Gamma)\Pi(w : \String) \sem \Delta \gamma w \Rightarrow \sem M \gamma w$ +\item (Characters) For each character of our alphabet $c \in \Sigma$, we add a linear type $c \isLinTy$. +\item (Strings are total) The function $\ltonl{(\String \lto \top)}$ is an isomorphism.\max{for this need to define $\String$, and isomorphism, and possibly we should define $\top$ in more detail earlier? idk} +\item (Distributivity of $\oplus,\&$) The function $\ltonl{(\bigoplus\limits_{f : \prod_{x:X} Y(x)}\bigamp\limits_{x:X}A\,x\,(f\,x) \lto \bigamp\limits_{x:X}\bigoplus\limits_{y:Y(x)}A\,x\,y)}$ is an isomorphism. +\item ($\ltonl$ preserves $\oplus$) The function $\sum_{x:X}\ltonl{(A\,x)} \to \ltonl{(\bigoplus\limits_{x:X}A\,x)}$ is an isomorphism. \end{enumerate} -\paragraph{Linear Unit} -First, the linear unit $I$ may be built in the empty linear -context\footnote{When appropriate, statements like ``the empty linear context'' - may be taken polymorphically over all non-linear contexts that they might - depend on.} $\cdot \dashv I$. $I$ serves as the unit for the operation -$\otimes$ described below. - -As a grammar, -% -\[ - \sem {I} \gamma w = \{ () ~|~ w = \varepsilon \} -\] -% -That is, $I$ maps strings to the proposition that they are the empty string. $I$ -is clearly only inhabited by the empty string $\varepsilon$, for which the -outputted set contains a single parse tree. - - -\paragraph{Base Types} -For each character $c$ in the alphabet $\Sigma$, we include a base type at the -lowest universe level $c : L_{0}$. - -The grammar interpretation for characters is quite similar to that of $I$. 
-% -\[ - \sem {c} \gamma w = \{ () ~|~ w = c \} -\] -% -The grammar for the character $c$ likewise maps strings $w$ to the proposition that -they are equal to $c$. - -\paragraph{Tensor Product} -The tensor product of two linear types is the first place where the ordering on -contexts really takes effect. That is, the tensor product of two types is -non-commutative. -When context $\Delta$ forms a linear term of -type $A$, and $\Delta'$ forms a linear term of type $B$, then the context -extension $\Delta, \Delta'$ forms a linear term of type $A \otimes B$. - -As stated above, the type $I$ serves as the unit for $\otimes$. That is, for all -linear types $A$, the following equalities hold: -% -\[ I \otimes A \equiv A \otimes I \equiv A \] -% -In the grammar semantics, -% -\[ - \sem {A \otimes B} = \Sigma_{w_{1}, w_{2} : \String} (w_{1}w_{2} = w) \land \left( \sem {A} \gamma w_{1} \times \sem {B} \gamma w_{2} \right) -\] -% -The string $w$ matches $A \otimes B$ -precisely when it may be split into two -pieces such that the left one matches $A$ -and the right one matches $B$. - -\paragraph{Linear Function Types} -The monoidal structure provided by $\otimes$ is both left and right closed and -this is denoted by the left and right linear function types. Further, -for linear types $A$ and $B$ the left linear function type $A \lto B$ -enjoys an elimination principle similar to function application or modus ponens. -% -\[ -\inferrule{\Delta \vdash A \otimes A \lto B}{\Delta \vdash B} -\] -% -The interpretation of linear function types at a string $w$ is a linear function -that takes in parses of $A$ on a string $w_{a}$ and outputs parses of $B$ on the -string $w_{a} w$. -% -\[ - \sem{A \lto B} \gamma w = \Pi(w_a:\String) \left( A \gamma w_a \Rightarrow B\gamma (w_a w) \right) -\] -% -That is, strings match $A \lto B$ if when prepended with a parse of $A$ they -complete to parses of $B$. In this manner, the linear function types generalize -Brzozowksi's notion of derivative -\cite{brzozowskiDerivativesRegularExpressions1964}. -Brzozowski initially only gave an accounting of this operation for -generalized regular expressions, but later work from Might et al.\ demonstrates that the same -construction can be generalized to context free grammars -\cite{mightParsingDerivativesFunctional2011}. Here, via the linear function -types, the same notion of derivative extends to the grammars of \theoryabbv. - -Note, to ensure that the linear function types do indeed generalize Brzozowski -derivatives, we must include the equalities in \cref{fig:brzozowskideriv} as axioms\max{we must? why must we}. -\pedro{We should further explain why these equations are required, or at the very -least give some intuition} - -Of course, all the above statements for the left function type also have -corresponding analogues for the -right-handed counterpart. - -\begin{figure} -\begin{align*} - c\lto c &\equiv I\\ - c\lto d &\equiv \bot\\ - c\lto I &\equiv \bot\\ - c\lto 0 &\equiv \bot\\ - c\lto (A \otimes B) & \equiv (c\lto A) \otimes B + (A \amp I) \otimes (c\lto B)\\ - c\lto A^* &\equiv (A \amp I)^{*} \otimes (c \lto A) \otimes A^* \\ - c\lto (A + B) &\equiv (c\lto A) + (c\lto B) -\end{align*} -\caption{Equality for Brzozowski Derivatives} -\label{fig:brzozowskideriv} -\end{figure} - -\paragraph{LNL Dependent Types} -Given a non-linear type $X$, we may form both the dependent product type -$\LinPiTy x X A$ and the dependent pair type $\LinSigTy x X A$ as linear types -where $x$ is free in $A$. 
- -$\sem{\LinPiTy x X A} \gamma w = \Pi(x:\sem{X}\gamma) \sem{A}(\gamma,x) w$ +% \steven{TODO the intro to this section is to be condensed based on what is +% included in the examples section as motivation and intuition building} + +% Omission of the structural rules of a deductive system, such as in +% linear logic \cite{GIRARD19871}, offers precise control over how a value is used +% in a derivation. Linear logic omits the weakening and contraction rules +% to ensure that every value in context is used exactly once. This control enables +% \emph{resource-sensitive} reasoning, where we may treat a resource as +% \emph{consumed} after usage. This viewpoint is amenable to parsing applications, +% as we may treat characters of a string as finite resources that are consumed at +% parse-time. That is, when reading a string, the occurrence of any character is read once. Freely duplicating or dropping characters from a string changes the meaning of that string. \max{what about a backtracking parser?}. One may then envision a linear type system where the types comprise +% formal grammars generated over some alphabet $\Sigma$, and the type constructors +% correspond precisely to inductive constructions on grammars --- such as +% conjunction, disjunction, concatenation, etc. + +% Programming in a purely linear term language is limiting due to the variable +% usage restrictions. Code in such a language can +% become unnecessarily verbose when ensuring the linearity invariant. +% To alleviate this +% concern, in 1995 Benton et al.\ proposed an alternative categorical +% semantics of linear logic based on adjoint interactions between linear +% and non-linear logics \cite{bentonMixedLinearNonlinear1995} --- +% appropriately referred to as a \emph{linear-non-linear} (LNL) +% system. This work is simply typed, so the boundary between linear and +% non-linear subtheories is entirely characterized via a monoidal +% adjunction between linear terms and non-linear terms. + +% \steven{Describe this monoidal adjunction with $\ltonl A$ and the derivable +% version of $F$} + +% Inside of an LNL system, linearity may be thought of as an option that users can +% choose to deliberately invoke at deliberate points in their developments in an +% otherwise intuitionistic system. However, if we are wishing to treat parsers as +% linear terms over input strings, the non-linear fragment of an LNL theory does +% not really assist in the development of parsers. It is instead the case that +% parsers may benefit from a \emph{dependence} on non-linear terms. +% Through the approach described by Krishnaswami et al.\ in +% \cite{krishnaswami_integrating_2015}, +% we may define a restricted form of dependent types. + +% In particular, dependence +% on linear terms +% is disallowed; however, through dependence of a linear term on a non-linear +% index, we can recover the definition of Aho's indexed grammars \cite{AhoIndexed} +% internal to \theoryabbv. Although, we can define strictly more things, as Aho's +% indexed grammars provide restricted access to the index whereas we can do +% whatever with it. + +% \steven{elaborate on indexed grammars in a more articulate way in the last sentence} +% \steven{Put the aho stuff in the subtheory section} +% + +% \paragraph{Regular Expressions} +% We may realize regular expression as a subtheory +% of our language by restricting the type constructors to the +% linear unit, characters, $\otimes$, $\oplus$, and Kleene star. 
Classically, the
% languages recognized by these are exactly the regular languages --- the lowest
% on the Chomsky hierarchy.

% \paragraph{$\mu$-Regular Expressions} Similarly, we may restrict the connectives
% to be linear unit, characters, $\otimes$, $\oplus$, and arbitrary recursion via
% left-fixed point rather than only Kleene star.
% Instead of regular expressions these correspond to Lei{\ss}'s $\mu$-regular
% expressions, which are known to be equivalent to context-free grammars
% \cite{leiss,krishnaswami_typed_2019}.

% \paragraph{Beyond Context-Free Grammars} The previous two subtheories induce as semantics
% regular grammars and context-free grammars, respectively; however, by including
% the LNL dependent types we may actually express the entirety of the Chomsky
% hierarchy. Through use of the LNL dependent types, we may encode indexed
% grammars, which are known to be properly between context-free and
% context-sensitive grammars within the Chomsky hierarchy \cite{AhoIndexed}. We
% will further see in \cref{subsubsec:tm} how to use dependence to encode Turing machines internal to our
% calculus, which of course induces a semantics of unrestricted grammars.

% \paragraph{Indexed Grammars}
% The LNL dependent types can be used to encode Aho's indexed grammars
% \cite{AhoIndexed} internal to \theoryabbv. Although, we can define strictly more
% things with this non-linear index, as we have unrestricted access to the index
% whereas Aho's grammars can only manipulate it in a particular way.

% \steven{Elaborate on these, and note that they are bw context free and context
% sensitive on the Chomsky hierarchy}

\section{Formal Grammar Theory in \theoryname}
\label{sec:applications}
This section explores the applications of \theoryabbv to conducting formal
grammar theory. We demonstrate that several classical notions and constructions
integral to the theory of formal languages are faithfully represented
in \theoryname. By encoding well-established formal grammar concepts, we ensure
that our framework remains grounded in the
foundational principles of formal language theory while opening the door to
compositional formal verification of parsers.

\paragraph{Equivalence}
In \cite{chom1963}, Chomsky differentiates between the weak generative capacity
of a formal grammar --- the set of strings accepted by it --- and its strong
generative capacity --- the set of ``structural descriptions'', or parse trees,
that it generates. Two grammars are said to be \emph{weakly equivalent} if they
agree in their weak generative capacity, and likewise \emph{strongly equivalent}
if they agree in their strong generative capacity. We aim to recover each of
these notions of equivalence in \theoryabbv.
-% -\[ - A \amp B := \LinPiTy b \Bool {C(b)} -\] -\[ - A \oplus B := \LinSigTy b \Bool {C(b)}, -\] -where $C(\true) = A, C(\false) = B$. +\begin{definition} + \label{def:weakequiv} + Grammars $\A$ and $\B$ are \emph{weakly equivalent} if there exists parse + transformers $\f : \ltonl{(\A \lto \B)}$ and $\g : \ltonl {(\B \lto \A)}$. +\end{definition} -\paragraph{Universal Type} -The universal type $\top$ may be formed in any context. -% -\[ \sem{\top} \gamma w = \{ \ast \}\] -% -Its grammar semantics in set outputs the unit type in $\Set$ for all input strings in all contexts. +\newcommand{\lang}{\texttt{Lang}} +A parse transformer $\f : \ltonl{(\A \lto \B)}$ takes as input parses of $\A$ for string +$\w$ and returns a parse of $\B$ for $\w$. Therefore, $\f$ demonstrates an +inclusion of languages $\lang(\A) \subset \lang(\B)$. Similarly, the existence of +$\g$ shows that $\lang(\B) \subset \lang(\A)$, and thus $\lang(\A) = \lang(\B)$. -\paragraph{Empty Type} -The empty type $\bot$ has no inhabitants. The elimination for the empty type -witnesses the principle of explosion --- i.e.\ from a term of type $\bot$ we may -introduce a term $\mathsf{absurd}_{A} : A$ for any type $A$. -% -\[ \sem {\bot} \gamma w = \emptyset \] -% -\paragraph{The $G$ Modality} -The $G$ modality provides the embedding of linear types in the empty context -into non-linear types. The introduction and elimination forms for $G A$ obey the -same rules as given in \cite{bentonMixedLinearNonlinear1995} and \cite{krishnaswami_integrating_2015}. +\steven{Need better notation for language that is cohesive throughout the paper} -The left adjoint $F$ from non-linear types back to linear types may be defined -as, -% -\[ - F X := \LinSigTy a A I -\] -% -\steven{Elaborate on $G$ more} +\begin{definition} + \label{def:strongequiv} + Grammars $\A$ and $\B$ are \emph{strongly equivalent} if there exists parse + transformers $\f : \ltonl{(\A \lto \B)}$ and $\g : \ltonl {(\B \lto \A)}$ such + that $\f \circ \g \equiv \ident$ and $\g \circ \f \equiv \ident$. +\end{definition} -\pedro{Agreed :) maybe mention connections to persistent propositions from the -separation logic literature} +For a string $\w$, \cref{def:strongequiv} induces a bijection between the +$\A$-parses of $\w$ and the $\B$-parses of $\w$, thus demonstrating that $\A$ +and $\B$ agree in their strong generative capacity. -The semantics of $G A$ are exactly the semantics of $A$ at the empty word $\varepsilon$. -% -\[ \sem{G A} \gamma = \sem{A} \gamma \varepsilon \] -% -% -\paragraph{Recursive Types} -Recursive linear types may be defined via the least-fixed point operator $\mu$. -The grammar semantics of which are the fixed-point of sets induced by the -grammar semantics of $A$. -% -\[ \sem{\mu x. A} \gamma = \mu (x:\Gr_i). \sem{A}(\gamma,x) \] -% -\pedro{We should justify, even if briefly, the existence of this - fixed-point} -\steven{As a consequence of Knaster-Tarski} +\paragraph{Ambiguity} +A formal grammar $\A$ is ambiguous if there is a string $\w$ for which $\A$ has +several parse trees of $\w$. +For example, $\a \oplus \a$ is ambiguous because +$\stringquote{a}$ has two parse trees --- on either side of the disjunction. On +the other hand, a formal grammar is unambiguous when there is at most +one parse tree for any input string. These notions of ambiguity are entirely +semantic, so we seek to find a property expressible in the syntax of \theoryabbv +that reifies the semantic description of ambiguity. 
-Observe that we need not take the Kleene star as a primitive -grammar constructor, as it is definable as a fixed point. -The Kleene star of a grammar $g$ is given as, +\begin{definition} + \label{def:unambig} + A grammar $\A$ is \emph{unambiguous} if for every linear type $\B$, + $\f : \ltonl{(\B \lto \A)}$, and $\g : \ltonl{(\B \lto \A)}$ + then $\f \equiv \g$ +\end{definition} +\cref{def:unambig} can be read more intuitively as stating that $\A$ is +unabmiguous if there is at most one way to transform parses of any other grammar +$\B$ into parse of $\A$. + +Note that this definition of unambiguity is preserved by strong equivalence, so a common proof technique for showing $\A$ is unambiguous is by proving $\A$ +is strongly equivalent to an unambguous grammar $\B$. In this style of proof, we +build the parse transformers $\f : \ltonl{(\A \lto \B)}$ and +$\g : \ltonl{(\B \lto \A)}$, then we have the obligation to show +$\f \circ \g \equiv \ident$ and $\g \circ \f \equiv \ident$. However, because $\B$ is +unambiguous there is only one parse transformer $\ltonl{(\B \lto \B)}$, so +$\f \circ \g : \ltonl{(\B \lto \B)}$ and $\ident : \ltonl{(\B \lto \B)}$ are equal. +Thus, in this type of proof, we really only have the obligation of building each of $\f$ +and $\g$ and demonstrating that $\g \circ \f \equiv \ident$. + +% \paragraph{A Grammar of Strings} +% Define the character grammar $\charg$ to be the disjunction over characters in the alphabet $\Sigma$, +% $\charg = \LinSigTy {c} {\Sigma} c$. Then define a linear type of strings as a +% linear list of characters, +% $\stringg = \charg^{*}$. + +% In the grammars model, we can build a +% strong equivalence between $\top$ and $\stringg$. hen building parsers, we may wish to enforce this +% syntactically by including $\top \cong \stringg$ as an axiom. + +% Through inclusion of this axiom, $! : \stringg \to \top$ is an isomorphism, and +% thus also a monomorphism. That is, $\stringg$ is unambiguous. +% \max{we should have a section of the previous section which gives the axioms, this one and the distributivity axiom} + +% \paragraph{Parseability} +% We say $A$ is \emph{parseable} if there exists a grammar $B$ and a map +% $\top \to A \oplus B$. Taking $\top \cong \stringg$, a grammar $A$ is parseable +% exactly when we may read an arbitrary string into a parse of $A$ or a $B$-valued +% refutation. + +% \paragraph{Decidability} +% In the case that $A$ is parseable with respect to $\neg A$ --- i.e.\ +% there's a map $\top \to A \oplus \neg A$ --- then call $A$ \emph{decidable}. + + +\subsection{Regular Expressions and Finite Automata} +%% 2. Regex, NFA, DFA +In this section, we make the equivalences +between regular expressions and finite automata precise by encoding two theorems +from classical language theory: (1) every regular expression is strongly equivalent +to the traces through some non-deterministic finite automaton (NFA); and, (2) for +every NFA, there is some deterministic finite automaton (DFA) such that the type +of traces through the NFA is weakly equivalent to the type of traces through the DFA. 
+ +\newcommand{\states}{\mathtt{states}} +\newcommand{\labelt}{\mathtt{label}} +\newcommand{\transitions}{\texttt{transitions}} +\newcommand{\epstransitions}{\epsilon\texttt{transitions}} +\newcommand{\isAcc}{\mathtt{isAcc}} +\newcommand{\init}{\mathtt{init}} +\newcommand{\src}{\mathtt{src}} +\newcommand{\dst}{\mathtt{dst}} +\newcommand{\parse}{\mathtt{parse}} +\newcommand{\print}{\mathtt{print}} +\newcommand{\epssrc}{\epsilon\mathtt{src}} +\newcommand{\epsdst}{\epsilon\mathtt{dst}} + +\newcommand{\N}{\texttt{N}} + +\paragraph{Regular Expressions} The theory of regular expressions emerges as the +subtheory of \theoryabbv generated by +$\otimes$, $\oplus$, and Kleene star over the types $\bot$, $I$, and characters drawn from $\Sigma$. + +\paragraph{Nondeterministic Finite Automata} In \cref{sec:type-theory-examples}, we saw +one particular NFA corresponding to the regular expression +$(a^{*} \otimes b) \oplus c$. The definition of the linear type of traces +through this +NFA, as +presented in \cref{fig:nfainductive}, inspects the concrete structure of the +NFA.\ When generalizing from this example to an arbitrary NFA, we do not have the +luxury of inspecting a concrete defintion. Therefore, we abstract over the +specification of an arbitrary NFA and define traces with respect to that +specification. + +Let the data of an NFA $\N$ be given by the following nonlinear data, +\begin{itemize} +\item $\N.\states$ --- a finite set of states +\item $\N.\init : \N.\states$ --- the initial state +\item $\N.\isAcc : \N.\states \to \Bool$ --- a mapping marking which states are + accepting +\item $\N.\transitions$ --- a finite set of labeled transitions +\item $\N.\labelt : \N.\transitions \to \Sigma$ --- a mapping from $N.\transitions$ to the character labeling the transition +\item $\N.\src , \N.\dst : \N.\transitions \to \N.\states$ --- mappings from + $\N.\transitions$ to the source and target states, respectively, of a labeled transition +\item $\N.\epstransitions$ --- a finite set of $\varepsilon$-transitions +\item $\N.\epssrc, \N.\epsdst : \N.\epstransitions \to \N.\states$ --- mappings + from $\N.\epstransitions$ to the source and target states of an $\epsilon$-transition +\end{itemize} -\[ - g^{*} := \mu X . I \oplus (g \otimes X) -\] +In \cref{fig:nfatrace} we define a linear type of traces +through an NFA $\N$. $\Trace_{\N}$ is an inductive type indexed by the first +state $\s : \N.\states$ of a trace and a Boolean $b : \Bool$ that marks if the +trace ends in an accepting state. Traces in $\N$ may be built through one of three constructors. We +may terminate a trace at an accepting state with the constructor $\nil$. If we +had a trace beginning at the destination state of a transition, then we may use +the $\cons$ constructor to linearly combine that trace with a parse of the label +of the transition to build a trace beginning at the source of the transition. +Finally, if we had a trace beginning at the destination of an +$\epsilon$-transition then we may use $\epscons$ to pull it back along the +$\epsilon$-transition and construct a trace beginning at the source of the $\epsilon$-transition. -\begin{figure}[h!] 
-\begin{mathpar} - \inferrule - {\Gamma ; \Delta \vdash p : I} - {\Gamma ; \Delta \vdash \mathsf{nil} : g^{*}} - - \and +\begin{figure} +\begin{lstlisting} +data Trace(*@$_{\N}$@*) : (s : N.states) (b : Bool) (*@$\to$@*) L where + nil : (*@$\uparrow$@*)(&[ s (*@$\in$@*) N.states ] Trace(*@$_{\N}$@*)(s , N.isAcc s)) + cons : (*@$\uparrow$@*)(&[ t (*@$\in$@*) N.transitions ] &[ b (*@$\in$@*) Bool ] + (N.label(t) (*@$\lto$@*) Trace(*@$_{\N}$@*)(N.dst(t) , b) (*@$\lto$@*) Trace(*@$_{\N}$@*)(N.src(t) , b))) + (*@$\epsilon$@*)cons : (*@$\uparrow$@*)(&[ t (*@$\in$@*) N.(*@$\epsilon$@*)transitions ] &[ b (*@$\in$@*) Bool ] + (Trace(*@$_{\N}$@*)(N.(*@$\epsilon$@*)dst(t) , b) (*@$\lto$@*) Trace(*@$_{\N}$@*)(N.(*@$\epsilon$@*)src(t) , b))) +\end{lstlisting} +\caption{Traces through an NFA $N$ as an indexed inductive type} +\label{fig:nfatrace} +\end{figure} - \inferrule - {\Gamma ; \Delta \vdash p : g \\ \Gamma ; \Delta' \vdash q - : g^{*}} - {\Gamma ; \Delta \vdash \mathsf{cons}(p , q) : g^{*}} +\paragraph{Deterministic Finite Automata} +\newcommand{\D}{\texttt{D}} +Just as the above defines an NFA, we provide a specification for an arbitrary +deterministic finite automaton.\ A DFA $D$ is +given by the following data, - \\ +\begin{itemize} +\item $\D.\states$ --- a finite set of states +\item $\D.\init : \D.\states$ --- the initial state + \item $\D.\isAcc : \D.\states \to \Bool$ --- a mapping marking which states + are accepting +\item $\D.\delta : \Sigma \to \D.\states \to \D.\states$ --- a deterministic transition function. Given a character $\c$ and a state + $\s$, $\D.\delta~\c~\s$ is the state for which there is a transition $\s \overset{\c}{\to} (\D.\delta~\c~\s)$ +\end{itemize} - \inferrule - { - \Gamma ; \Delta \vdash p : g^{*} \\ - \Gamma ; \cdot \vdash p_{\varepsilon} : h \\ - \Gamma ; x : g , y : h \vdash p_{\ast} : h - } - {\Gamma ; \Delta \vdash \mathsf{foldr}(p_{\varepsilon} , p_{\ast}) : g^{*}} -\end{mathpar} -\caption{Kleene Star Rules} -\label{fig:star} +\begin{figure} +\begin{lstlisting} +data Trace(*@$_{\D}$@*) : (s : D.states) (b : Bool) (*@$\to$@*) L where + nil : (*@$\uparrow$@*)(&[ s (*@$\in$@*) D.states ] Trace(*@$_{\D}$@*)(s , D.isAcc s)) + cons : (*@$\uparrow$@*)(&[ c (*@$\in$@*) (*@$\Sigma$@*) ] &[ s (*@$\in$@*) D.states ] &[ b (*@$\in$@*) Bool ] + (c (*@$\lto$@*) Trace(*@$_{\D}$@*)(D.(*@$\delta$@*) c s , b) (*@$\lto$@*) Trace(*@$_{\D}$@*)(s , b))) +\end{lstlisting} +\caption{Traces through the DFA $D$ as an indexed inductive type} +\label{fig:dfatrace} \end{figure} -Likewise, $g^{*}$ has admissible introduction and -elimination rules, shown in \cref{fig:star}. Note that this -definition of $g^{*}$ and these -rules arbitrarily assigns a handedness to the Kleene star. -We could have just as well took it to be a fixed point of -$I \oplus (X \otimes g)$. In fact, the definitions are -equivalent, as the existence of the $\mathsf{foldl}$ term below -shows that $g^{*}$ is also a fixed point of -$I \oplus (X \otimes g)$. -This is a marked point of difference from Kleene -algebra with recursion, where the fixed points for the left and right variants -of Kleene star need not agree \cite{leiss}. 
- -\begin{equation} - \label{eq:foldl} - \inferrule - { - \Gamma ; \Delta \vdash p : g^{*} \\ - \Gamma ; \cdot \vdash p_{\varepsilon} : h \\ - \Gamma ; y : h , x : h \vdash p_{\ast} : h - } - {\Gamma ; \Delta \vdash \mathsf{foldl}(p_{\varepsilon} , p_{\ast}) : g^{*}} -\end{equation} - -In fact, the $\mathsf{foldl}$ term is defined using -$\mathsf{foldr}$ --- much in the same way one -may define a left fold over lists in terms of a right fold -in a functional programming language. -The underlying trick is to fold over a list of linear functions -instead of the original string. We curry each character $c$ -of the string into a function that concatenates $c$, and -right fold over this list of linear functions. Because function -application is left-associative, this results in a left -fold over the original string. - -We only take fixed points of a single variable as a -primitive operation in the type theory, but we may apply -Beki\`c's theorem \cite{Bekić1984} to define an admissible -notion of multivariate fixed point. This is particularly -useful for defining grammars that encode the states of an -automaton, as we will see in \cref{sec:automata}. In \cref{fig:multifix} we provide the -introduction and elimination principles for such a fixed -point, where $\sigma$ is the substitution that unrolls the -mutually recursive definitions one level. That is, - -\begin{align*} - \sigma = \{ & \mu(X_{1} = A_{1} \dots, X_{n} = A_{n}).X_{1} / X_{1} , \dots, \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{n} / X_{n} \} -\end{align*} +$\Trace_{\D}$ --- the linear type of traces through $\D$ --- is given in +\cref{fig:dfatrace}, and its is +quite similar to the definition of $\Trace_{\N}$. These +traces may be terminated in an accepting state $\s$ with the $\nil$ constructor. +The $\cons$ constructor builds a trace out of state $\s$ by linearly combining +a parse of some character $\c$ with a +trace out of the state $\D.\delta~\c~\s$. The trace built with $\cons$ is +accepting if and only if the trace out of $D.\delta_{c}(s)$ is accepting. + +Because DFAs are deterministic, we are able to prove that their type of traces +are unambiguous and define a parser for them. In particular we show that +$\LinPiTy {s} {D.\states} {\LinSigTy {b} {\Bool} \Trace_{D}(s , b)}$ is isomorphic to +$\stringg$, which simultaneously constructs a parser and demonstrates that +traces through $D$ is unambiguous. This isomorphism intuitively says that we pay +unambiguously build traces through $D$ beginning at any state $s$, and further +that from any trace through $D$ we can uniquely recover the string that built +the trace. + +The first half of the isomorphism, +$\parse_{D} : \ltonl {\left( \stringg \lto \LinPiTy {s} {D.\states} {\Trace_{D}(s)} \right)}$, +is defined by induction on strings in \cref{fig:dfaparser}. If this string is empty, then $\parse_{D}$ +defines a linear function that terminates a trace at the input state $s$. If the +string is nonempty, then $\parse_{D}$ walks forward from the input state $s$ by +the character at the head of the string. 
\begin{figure} -\begin{mathpar} - \inferrule - {\Gamma ; \Delta \vdash e : \simulsubst {A_{k}} {\sigma}} - {\Gamma ; \Delta \vdash \mathsf{cons}~e : \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{k}} - - \and - - \inferrule - {\Gamma ; \Delta \vdash e : \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{k} \\ - \Gamma ; x_{j} : \simulsubst {A_{j}}{\sigma} \vdash e_{j} : B_{j}\quad \forall j - } - {\Gamma; \Delta \vdash \mathsf{mfold}(x_{1}.e_{1}, \dots, x_{n}.e_{n})(e) : B_{k}} -\end{mathpar} -\caption{Multi-fixed Points} -\label{fig:multifix} +\begin{lstlisting} +parse : (*@$\uparrow$@*)(String (*@$\lto$@*) &[ s (*@$\in$@*) D.states ] (*@$\oplus$@*)[ b (*@$\in$@*) Bool ] Trace(*@$_{\D}$@*)(s , b)) +parse String.nil s = (*@$\sigma$@*) (D.isAcc s) (Trace(*@$_{\D}$@*).nil s) +parse (String.cons ((*@$\sigma$@*) c a) w) s = let (*@$\sigma$@*) b t = parse w (D.(*@$\delta$@*) c s) in + (*@$\sigma$@*) b (Trace(*@$_{\D}$@*).cons c s b a t) +\end{lstlisting} +\caption{Parser for DFA traces} +\label{fig:dfaparser} \end{figure} -\subsection{Subtheories} -Each choice of connective has direct implications on the expressivity of -\theoryabbv. In particular, the set of connectives used will determine where the -grammar semantics is placed in -the Chomsky hierarchy. - -\paragraph{Regular Expressions} We may realize regular expression as a subtheory -of our language by restricting the type constructors to the -linear unit, characters, $\otimes$, $\oplus$, and Kleene star. Classically, the -languages recognized by these are exactly the regular languages --- the lowest -on the Chomsky hierarchy. - -\paragraph{$\mu$-Regular Expressions} Similarly, we may restrict the connectives -to be linear unit, characters, $\otimes$, $\oplus$, and arbitrary recursion via -left-fixed point rather than only Kleene star. -Instead of regular expressions these correspond to Lei{\ss}'s $\mu$-regular -expressions, which are known to be equivalent to context-free grammars -\cite{leiss,krishnaswami_typed_2019}. - -\paragraph{Beyond Context-Free Grammars} The previous two subtheories induce as semantics -regular grammars and context-free grammars, respectively; however, by including -the LNL dependent types we may actually express the entirety of the Chomsky -hierarchy. Through use of the LNL dependent types, we may encode indexed -grammars, which are known to be properly between context-free and -context-sensitive grammars within the Chomsky hierarchy \cite{AhoIndexed}. We -will further see in \cref{subsubsec:tm} how to use dependence to encode Turing machines internal to our -calculus, which of course induces a semantics of unrestricted grammars. - -\section{Automata as Grammars} -\label{sec:automata} - -The canonicity theorem from the previous section gives a tight connection -between terms in our type theory and the theory of formal languages. In this section -we further explore these connections by using the fact that, classically -formal language theory is closely related to the study of automata. -In the Chomsky hierarchy, each language class is associated to a class of -automata that serve as recognizers. Internal to \theoryabbv, -we can characterize these language classes syntactically; moreover, we -demonstrate the equivalence of these language classes to its associated automata class as a -proof term within our logic. 
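Before turning to the other direction, note that instantiating $\parse$ at the
initial state immediately yields a decision procedure for the DFA, since the
Boolean $\oplus$-index records acceptance. A sketch (the name
$\mathtt{accepts?}$ is illustrative, not part of the formal artifact):
\begin{lstlisting}
-- A sketch: run the parser from the initial state; the index b
-- records whether the input string is accepted.
accepts? : (*@$\uparrow$@*)(String (*@$\lto$@*) (*@$\oplus$@*)[ b (*@$\in$@*) Bool ] Trace(*@$_{\D}$@*)(D.init , b))
accepts? w = (parse w).(*@$\pi$@*) D.init
\end{lstlisting}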
- -\subsection{Non-deterministic Finite Automata} -\label{subsec:finiteaut} +The second half of the isomorphism, +$\print_{D} : \ltonl {\left( \LinPiTy {s} {D.\states} {\Trace_{D}(s)} \lto \stringg \right)}$, +is given in \cref{fig:printdfa} by induction on traces. If the trace is defined +via $\nil$, then $\print_{D}$ returns the empty string. Otherwise, if the trace +is defined by $\cons$ then $\parse_{D}$ appends the character $c$ to the output +string and recurses. + \begin{figure} - \begin{tikzpicture}[node distance = 25mm ] - \node[state, initial, accepting] (1) {$1$}; - \node[state, below left of=1] (2) {$2$}; - \node[state, right of=2] (3) {$3$}; - - \path[->] (1) edge[above] node{$b$} (2) - (1) edge[below, bend right, left=0.3] node{$\epsilon$} (3) - (2) edge[loop left] node{$a$} (2) - (2) edge[below] node{$a, b$} (3) - (3) edge[above, bend right, right=0.3] node{$a$} (1); - \end{tikzpicture} - \caption{An example NFA} - \label{fig:NFA} +\begin{lstlisting} +print : (s : D.states) (*@$\to$@*) (*@$\uparrow$@*)(((*@$\oplus$@*)[ b (*@$\in$@*) Bool ] Trace(*@$_{\D}$@*)(s , b)) (*@$\lto$@*) String) +print .s ((*@$\sigma$@*) b (Trace(*@$_{\D}$@*).nil s)) = String.nil +print .s ((*@$\sigma$@*) b (Trace(*@$_{\D}$@*).cons c (D.(*@$\delta$@*) c s) b a trace)) = + String.cons ((*@$\sigma$@*) c a) (print (D.(*@$\delta$@*) c s) ((*@$\sigma$@*) b trace)) +\end{lstlisting} +% \[ +% \inferrule +% { +% } +% {tr : \LinPiTyLimit {s} {D.\states} {\LinSigTyLimit {b} {\Bool } {\Trace_{D}(s, b)}} \vdash asdf : \stringg} +% \] +\caption{Printer for DFA traces} +\label{fig:printdfa} \end{figure} -Classically, a \emph{nondeterministic finite automaton} (NFA) is a finite state machine where -transitions are labeled with characters from a fixed alphabet $\Sigma$. These -are often represented formally as a 5-tuple $(Q, \Sigma, \delta, q_{0}, F)$, - -\begin{itemize} - \item $Q$ a finite set of states - \item $\Sigma$ a fixed, finite alphabet - \item $\delta : Q \times (\Sigma \cup \{ \varepsilon\}) \to \mathcal{P}(Q)$ the labeled transition function - \item $q_{0} \in Q$ the start state - \item $F \subset Q$ a set of accepting states -\end{itemize} - -Intuitively, this can be thought of like a directed graph with nodes in $Q$ with -an edge $q \overset{c}{\to} q'$ whenever $q' \in \delta(q, c)$. Note that -transitions in an NFA may be labeled with the empty string $\varepsilon$ --- such -transitions are referred to as \emph{$\varepsilon$-transitions}. We may see an -example of an NFA in \cref{fig:NFA}. - -From an NFA, we may construct a grammar of traces as follows: - -First, we define a mutual fixed point grammar that describes the traces through the NFA.\ Then, we have another -grammar that tells us if we're currently in an accepting state of the automaton. -A parse of the NFA grammar is then a pair of a trace and the data that we're in -an accepting state. - -Consider the NFA $N$ pictured in \cref{fig:NFA}. There are three states, $1$, $2$, -and $3$. We introduce a non-linear type $Q$ with three inhabitants $q_{1}$, -$q_{2}$, and $q_{3}$ to represent each of these states, respectively. Given a -$q, q'$, we can -then define the type of traces from $q$ to $q'$. For instance, let's construct -the traces starting at $q_{1}$ and ending at $q_{2}$ as an example. 
- -\begin{equation} - \label{eq:nfatrace} - \mathsf{Trace}_{N}(q_{1}, q_{2}) = \mu - \begin{pmatrix} - g_{q_{1}} := g_{q_{3}} \oplus ( b \otimes g_{q_{2}} ) \\ - g_{q_{2}} := ( a \otimes g_{q_{2}} ) \oplus ( a \otimes g_{q_{3}} ) \oplus ( b \otimes g_{q_{3}} ) \oplus I \\ - g_{q_{3}} := a \otimes g_{q_{1}} \\ - \end{pmatrix}. g_{q_{1}} -\end{equation} - -We should read this as defining three mutually recursive grammars, one for each -state. The definitions of these mutually recursive grammars capture the -transitions of the automaton. To ensure that we only encode traces that end in -state 2, we only include the unit $I$ as a summand in -$g_{q_{2}}$. That is, by only including $I$ at this location, it makes it the -only place where derivations of $\mathsf{Trace}(q_{1}, q_{2})$ can terminate. We -can think of all of these definitions underneath -of the $\mu$ binder as bringing some local grammars into scope. With these local -grammars in scope we are ultimately constructing a term of type $g_{q_{1}}$ to denote -only traces starting in state 1. - -To encode the acceptance criteria of $N$, we want to internalize a proposition -over each state of the NFA.\ That is, for each $q \in Q$ we define a term - -\[ - \mathsf{acc}(q) := q \text{ is accepting} -\] - -In this example, $\mathsf{acc}(q_{1}) = \top$ and -$\mathsf{acc}(q_{2}) = \mathsf{acc}(q_{3}) = \bot$. An accepting trace of the NFA $N$ is then given by the following dependent grammar, -\[ - \mathsf{AccTrace}_{N} := \LinSigTy q Q {\left( \mathsf{Trace}_{N}(q_{0} , q) \pair \mathsf{acc}(q) \right)} -\] - -where $q_{0}$ is the initial state. That is, a trace is accepted by the NFA if -we can construct the trace, and the trace ends at an accepting state. This idea -is simple enough and aligns with how we intuitively treat these automata. +\steven{TODO make the agda logo clickable, and mention its meaning somewhere} +\newcommand{\Agda}{\agdalogo} +\begin{theorem}[Determinization, \Agda] + Given an NFA $N$, there exists a DFA $D$ such that + $\Trace_{N}~\N.\init~\true$ is weakly equivalent to $\Trace_{D}~\D.\init~\true$. +\end{theorem} +\newcommand{\X}{\texttt{X}} +\newcommand{\Y}{\texttt{Y}} +\newcommand{\x}{\texttt{x}} +\newcommand{\y}{\texttt{y}} +\begin{proof} + We present a variant of Rabin and Scotts's classic powerset construction \cite{rabinFiniteAutomataTheir1959}. First, we build + the DFA $\D$. Define the + states of $\D$ to be the $\mathbb{P}_{\varepsilon}(\N.\states)$ --- the type + of $\epsilon$-closed\footnote{A subset of states $\X$ is $\epsilon$-closed if + for every $\s \in \X$ and $\epsilon$-transition $\s \overset{\epsilon}{\to} \s'$ we have $\s' \in \X$.} subsets of + $\N.\states$. A subset is accepting in $\D$ if it contains an accepting state + from $\N$. Construct the initial state of $\D$ as the + $\epsilon$-closure of $\N.\init$. Lastly define the transtion function + of $\D$ to send the subset $X$ under the character $\c$ to the + $\epsilon$-closure of all the states reachable from $X$ via a transition + labelled with the character $\c$. + + We demonstrate the weak equivalence between $\Trace_{\N}~\N.\init~\true$ and + $\Trace_{\D}~\D.\init~\true$ by constructing parse transformers between the two + grammars. 
To build the parse transformer
  $\ltonl{(\Trace_{\N}~\N.\init~\true \lto \Trace_{\D}~\D.\init~\true)}$, it
  suffices to construct the stronger term
\newcommand{\NtoD}{\texttt{NtoD}}
\newcommand{\DtoN}{\texttt{DtoN}}
  \[
    \NtoD : \ltonl {\left(\Trace_{\N}~\s~\true \lto \bigwith\limits_{\X : \D.\states} \bigwith\limits_{\texttt{sInX} : \X \ni \s} \Trace_{\D}~\X~\true \right)}
  \]
  that maps a trace in $\N$ from an arbitrary state $\s$ to a trace in $\D$
  that may begin at any subset of states $\X$ that contains $\s$. $\NtoD$ may then be instantiated at
  $\s = \N.\init$ and $\X = \D.\init$ to get the desired
  parse transformer.

\begin{figure}
\begin{lstlisting}
NtoD : (s : N.states) (*@$\to$@*)
  (*@$\uparrow$@*)(Trace(*@$_{\N}$@*) s true (*@$\lto$@*)
    &[ X (*@$\in$@*) D.states ] &[ sInX (*@$\in$@*) (X (*@$\ni$@*) s) ] Trace(*@$_{\D}$@*) X true)
NtoD .s (nil s) X sInX = nil X
NtoD .(N.src trans) (N.cons trans .true a dstTrace) X sInX =
  Trace(*@$_{\D}$@*).cons (N.label trans) X true a
    (((NtoD (N.dst trans) dstTrace).(*@$\pi$@*) (D.(*@$\delta$@*) (N.label trans) X)).(*@$\pi$@*) (N.dst trans))
NtoD .(N.(*@$\epsilon$@*)src (*@$\epsilon$@*)trans) ((*@$\epsilon$@*)cons (*@$\epsilon$@*)trans .true (*@$\epsilon$@*)DstTrace) X sInX =
  ((NtoD (N.(*@$\epsilon$@*)dst (*@$\epsilon$@*)trans) (*@$\epsilon$@*)DstTrace).(*@$\pi$@*) X).(*@$\pi$@*) (N.(*@$\epsilon$@*)dst (*@$\epsilon$@*)trans)
\end{lstlisting}
\caption{Parse transformer from NFA traces to Powerset DFA traces}
\label{fig:NtoD}
\end{figure}
Because the state $\s$ is now arbitrary, we can define $\NtoD$ via
recursion on the trace through the NFA. Code for this is given in \cref{fig:NtoD}.
If the NFA trace terminates at the accepting state $\s$ and $\s \in \X$,
then we return a trace in $\D$ that terminates at
$\X$, which is accepting because it contains the accepting state $\s$.

If the NFA trace is defined via $\cons$ for a labeled transition
$\texttt{trans} : \N.\transitions$ such that $\N.\src~\texttt{trans} \in \X$, then
we first decompose the trace into a parse of $\N.\labelt~\texttt{trans}$ and a
trace starting from $\N.\dst~\texttt{trans}$. We then recursively call $\NtoD$
on this trace, which returns a family of traces parametrized by $\epsilon$-closed
subsets containing $\N.\dst~\texttt{trans}$. We instantiate this family at the
set of states
$\D.\delta~(\N.\labelt~\texttt{trans})~\X$. Because $\N.\dst~\texttt{trans}$ is
reachable from $\N.\src~\texttt{trans}$ by a single transition labeled by
$\N.\labelt~\texttt{trans}$ and $\N.\src~\texttt{trans} \in \X$, we have that
$\N.\dst~\texttt{trans} \in \D.\delta~(\N.\labelt~\texttt{trans})~\X$. Therefore
we can linearly combine the parse of the label and the trace built from the
recursive call to get a trace in $\D$ rooted at $\X$.

Lastly, if the NFA trace is defined via $\epsilon\cons$ for an $\epsilon$-transition
$\epsilon\texttt{trans} : \N.\epstransitions$ such that
$\N.\epssrc~\epsilon\texttt{trans} \in \X$, then the construction is quite
similar. In this case we again recursively call $\NtoD$, except afterwards we
instantiate the family of grammars at the set $\X$ itself. We may do this because
all states in $\D$ are taken to be $\epsilon$-closed, therefore
$\N.\epsdst~\epsilon\texttt{trans} \in \X$ because
$\N.\epssrc~\epsilon\texttt{trans}$ is in $\X$. This concludes the construction of $\NtoD$. 
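For concreteness, the desired transformer on accepting traces is then the
following instantiation, where $\mathtt{initInInit}$ is an illustrative name for
the evidence that $\N.\init$ belongs to its own $\epsilon$-closure:
\begin{lstlisting}
-- A sketch, not part of the formal artifact.
NtoD-init : (*@$\uparrow$@*)(Trace(*@$_{\N}$@*) N.init true (*@$\lto$@*) Trace(*@$_{\D}$@*) D.init true)
NtoD-init tr = ((NtoD N.init tr).(*@$\pi$@*) D.init).(*@$\pi$@*) initInInit
\end{lstlisting}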
To construct a term from DFA traces to NFA traces, we similarly strengthen our
induction hypothesis and build a parse transformer
\[
  \DtoN : \ltonl {\left( \Trace_{\D}~\X~\true \lto \bigoplus\limits_{s : \N.\states}\bigoplus\limits_{\texttt{sInX} : \X \ni \s} \Trace_{\N}~s~\true \right)}
\]
For a moment, let us turn our attention to the content behind the traces in $\D$.
Traces from subset $\X$ to subset $\Y$ describe the \emph{existence} of some trace
in $\N$ from some $\x \in \X$ to some $\y \in \Y$. In general it is not clear how
to extract out a choice of trace in $\N$, which is what $\DtoN$ is asking of us.
However, we may further take the states and transitions of $\N$ to be \emph{ordered}.
This allows us to
induce an order on traces in $\D$ from $\X$ to $\Y$. From these orderings we can then
define the necessary choice functions on each type by taking the smallest with
respect to the ordering.

We define $\DtoN$ by recursion on traces through $\D$. First, if the trace stops at
some accepting $\epsilon$-closed subset $\X$, then we may choose some state
$\s \in \X$ such that $\s$ is accepting and terminate the trace in $\N$ at $\s$.

Next, if the trace in $\D$ is defined via the transition
$\X \overset{\c}{\to} (\D.\delta~\c~\X)$ then we may choose some state
$\s \in \D.\delta~\c~\X$. Recursively, $\DtoN$ sends the trace rooted at
$\D.\delta~\c~\X$ to one rooted at $\s$. By definition of $\D.\delta~\c~\X$,
there exists some $\s' \in \X$ such that $\s'$ transitions to $\s$ by some number of
$\epsilon$-transitions followed by a transition labeled by $\c$.
We order all
such traces of this form and choose the smallest one, which is then linearly
combined with the parse of $\c$ to build an $\N$ trace. This concludes the
definition of $\DtoN$, thus showing that $\Trace_{\N}~\N.\init~\true$ and $\Trace_{\D}~\D.\init~\true$ are
weakly equivalent.
\end{proof}

\newcommand{\R}{\texttt{R}}
\begin{theorem}[Thompson's Construction, \Agda]
  Given a regular expression $\R$, there exists an NFA $\N$ such that $\R$ is
  strongly equivalent to $\Trace_{\N}~\N.\init~\true$.
\end{theorem}

\begin{proof}
\steven{TODO this proof needs to be rewritten. 
Likely through use of equalizers}
We present a variant of Thompson's construction
\cite{thompsonProgrammingTechniquesRegular1968} in which we recurse on the
structure of $\R$ to construct a strongly equivalent NFA. In this paper we walk through
some of the cases of this proof. The remaining cases
follow
similar reasoning, and the details may be found in the formalized artifact.

\begin{figure}
  \begin{tikzpicture}[node distance = 17mm ]
    \node[state, initial] (0) {$\0$};
    \node[state, right of=0, accepting] (1) {$\1$};

    \path[->] (0) edge[above] node{$\stringquote{c}$} (1);
  \end{tikzpicture}
  \caption{NFA $\N_{\c}$ for a single character $\c$}
  \label{fig:litnfa}
\end{figure}
First, we handle the case where $R$ is a literal character $c$. The NFA $N_{c}$ pictured in
\cref{fig:litnfa} is strongly equivalent to the grammar $c$. To demonstrate this
strong equivalence, we build terms between $c$ and $\Trace_{N_{c}}(N_{c}.\init)$
and show they are mutually inverse.

Define $f : \ltonl {(c \lto \Trace_{N_{c}}(N_{c}.\init))}$ as follows,
\[
  a : c \vdash f := \cons(a, \nil) : \Trace_{N_{c}}(N_{c}.\init)
\]

Define $g : \ltonl {(\Trace_{N_{c}}(N_{c}.\init) \lto c)}$ via induction on the
trace. Map traces beginning in state 1, which are necessarily built via $\nil$,
to the linear unit $I$. Then we must handle traces beginning in state 0, which
are necessarily built via $\cons$. We decompose such a trace into a parse of the
character $c$ and a trace beginning in state 1. We inductively transform the
trace beginning in state 1 into a term of type $I$. Finally, we use the right
unitor to turn a term of type $c \otimes I$ into one of type $c$.

-\subsection{A DFA Parser}
-\label{subsec:regexparser}
-
-Just as we encoded traces of NFAs as grammars, we likewise
-encode the traces of a DFA as grammars. 
The key difference -between NFAs and DFAs is \emph{determinism} --- meaning, -that in a state $q$ inside of DFA, given a character $c$ there -will be exactly one transition that we may take leaving $q$ -with label $c$. For us, this changes the definition of valid -transitions for a DFA, instead of the definition of -$\mathsf{Trans}$ provided in \cref{subsec:finiteaut} DFAs -obey - -\begin{gather*} - \mathsf{State} \in \{ g_{q} : q \in Q \} \\ - \mathsf{Trans}(q) ::= \bigoplus_{c \in \Sigma} (c \otimes \mathsf{State}) -\end{gather*} - -Meaning, each state has a transition for every character. \emph{Note:} the above use - of a disjunction over $\Sigma$ is a bit of abuse of notation. - As $\Sigma$ is a finite alphabet, we wish to think of this as an iterated - binary sum, or perhaps as a finitely indexed sum defined via linear dependent - sums in the same manner by which we defined binary sums. - -We now wish to define a parser term for DFA grammars. In -particular, for a DFA $D$ we want to build a term, +Define $f : \ltonl {(c \lto \Trace_{N_{c}}(N_{c}.\init))}$ as follows, \[ - w : \String \vdash \mathsf{parse}_{D} : \mathsf{AccTrace}_{D} \oplus \top + a : c \vdash f := \cons(a, \nil) : \Trace_{N_{c}}(N_{c}.\init) \] -where left injection into the output type denotes acceptance -by the parser, and right injection denotes rejection. To -build such a parser, it will suffice to construct a term +Define $g : \ltonl {(\Trace_{N_{c}}(N_{c}.\init) \lto c)}$ via induction on the +trace. Map traces beginning in state 1, which are necessarily built via $\nil$, +to the linear unit $I$. Then we must handle traces beginning in state 0, which +are necessarily built via $\cons$. We decompose such a trace into a parse of the +character $c$ and a trace beginning in state 1. We inductively transform the +trace beginning in state 1 into a term of type $I$. Finally, we use the right +unitor to turn a term of type $c \otimes I$ into one of type $c$. -\[ - w : \String \vdash \mathsf{parse}_{D} : \LinSigTy q Q {\mathsf{Trace}_{D}(q_{0} , q)} -\] -This is because given a trace of a DFA, we may easily check -if we should accept or reject by simply testing -if the final state is accepting. - -Because $w$ is a Kleene star of characters, we may construct -our desired $\mathsf{parse}_{D}$ term as a $\mathsf{foldl}$ -over $w$. In the empty case, we just have the trace that -ends at the accepting state. In the recursive case, we -effectively add to our trace by transitioning one character -at a time, as we read them moving across $w$. - -Perhaps this derivation is not too surprising. All it says -is that a DFA may be ran by transitioning a single character -at a time, and then accepting or rejecting based on the -final state. This is exactly what DFAs did initially, so -what did we gain? Well, this has the benefit of our type -system to ensure its correctness. Moreover, this construction exports to an -intrinsically verified and executable decision procedure for DFAs in \theoryabbv -embedded in Agda. - -We should note that the -construction of a DFA parser requires the addition of an additional but seemingly innocuous -axiom. - -\[ - \top \cong \left( \bigoplus_{c : \Sigma} c \right)^{*} -\] - -That is, the characters in the alphabet $\Sigma$ really are all of the -characters. 
The need for this axiom arises when we go to take a transition -within the DFA, -as we must ensure that the next character of the string has a corresponding -transition from the current state ---- which would be guaranteed by determinism provided there are no surprise characters. - -\subsection{Regular Expressions and DFAs} -\label{subsec:deriv} - -In order to extend the DFA -parser from \cref{subsec:regexparser} to the construction of -a verified parser -generator for regular expression we need to perform some -plumbing establishing an equivalence between regular -expressions and DFAs. - -There are several routes we may hope to take in establishing -this equivalence. First, we could prove an equivalence -between NFAs and regular expressions, and separately prove -an equivalence between NFAs and DFAs. -In \cref{subsec:eqproofs}, we include a version of -Thompson's construction --- which established the -equivalence between regular grammars and DFAs. We may additionally -hope to internalize a variant of the powerset construction \cite{rabinFiniteAutomataTheir1959} ---- which takes as input an NFA and constructs a DFA that -recognizes the same language --- and combine the results of -Thompson's construction and the powerset constructions to give an equivalence -between regular expressions and DFAs. This route is alluring, as it -internalizes several classic grammar-theoretic constructions. However, it may -necessitate extensions to the LNL theory, like -a propositional truncation, and we have not yet investigated -how this would interact with the existing types in the -theory. The addition of a propositional truncation may seem -harmless, but it is not always immediately clear how -distinct constructions will interact. For instance, when -exploring LNL models, Benton discovered that the synthesis -of linear and dependent types require a new presentation of -the $!$ modality from linear logic -\cite{bentonMixedLinearNonlinear1995}. That is all to say, -this is a work in progress and -it is not immediate that the addition of a propositional -truncation is adequate for establishing the weak equivalence -between NFAs and DFAs. - -We may instead hope to internalize an equivalence between -regular grammars and DFAs by using Brzozowski derivatives to -directly create a DFA that is weakly equivalent to a given -regular expression, as described by Owens et al. -\cite{owensRegularexpressionDerivativesReexamined2009}. -One characterization of regular grammars is that they are -precisely those grammars which have finitely many inequivalent Brzozowski -derivatives -\cite{brzozowskiDerivativesRegularExpressions1964}. -The algorithm used by Owens takes in a -regular grammar and generates a DFA that recognizes the same -language, and the states in this DFA are the finitely many -derivative equivalence classes. We initially had a version of -this theorem very roughly internalized in the LNL theory. -To our taste, too much of this presentation relied on -meta-arguments that lived outside of -our formalism, and thus this particular phrasing of the -theorem did not translate well into formalization. - -In any case, we believe -that revisiting these lines of thought will lead to a -satisfactory internalization of the equivalence between -regular grammars and DFAs, and thus would bridge the gap -between our DFA parser and a full regular expression parser. 
- -\subsection{Equivalence Between Regular Expressions and Finite Automata} -\label{subsec:eqproofs} -In this section, we describe a version of Thompson's -construction \cite{thompsonProgrammingTechniquesRegular1968} - where we construct an NFA that recognizes a given regular -expression. Moreover, we will show that this NFA is strongly equivalent to the -original grammar. Witnessing this construction in our syntax has two benefits -\begin{enumerate} - \item It reinforces this high-level view that the syntax is a natural and - general setting for formal grammar reasoning, as we demonstrate that - this formal system subsume results from existing systems, and - \item Following the - development of Thompson's construction, we then need - only establish the equivalence of NFAs and DFAs to - complete the full regular expression parser -\end{enumerate} - -\begin{theorem}[Thompson] - \label{thm:thompson} - For $g$ a regular grammar $g$, there is an NFA $N$ that recognizes the same - language as $g$. -\end{theorem} -We make a pretty straightforward adaptation of Thompson's theorem to our setting, - -\begin{theorem}[Typed Thompson] - \label{thm:typthompson} - For $g$ a regular expression $g$, there is an NFA $N$ such that $g$ is isomorphic - to $\mathsf{AccTrace}_{N}$. -\end{theorem} - -\begin{proof}[Proof Sketch] - Recall that regular expressions are inductively defined via - disjunction, concatenation, and Kleene star over literals - and the empty grammar. By induction over regular expressions, - we will construct an NFA that is equivalent to $g$. - - First, define the recognizing NFA for the empty grammar - $I$. +\[\begin{tikzcd} + {c \otimes \mathsf{Trace}_{N_{c}}(1)} & + {c \otimes I} & + c + \arrow["{id\otimes g}", from=1-1, to=1-2] + \arrow["{}", from=1-2, to=1-3] +\end{tikzcd}\] - \begin{figure}[h!] - \begin{tikzpicture}[node distance = 25mm ] - \node[state, initial] (1) {$1$}; - \node[state, right of=1, accepting] (2) {$2$}; +Note that the grammar $c$ is unambiguous, so $g \circ f$ is equal to the +identity on $c$. Additionally, we can show that $f$ carries the structure of an +indexed algebra homomorphism for the family of functors defining the trace type +$\Trace_{N_{c}}$. Because $g$ is defined via induction, it carries the structure +of an algebra homomorphism as well. Thus, $f \circ g$ is also an algebra +homomorphism. $f \circ g$ and the identity map are both then algebra +homomorphisms from $\Trace_{N_{c}}$ to itself. Because $\Trace_{N_{c}}$ is the +initial algebra for the relevant family of functors, there is only one +homomorphism from it to itself and thus $f \circ g = \mathsf{id}$. Therefore, +$f$ and $g$ are mutually inverse and the regular expression is strongly +equivalent to the traces through the NFA in this case. + +Now consider the case where $R$ is the disjunction of two regular expressions +$R_{1}$ and $R_{2}$. Inductively, we may build NFAs $N_{1}$ and $N_{2}$ that are +strongly equivalent to $R_{1}$ and $R_{2}$, respectively. We define $N_{\oplus}$ +as a new NFA built as the disjunction of $N_{1}$ and $N_{2}$. +$N_{\oplus}.\states$ comprises the states from $N_{1}$, the states from +$N_{2}$, and a new marked initial state. The accepting states are precisely +those that were accepting in the subautomata. The initial state has an +$\varepsilon$-transition to each of the initial states of the subautomata. Once in +$N_{1}.\init$, the downstream transitions are a copy of those in $N_{1}$, +likewise for $N_{2}.\init$. 
That is, $N_{\oplus}$ contains a copy of each +$N_{1}$ and $N_{2}$, and we choose which copy to inhabit by deciding which +$\varepsilon$-transition out of the initial state to take. A schematic +representation of this automaton is given in \cref{fig:disjunctionnfa}. - \path[->] (1) edge[below] node{$\varepsilon$} (2); +\begin{figure} + \begin{tikzpicture}[node distance = 17mm ] + \node[state, initial] (init) {$\init$}; + \node[state, above right of=init] (init1) {$N_{1}.\init$}; + \node[state, below right of=init] (init2) {$N_{2}.\init$}; + \node[right of=init1] (dots1) {$\dots$}; + \node[right of=init2] (dots2) {$\dots$}; + \node[state, accepting, right of=dots1] (acc1) {$s_{1}$}; + \node[state, accepting, right of=dots2] (acc2) {$s_{2}$}; + + \path[->] (init) edge[below] node{$\varepsilon$} (init1) + (init) edge[above] node{$\varepsilon$} (init2) + (init1) edge[above] node{} (dots1) + (init2) edge[above] node{} (dots2) + (dots1) edge[above] node{} (acc1) + (dots2) edge[above] node{} (acc2); \end{tikzpicture} - \caption{$NFA(I)$} - \label{fig:emptyNFA} - \end{figure} - - The type of traces from the initial state of $NFA(I)$ to the single - accepting state is given by, - - \[ - \mathsf{Trace}_{NFA(I)}(q_{1}, q_{2}) = \mu - \begin{pmatrix} - g_{q_{1}} := g_{q_{2}} \\ - g_{q_{2}} := I - \end{pmatrix}. g_{q_{1}} - \] - - The accepting traces through $NFA(I)$ are then described - as, - - \[ - \mathsf{AccTrace}_{NFA(I)} = \LinSigTy q {\{1 , 2\}} {\left( \mathsf{Trace}_{NFA(I)}(q_{1} , q) \pair \mathsf{acc}(q) \right)} - \] - - A quick inspection of \cref{fig:emptyNFA} reveals that the - only reasonable choice for $q$ is $q_{2}$ --- because - state 2 is accepting while state 1 is not. Therefore, - - \begin{align*} - \mathsf{AccTrace}_{NFA(I)} - & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \pair \mathsf{acc}(q_{2}) \\ - & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \pair \top \\ - & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) - \end{align*} - - From here, to prove - $I \cong \mathsf{AccTrace}_{NFA(I)}$ it suffices to show - $I \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})$. Below we - give two parse transformers, one from $I$ to - $\mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})$ and vice versa. - - \[ - \inferrule - {p : I \vdash \mathsf{nil} : \mathsf{Trace}(q_{2} , q_{2}) \\ - \exists \text{~transition~} q_{1} \overset{\varepsilon}{\to} q_{2} - } - {p : I \vdash \mathsf{\varepsilon cons}(\mathsf{nil}) : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})} - \] - - Let $\gamma$ be the substitution $\{ g_{q_{2}} / g_{q_{1}}, I / g_{q_{2}} \}$, + \caption{NFA $N_{\oplus}$ as a disjunction of $N_{1}$ and $N_{2}$} + \label{fig:disjunctionnfa} +\end{figure} - \[ - \inferrule - { - p : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \vdash p : - \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \\ - x_{1} : \simulsubst {g_{q_{2}}} {\gamma} = I \vdash x_{1} : I \\ - x_{2} : \simulsubst {I} {\gamma} = I \vdash x_{2} : I - } - { - p : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \vdash \mathsf{mfold}(x_{1}.x_{1} , x_{2}.x_{2})(p) : I - } - \] +Because $N_{1}$ and $N_{2}$ are each strongly equivalent to $R_{1}$ and $R_{2}$, +it suffices to show that +$\Trace_{N_{1}}(N_{1}.\init) \oplus \Trace_{N_{2}}(N_{2}.\init)$ is strongly +equivalent to $\Trace_{N_{\oplus}}(N_{\oplus}.\init)$. + +Just like the case of a single character, when building the maps in either +direction, we strengthen the induction hypothesis by mapping out of traces +beginning with an arbitrary state then proceed by induction on traces. 
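+
+Concretely, before this strengthening, the equivalence consists of two mutually
+inverse parse transformers. We sketch their types here, using the hypothetical
+names $\mathtt{inj}$ and $\mathtt{split}$ for illustration:
+\[
+  \mathtt{inj} : \ltonl {\left( \Trace_{N_{1}}(N_{1}.\init) \oplus \Trace_{N_{2}}(N_{2}.\init) \lto \Trace_{N_{\oplus}}(N_{\oplus}.\init) \right)}
+\]
+\[
+  \mathtt{split} : \ltonl {\left( \Trace_{N_{\oplus}}(N_{\oplus}.\init) \lto \Trace_{N_{1}}(N_{1}.\init) \oplus \Trace_{N_{2}}(N_{2}.\init) \right)}
+\]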
+
+First, define the
+mapping out of $\Trace_{N_{\oplus}}(s)$. If $s$ is a state from $N_{1}$, then we
+send the trace to a trace in $N_{1}$ beginning at $s$. Likewise, if $s$ is from
+$N_{2}$, then we map to a trace in $N_{2}$ beginning at $s$. If $s$ is
+$N_{\oplus}.\init$, then we map the trace to a disjunction of traces
+$\Trace_{N_{1}}(N_{1}.\init) \oplus \Trace_{N_{2}}(N_{2}.\init)$.
+
+In the other direction, we need to map a disjunction of traces through $N_{1}$
+or $N_{2}$ to a trace in $N_{\oplus}$. Without loss of generality, assume that
+the trace came from $N_{1}$. That is, assume we currently have a term of type
+$\Trace_{{N_{1}}}(N_{1}.\init)$ and we aim to construct a term of type
+$\Trace_{N_{\oplus}}(N_{\oplus}.\init)$. We induct on the $N_{1}$ trace and map
+it over to a trace beginning at the copy of $N_{1}.\init$ in $N_{\oplus}$, and
+then we conclude by using the $\epscons$ constructor to turn this trace into one
+beginning at $N_{\oplus}.\init$.
+
+We show that these maps mutually invert each other by the same sort of argument
+we used with the single character NFA.\ That is, we prove that each map is a
+homomorphism of algebras and then appeal to the initiality of each of
+$\Trace_{N_{1}}$, $\Trace_{N_{2}}$, and $\Trace_{N_{\oplus}}$ to prove that the
+relevant maps compose to the identity.
+
+In this sketch, we elide the proof that these maps do indeed form homomorphisms.
+Further, we elide the NFA constructions for the cases where the regular
+expression is $I$, $\bot$, a Kleene star, or a concatenation. All of these
+elided details may be found in the Agda formalization.
+\end{proof}
+
+\subsection{The Dyck Grammar}
+%% 3. Dyck language
+\newcommand{\balanced}{\mathtt{balanced}}
+\newcommand{\Dyck}{\mathtt{Dyck}}
+\steven{This subsection is still a WIP}
+Moving up the Chomsky hierarchy from regular grammars, we arrive at context-free
+grammars (CFGs). The simplest nontrivial CFG is the Dyck grammar
+$\Dyck$ --- the grammar of balanced parentheses. Moreover, the Chomsky-Sch\"utzenberger
+representation theorem \cite{CHOMSKY1959118} demonstrates that $\Dyck$, and grammars like it,
+cut to the essence of what it means to be context free.
+
+$\Dyck$ is a grammar over the alphabet $\{ \stringquote{(}, \stringquote{)} \}$
+and is defined in \cref{fig:dyckinductive}. The $\nil$ constructor shows that
+the empty string is balanced, and the $\balanced$ constructor builds a balanced
+parse by wrapping an already balanced parse in an additional set of parentheses
+and then following it with another balanced parse.
+\begin{figure}
+\begin{lstlisting}
+data Dyck : L where
+  nil : (*@$\uparrow$@*)(Dyck)
+  balanced : (*@$\uparrow$@*)('(' (*@$\lto$@*) Dyck (*@$\lto$@*) ')' (*@$\lto$@*) Dyck (*@$\lto$@*) Dyck)
+\end{lstlisting}
+\caption{The Dyck grammar as an inductive linear type}
+\label{fig:dyckinductive}
+\end{figure}
+
+\steven{somewhere in here, sneak in the benchmark where the Dyck parser could
+  handle 10's of thousands of characters in about 30s}
+
+We construct a parser for $\Dyck$ by building a deterministic automaton $M$ such
+that $\Trace_{M}(M.\init) \cong \Dyck$. 
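+
+Before examining $M$, it may help to see a concrete inhabitant of $\Dyck$. As a
+minimal sketch (with hypothetical variable names $a$ and $b$ standing for the
+parses of the two characters), the balanced string
+$\stringquote{(}\stringquote{)}$ is parsed by a single application of
+$\balanced$ whose inner and trailing subparses are both empty:
+\[
+  a : \stringquote{(} , b : \stringquote{)} \vdash \balanced~a~\nil~b~\nil : \Dyck
+\]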
+ +\Cref{fig:DyckAutomaton} presents a sketch of the automaton $M$. The traditional +approach to parse grammars like $\Dyck$ is to employ a stack to count how many +open parentheses have been seen so far. We may think of +$M$ as performing the same task as a stack machine, except instead of storing +the number of open parentheses in a distinguished stack of memory, $M$ operates +by storing this data in the current state. This is made feasible by specifying +$M$ almost identically to how we defined DFAs, except we omit the +finiteness condition on the states of the automaton. + +\steven{Mention that $\Trace_{M}$ is iso to string. Mention the same for DFAs above } + +% Unlike the finite automata seen thus far, $M$ has infinitely many states --- one +% for each $n : \mathbb{N}$ with an additional fail state. This diverges a bit +% from classical language theory, where the recognizer for $D$ would be encoded as a +% pushdown automaton with finitely many +% states and stack of memory. Such PDAs are encodable in our theory, although they +% are a little cumbersome to use. Moreover, they +% seem overfit to classical reasoning as they don't preserve the grammar up to +% strong equivalence, only weak equivalence in general. \steven{cite Lambek +% automata and state that is is a secondary goal to preserve as much structure as possible} +% By moving the stack of memory (the nats) into the +% state, we obtain infinitely many states, which is fine bc the states need only +% be a type for us to interface with it. + +\steven{TODO sketch proof of equivalence. Mention that it is mechanized} - \path[->] (1) edge[below] node{$c$} (2); +\begin{figure} + \begin{tikzpicture}[node distance = 17mm ] + \node[state, initial, accepting] (0) {$0$}; + \node[state, right of=0] (1) {$1$}; + \node[state, right of=1] (2) {$2$}; + \node[right of=2] (3) {$\dots$}; + \node[state, below of=0] (fail) {$\mathsf{fail}$}; + + \path[->] (0) edge[above, bend left] node{\stringquote{(}} (1) + (1) edge[below, bend left] node{\stringquote{)}} (0) + (1) edge[above, bend left] node{\stringquote{(}} (2) + (2) edge[below, bend left] node{\stringquote{)}} (1) + (2) edge[above, bend left] node{\stringquote{(}} (3) + (3) edge[below, bend left] node{\stringquote{)}} (2) + (fail) edge[loop right] node{\stringquote{(}, \stringquote{)}} (fail) + (0) edge[left] node{\stringquote{)}} (fail); \end{tikzpicture} - \caption{$NFA(c)$} - \label{fig:literalNFA} - \end{figure} - - The automaton in \cref{fig:literalNFA} induces the - following type of traces from $q_{1}$ to $q_{2}$. - - \[ - \mathsf{Trace}_{NFA(c)}(q_{1}, q_{2}) = \mu - \begin{pmatrix} - g_{q_{1}} := c \otimes g_{q_{2}} \\ - g_{q_{2}} := I - \end{pmatrix}. g_{q_{1}} - \] - - Through the same argument as the empty grammar, the only - state that is accepting is $q_{2}$ and thus, - - \[ - \mathsf{AccTrace}_{NFA(c)} \cong \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) - \] - - To show the desired isomorphism of $c \cong NFA(c)$ we - make a similar argument as we did for the empty grammar - $I$, except we leverage the $\mathsf{cons}$ rule instead - of $\mathsf{\varepsilon cons}$. That is, the parse - transformers in either direction are given as, - - \[ - \inferrule - {\cdot \vdash \mathsf{nil} : \mathsf{Trace}(q_{2} , q_{2}) \\ - \exists \text{~transition~} q_{1} \overset{c}{\to} q_{2} - } - {p : c \vdash \mathsf{cons}(\mathsf{nil}) : - \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2})} - \] + \caption{Automaton $M$ for the Dyck grammar} + \label{fig:DyckAutomaton} +\end{figure} +% \subsection{LL(1)} +% %% 4. 
LL(1) parser - \[ - \inferrule - { - p : \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \vdash p : - \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \\ - x_{1} : \simulsubst {(c \otimes g_{q_{2}})} {\gamma} = c \otimes I \vdash \mathsf{unitR}(x_{1}) : c \\ - x_{2} : \simulsubst {I} {\gamma} = I \vdash x_{2} : I - } - { - p : \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \vdash \mathsf{mfold}(x_{1}.\mathsf{unitR}(x_{1}) , x_{2}.x_{2})(p) : c - } - \] +% \steven{TODO impute LL(1) example, mention that its weak equivalence is mechanized} - Where $\gamma$ is the substitution - $\{ c \otimes g_{q_{2}} / g_{q_{1}}, I / g_{q_{2}} \}$ and - $\mathsf{unitR}$ is a witness to the isomorphism - $c \otimes I \cong c$. Again, we may see that these do - indeed mutually invert each other in the Agda code. - - It remains to show that the desired isomorphisms are - preserved by $\otimes$, $\oplus$, and Kleene star. Here, - we will give the argument for the disjunction case, the - others are defined quite similarly. - - Given two NFAs $N$ and $M$, we may define a new NFA that - encodes the disjunction of $N$ and $M$. Denote the - internal states of $N$ by $q_{j}$'s and the internal states - of $M$ by $r_{k}$'s, - - \begin{figure}[h!] - \begin{tikzpicture}[node distance = 20mm ] - \node[state] (2) {$q_{init}$}; - \node[state, initial, below left of=2] (1) {$init$}; - \node[state, below right of=1] (3) {$r_{init}$}; - \node[right of=2] (4) {$\cdots$}; - \node[right of=3] (5) {$\cdots$}; - % \node[state, below right of=1] (3) {$3$}; - % \node[state, below of=2] (4) {$4$}; - - \path[->] (1) edge[below] node{$\varepsilon$} (2); - \path[->] (1) edge[below] node{$\varepsilon$} (3); - \path[->] (2) edge[below] node{} (4); - \path[->] (3) edge[below] node{} (5); - - \node[label={[name=l] $N$}, draw,line width=2pt,rounded corners=5pt, fit=(2)(4)] {}; - \node[label={[name=l] $M$}, draw,line width=2pt,rounded corners=5pt, fit=(3)(5)] {}; - \end{tikzpicture} - \caption{$N \oplus_{NFA} M$} - \label{fig:disjunctionNFA} - \end{figure} - - \Cref{fig:disjunctionNFA} shows the process of - disjunctively combining NFAs. Precisely, we add a single - new state and we included copies of the states from each of $N$ - and $M$. The new state acts as the initial state and has - $\varepsilon$-transitions to the initial states of $N$ and - $M$. We include all of the internal transitions from $N$ - and $M$, and the accepting states of $N \oplus_{NFA} M$ - are exactly the accepting states in each subautomaton. - - Let $g$ and $g'$ be two regular grammars such that - $g \cong NFA(g)$ and $g' \cong NFA(g')$. As a matter of - notation\footnote{We shall similarly abuse notation for - $\otimes$ and Kleene. That is, for a regular grammar - $g$, when we write $NFA(g)$ we mean the NFA inductively - built up with the NFA-analogues to the constructors that - built up $g$.}, we will write $NFA(g \oplus g')$ for - $NFA(g) \oplus_{NFA} NFA(g')$. The traces of - $NFA(g \oplus g')$ are then given by, +\subsection{Turing Machines} +\newcommand{\Turing}{\mathtt{Turing}} +\newcommand{\ttt}{\mathtt{t}} +\newcommand{\T}{\mathtt{T}} +\newcommand{\acc}{\mathtt{acc}} +\newcommand{\rej}{\mathtt{rej}} +\newcommand{\Tape}{\mathtt{Tape}} +\newcommand{\parseTape}{\mathtt{parseTape}} +\newcommand{\snoc}{\mathtt{snoc}} +\steven{This subsection is still a WIP} +%% 5. Expressiveness: Turing Machines +The expressive power of \theoryabbv spans the entire Chomsky hierachy, as we may +encode the acccepting traces of Turing machines as linear types. 
- \[ - \mathsf{Trace}_{NFA(g \oplus g')}(src , dst) = \mu - \begin{pmatrix} - g_{init} := g_{q_{0}} \oplus g_{r_{0}} \\ - g_{q_{j}} := \mathsf{Trans}_{NFA(g)}(q_{j}) \oplus \mathsf{isDst}(q_{j}) \\ - g_{r_{k}} := \mathsf{Trans}_{NFA(g')}(r_{k}) \oplus \mathsf{isDst}(r_{k}) - \end{pmatrix}.g_{src} - \] - - where $\mathsf{Trans}$ is used to echo the same syntactic - definitions that appear in the $NFA(g)$ and $NFA(g')$. - Also, $src$ and $dst$ may take on any value in - $Q := \{init\} \cup \{q_{j}\} \cup \{r_{k}\}$, and - $\mathsf{isDst}(q)$ checks if $q$ is equal to $dst$. Which - is all to say, the traces of $NFA(g \oplus g')$ comprise - either a trace of $NFA(g)$, or a trace of $NFA(g')$, and - the transition coming out of $g_{init}$ determines which - subautomaton we step into. - - The parse transformer from $g \oplus g'$ checks which side - of the sum type we are on, then takes the appropriate step - from $g_{init}$ in the automaton. +\steven{TODO introduce tape alphabet and blank symbol} - \[ - \inferrule - { - u : g \vdash \iota (\phi (u)) : \mathsf{AccTrace}_{NFA(g \oplus g')} \\ - v : g' \vdash \iota' (\psi (v)) : \mathsf{AccTrace}_{NFA(g \oplus g')} - } - {p : g \oplus g' \vdash \mathsf{case}~p \{ \mathsf{inl}(u) \mapsto s , \mathsf{inr}(v) \mapsto r \} : \mathsf{AccTrace}_{NFA(g \oplus g')}} - \] +The data of a Turing machine $\T$ is given by the following data, +\begin{itemize} + \item $\T.\states$ --- a finite set of states + \item $\T.\init : \T.\states$ --- the initial state + \item $\T.\acc , \T.\rej : \T.\states$ --- distinguished accepting and + rejecting states, respectively, such that $\T.\acc \neq \T.\rej$ + \item $\T.\delta : \T.\states \to \Gamma \to \T.\states \times \Gamma \times \Bool$ + --- a deterministic transition function. Given the current state and the + head of the tape return the new state, the character written to the + tape, and which direction to shift ($\true$ for left, $\false$ for right) +\end{itemize} - with $\iota$ and $\iota'$ as embeddings from $NFA(g)$ and - $NFA(g')$, respectively, into $NFA(g \oplus g')$, - $\phi: g \cong \mathsf{AccTrace}_{NFA(g)}$, and $\psi: g' \cong \mathsf{AccTrace}_{NFA(g')}$. On a - high level, all this construction does is turn a parse of - $g$ into a parse of $NFA(g)$ and then embeds that inside - of the larger automaton $NFA(g \oplus g')$. Likewise for $g'$. - - In the other direction, recall that the data of an - accepting trace for $NFA(g \oplus g')$ is a pair of a - trace and a proof that - the end state $q'$ of that trace is accepting. By - multifolding over the first part of that pair, we turn the - term of type - $\mathsf{Trace}_{NFA(g \oplus g')}(init , q')$ into a - trace of either of the subautomata, +The tape of a +(one-sided) Turing machine is encoded as nonlinear function from the natural numbers to the tape +alphabet $\Gamma$, $\Tape = \mathbb{N} \to \Gamma$. To initialize the tape of +$\T$, we define the parse transformer +$\parseTape : \ltonl {(\stringg \lto \LinSigTy {\ttt} {\Tape} {\Turing~\ttt})}$ in \cref{fig:parseTape}. - \[ - p : \mathsf{Trace}_{NFA(g \oplus g')}(init , q') \vdash \mathsf{mfold}_{NFA(g \oplus g')} : \mathsf{Trace}_{NFA(g)}(q_{0} , q') \oplus \mathsf{Trace}_{NFA(g')}(r_{0} , q') - \] +\steven{TODO finish the description of Turing machines} - Additionally, we leverage the fact that the only accepting - states for $NFA(g \oplus g')$ are those from the - subautomata to extract that $q'$ must be an accepting - state from a subautomaton. 
+\begin{figure} +\begin{lstlisting} +data Turing : (t : Tape) (*@$\to$@*) L where + nil : (*@$\uparrow$@*)(&[ t (*@$\in$@*) Tape ] Turing t) + snoc : (*@$\uparrow$@*)(&[ c (*@$\in$@*) (*@$\Sigma$@*) ] &[ t (*@$\in$@*) Tape ](Turing t (*@$\lto$@*) c (*@$\lto$@*) Turing (push c t))) +\end{lstlisting} +\caption{Inductive linear type of Turing machines initialized with tape $\ttt$} +\label{fig:turinginductive} +\end{figure} - \[ - x : \mathsf{acc}_{NFA(g \oplus g')}(q') \vdash M : \mathsf{acc}_{NFA(g)}(q') \oplus \mathsf{acc}_{NFA(g')}(q') - \] +\begin{figure} +\begin{lstlisting} +parseTape : (*@$\uparrow$@*)(String (*@$\lto$@*) (*@$\oplus$@*)[ t (*@$\in$@*) Tape ] Turing t) +parseTape String.nil = (*@$\sigma$@*) ((*@$\lambda$@*) n . blank) (Turing.nil ((*@$\lambda$@*) n . blank)) +parseTape (String.cons ((*@$\sigma$@*) c a) w) = + let (*@$\sigma$@*) t Tur = parseTape w in (*@$\sigma$@*) (push c t) (snoc c t Tur a) +\end{lstlisting} +\caption{Initialization of the tape of Turing machine as a parse transformer} +\label{fig:parseTape} +\end{figure} - We then combine the trace and proof of acceptance into an - accepting trace of one of the subautomata, - \[ - p : \mathsf{AccTrace}_{NFA(g \oplus g')} \vdash N : \mathsf{AccTrace}_{NFA(g)} \oplus \mathsf{AccTrace}_{NFA(g')} - \] - Lastly, we then inductively use the isomorphisms $\phi$ - and $\psi$ to turn the accepting traces into a parse of - $g$ or $g'$, +We may then define a term of type +$\mathsf{parseTape} : \ltonl {\left( \stringg \lto \LinSigTy {\ttt} {\Tape} {\Turing~\ttt} \right)}$. +In this manner, all of the interesting computations that the Turing machine +would do occur solely inside of the nonlinear index. That is, +$\mathsf{parseTape}$ does not discern which strings are accepted by the Turing +machine, nor does it do any computation. $\mathsf{parseTape}$ simply loads the +input string into the initial spaces on the tape. - \[ - N : \mathsf{AccTrace}_{NFA(g)} \oplus \mathsf{AccTrace}_{NFA(g')} \vdash \mathsf{case}~N~\{\mathsf{inl}(n) \mapsto \phi^{-1}(n), \mathsf{inr}(n') \mapsto \psi^{-1}(n')\} : g \oplus g' - \] +Once the string has been loaded onto the tape, we may further define the +following term to run the Turing machine. +\[ + \mathsf{run} : \ltonl {\left( \bigoplus\limits_{t : \mathbb{N} \to \Gamma} \Turing(t) \lto \bigwith\limits_{\mathsf{fuel} : \mathbb{N}} \mathsf{Maybe} \left( \bigoplus\limits_{t : \mathbb{N} \to \Gamma} \bigoplus\limits_{b : \Bool} \bigoplus\limits_{\mathsf{trace} : \mathsf{TuringTrace}(b , t)} \Turing(t) \right) \right)} +\] - When you wish to show that these actually maps invert each other to form an isomorphism\max{jumbled sentence}, we invoke the - universal properties of the $\mathsf{Trace}$ types. That is, - $\mathsf{Trace}_{\oplus NFA}$, $\mathsf{Trace}_{N}$, and $\mathsf{Trace}_{M}$ - are \emph{initial} algebras to the the functors that define them. Thus, the - embedding of traces through $N$ into $\oplus NFA$ and then mapped back down to $N$ - compose to a map from $\mathsf{Trace}_{N}$ to itself. Such a map is unique and - thus must be the identity. The same reasoning shows that the desired - composite do indeed invert each other to form an isomorphism. - - As discussed above, the other cases and - follow through a similar argument. This - concludes the proof of our variant of Thompson's construction. -\end{proof} +where $\mathsf{TuringTrace}$ is a nonlinear type denoting terminating traces +through the Turing machine. 
Note that $\mathsf{run}$ returns a $\mathsf{Maybe}$
+type indexed by a fuel parameter because Turing machines may not terminate. If they do
+terminate within $\mathsf{fuel}$-many steps, then we return the trace along with
+the current tape and a boolean denoting whether the trace is accepting or rejecting.
+If the trace does not terminate within the allotted amount of fuel, then we
+time out and return nothing.
+
+\subsection{Semantic Actions}
+\steven{TODO write this section and give some small simple actions}
+All constructions discussed until now detail how a string may be parsed into a
+\emph{concrete} syntax tree; however, this is often not the representation that later
+compiler passes would take in. Instead, they often take in an \emph{abstract}
+syntax tree that forgets syntactic details that are superfluous to the later
+passes. For example, after using parentheses to disambiguate a parse of a string
+into a concrete syntax tree, we may remove these parentheses to obtain a cleaner
+structured representation of the underlying data. The procedure that takes in a
+concrete syntax tree and returns an abstract one is referred to as a
+\emph{semantic action}.
+
+Given a linear type $A$, the inhabitants of $A$ serve as concrete syntax. We may
+then choose a non-linear type $X$ that serves as abstract syntax. A semantic
+action can then be defined as $\square (A \lto \Delta X)$ where
+$\Delta X = \LinSigTy x X \top$. We sketch a small example below.
+\steven{Note here that even though $\Delta$ is a map from nonlinear types into
+  linear ones, it is very different from $F$/whatever we call that half of the adjunction}
+
+% \steven{Everything in this section below this marker is outdated and needs to be
+% folded into the above or cut}
+
+% \steven{Need a different segue because canonicity is no longer before this}
+% The canonicity theorem from the previous section gives a tight connection
+% between terms in our type theory and the theory of formal languages. In this section
+% we further explore these connections by using the fact that, classically
+% formal language theory is closely related to the study of automata.
+% In the Chomsky hierarchy, each language class is associated to a class of
+% automata that serve as recognizers. Internal to \theoryabbv,
+% we can characterize these language classes syntactically; moreover, we
+% demonstrate the equivalence of these language classes to its associated automata class as a
+% proof term within our logic. 
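+
+As a small, hypothetical example of the above recipe: taking the concrete
+syntax $A = \Dyck$ and the abstract syntax $X = \mathbb{N}$, a semantic action
+$\mathtt{depth} : \square (\Dyck \lto \Delta\,\mathbb{N})$ could compute the
+maximum nesting depth of a balanced string, folding $\nil$ to $0$ and
+$\balanced$ (with inner depth $d_{1}$ and trailing depth $d_{2}$) to
+$\mathsf{max}(d_{1} + 1 , d_{2})$.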
+ +% \subsection{Non-deterministic Finite Automata} +% \label{subsec:finiteaut} +% \begin{figure} +% \begin{tikzpicture}[node distance = 25mm ] +% \node[state, initial, accepting] (1) {$1$}; +% \node[state, below left of=1] (2) {$2$}; +% \node[state, right of=2] (3) {$3$}; + +% \path[->] (1) edge[above] node{$b$} (2) +% (1) edge[below, bend right, left=0.3] node{$\varepsilon$} (3) +% (2) edge[loop left] node{$a$} (2) +% (2) edge[below] node{$a, b$} (3) +% (3) edge[above, bend right, right=0.3] node{$a$} (1); +% \end{tikzpicture} +% \caption{An example NFA} +% \label{fig:NFA} +% \end{figure} + +% Classically, a \emph{nondeterministic finite automaton} (NFA) is a finite state machine where +% transitions are labeled with characters from a fixed alphabet $\Sigma$. These +% are often represented formally as a 5-tuple $(Q, \Sigma, \delta, q_{0}, F)$, + +% \begin{itemize} +% \item $Q$ a finite set of states +% \item $\Sigma$ a fixed, finite alphabet +% \item $\delta : Q \times (\Sigma \cup \{ \varepsilon\}) \to \mathcal{P}(Q)$ the labeled transition function +% \item $q_{0} \in Q$ the start state +% \item $F \subset Q$ a set of accepting states +% \end{itemize} + + + +% \subsection{A DFA Parser} +% \label{subsec:regexparser} + +% Just as we encoded traces of NFAs as grammars, we likewise +% encode the traces of a DFA as grammars. The key difference +% between NFAs and DFAs is \emph{determinism} --- meaning, +% that in a state $q$ inside of DFA, given a character $c$ there +% will be exactly one transition that we may take leaving $q$ +% with label $c$. For us, this changes the definition of valid +% transitions for a DFA, instead of the definition of +% $\mathsf{Trans}$ provided in \cref{subsec:finiteaut} DFAs +% obey -\begin{itemize} - \item $Q$ a finite set of states - \item $\Sigma$ a fixed, finite string alphabet - \item $S$ a fixed, finite stack alphabet - \item - $\delta \subset Q \times (\Sigma \cup \{ \varepsilon \}) \times S \to \mathcal{P}(Q \times S^{*})$ - the transition function - \item $q_{0} \in Q$ the start state - \item $Z \in S$ the initial stack symbol - \item $F \subset Q$ the accepting states -\end{itemize} +% \begin{gather*} +% \mathsf{State} \in \{ g_{q} : q \in Q \} \\ +% \mathsf{Trans}(q) ::= \bigoplus_{c \in \Sigma} (c \otimes \mathsf{State}) +% \end{gather*} -We encode the traces of a pushdown automaton very similarly to those of an NFA, -except the transitions of a PDA are instead encoded via the LNL product type -$\bigamp$. This is because when simply transitioning via character, a PDA must -also pop and push characters onto a stack, which is used as the argument to -these dependent functions. Thus, the appropriate formation of traces through a -PDA is dependent on a stack. - -Let $S$ be a non-linear type encoding the stack -alphabet, and build lists over $S$ as the (non-linear) least fixed-point -$\mathsf{List}(S) := \mu X . 1 + S \times X$. Then, the type of states for a -PDA $P$ with stack alphabet $S$ are given as a functions that takes in lists $L$, -and then makes a case distinction between possible transitions based on what was witnessed as $\mathsf{head}(L)$. The choice of transition -will then determine which character to transition by and what word $w$ should be -pushed onto the stack. The word that is added to the top of the stack is -appended to $\mathsf{tail}(L)$ and then we recursively step into another state -called on argument $w \cdot \mathsf{tail}(L)$. +% Meaning, each state has a transition for every character. 
\emph{Note:} the above use +% of a disjunction over $\Sigma$ is a bit of abuse of notation. +% As $\Sigma$ is a finite alphabet, we wish to think of this as an iterated +% binary sum, or perhaps as a finitely indexed sum defined via linear dependent +% sums in the same manner by which we defined binary sums. + +% We now wish to define a parser term for DFA grammars. In +% particular, for a DFA $D$ we want to build a term, + +% \[ +% w : \String \vdash \mathsf{parse}_{D} : \mathsf{AccTrace}_{D} \oplus \top +% \] + +% where left injection into the output type denotes acceptance +% by the parser, and right injection denotes rejection. To +% build such a parser, it will suffice to construct a term + +% \[ +% w : \String \vdash \mathsf{parse}_{D} : \LinSigTy q Q {\mathsf{Trace}_{D}(q_{0} , q)} +% \] +% This is because given a trace of a DFA, we may easily check +% if we should accept or reject by simply testing +% if the final state is accepting. + +% Because $w$ is a Kleene star of characters, we may construct +% our desired $\mathsf{parse}_{D}$ term as a $\mathsf{foldl}$ +% over $w$. In the empty case, we just have the trace that +% ends at the accepting state. In the recursive case, we +% effectively add to our trace by transitioning one character +% at a time, as we read them moving across $w$. + +% Perhaps this derivation is not too surprising. All it says +% is that a DFA may be ran by transitioning a single character +% at a time, and then accepting or rejecting based on the +% final state. This is exactly what DFAs did initially, so +% what did we gain? Well, this has the benefit of our type +% system to ensure its correctness. Moreover, this construction exports to an +% intrinsically verified and executable decision procedure for DFAs in \theoryabbv +% embedded in Agda. + +% We should note that the +% construction of a DFA parser requires the addition of an additional but seemingly innocuous +% axiom. + +% \[ +% \top \cong \left( \bigoplus_{c : \Sigma} c \right)^{*} +% \] + +% That is, the characters in the alphabet $\Sigma$ really are all of the +% characters. The need for this axiom arises when we go to take a transition +% within the DFA, +% as we must ensure that the next character of the string has a corresponding +% transition from the current state +% --- which would be guaranteed by determinism provided there are no surprise characters. + +% \subsection{Regular Expressions and DFAs} +% \label{subsec:deriv} + +% In order to extend the DFA +% parser from \cref{subsec:regexparser} to the construction of +% a verified parser +% generator for regular expression we need to perform some +% plumbing establishing an equivalence between regular +% expressions and DFAs. + +% There are several routes we may hope to take in establishing +% this equivalence. First, we could prove an equivalence +% between NFAs and regular expressions, and separately prove +% an equivalence between NFAs and DFAs. +% In \cref{subsec:eqproofs}, we include a version of +% Thompson's construction --- which established the +% equivalence between regular grammars and DFAs. We may additionally +% hope to internalize a variant of the powerset construction \cite{rabinFiniteAutomataTheir1959} +% --- which takes as input an NFA and constructs a DFA that +% recognizes the same language --- and combine the results of +% Thompson's construction and the powerset constructions to give an equivalence +% between regular expressions and DFAs. 
This route is alluring, as it +% internalizes several classic grammar-theoretic constructions. However, it may +% necessitate extensions to the LNL theory, like +% a propositional truncation, and we have not yet investigated +% how this would interact with the existing types in the +% theory. The addition of a propositional truncation may seem +% harmless, but it is not always immediately clear how +% distinct constructions will interact. For instance, when +% exploring LNL models, Benton discovered that the synthesis +% of linear and dependent types require a new presentation of +% the $!$ modality from linear logic +% \cite{bentonMixedLinearNonlinear1995}. That is all to say, +% this is a work in progress and +% it is not immediate that the addition of a propositional +% truncation is adequate for establishing the weak equivalence +% between NFAs and DFAs. + +% We may instead hope to internalize an equivalence between +% regular grammars and DFAs by using Brzozowski derivatives to +% directly create a DFA that is weakly equivalent to a given +% regular expression, as described by Owens et al. +% \cite{owensRegularexpressionDerivativesReexamined2009}. +% One characterization of regular grammars is that they are +% precisely those grammars which have finitely many inequivalent Brzozowski +% derivatives +% \cite{brzozowskiDerivativesRegularExpressions1964}. +% The algorithm used by Owens takes in a +% regular grammar and generates a DFA that recognizes the same +% language, and the states in this DFA are the finitely many +% derivative equivalence classes. We initially had a version of +% this theorem very roughly internalized in the LNL theory. +% To our taste, too much of this presentation relied on +% meta-arguments that lived outside of +% our formalism, and thus this particular phrasing of the +% theorem did not translate well into formalization. + +% In any case, we believe +% that revisiting these lines of thought will lead to a +% satisfactory internalization of the equivalence between +% regular grammars and DFAs, and thus would bridge the gap +% between our DFA parser and a full regular expression parser. + +% \subsection{Equivalence Between Regular Expressions and Finite Automata} +% \label{subsec:eqproofs} +% In this section, we describe a version of Thompson's +% construction \cite{thompsonProgrammingTechniquesRegular1968} +% where we construct an NFA that recognizes a given regular +% expression. Moreover, we will show that this NFA is strongly equivalent to the +% original grammar. Witnessing this construction in our syntax has two benefits +% \begin{enumerate} +% \item It reinforces this high-level view that the syntax is a natural and +% general setting for formal grammar reasoning, as we demonstrate that +% this formal system subsume results from existing systems, and +% \item Following the +% development of Thompson's construction, we then need +% only establish the equivalence of NFAs and DFAs to +% complete the full regular expression parser +% \end{enumerate} + +% \begin{theorem}[Thompson] +% \label{thm:thompson} +% For $g$ a regular grammar $g$, there is an NFA $N$ that recognizes the same +% language as $g$. +% \end{theorem} +% We make a pretty straightforward adaptation of Thompson's theorem to our setting, + +% \begin{theorem}[Typed Thompson] +% \label{thm:typthompson} +% For $g$ a regular expression $g$, there is an NFA $N$ such that $g$ is isomorphic +% to $\mathsf{AccTrace}_{N}$. 
+% \end{theorem} + +% \begin{proof}[Proof Sketch] +% Recall that regular expressions are inductively defined via +% disjunction, concatenation, and Kleene star over literals +% and the empty grammar. By induction over regular expressions, +% we will construct an NFA that is equivalent to $g$. + +% First, define the recognizing NFA for the empty grammar +% $I$. + +% \begin{figure}[h!] +% \begin{tikzpicture}[node distance = 25mm ] +% \node[state, initial] (1) {$1$}; +% \node[state, right of=1, accepting] (2) {$2$}; + +% \path[->] (1) edge[below] node{$\varepsilon$} (2); +% \end{tikzpicture} +% \caption{$NFA(I)$} +% \label{fig:emptyNFA} +% \end{figure} + +% The type of traces from the initial state of $NFA(I)$ to the single +% accepting state is given by, + +% \[ +% \mathsf{Trace}_{NFA(I)}(q_{1}, q_{2}) = \mu +% \begin{pmatrix} +% g_{q_{1}} := g_{q_{2}} \\ +% g_{q_{2}} := I +% \end{pmatrix}. g_{q_{1}} +% \] + +% The accepting traces through $NFA(I)$ are then described +% as, + +% \[ +% \mathsf{AccTrace}_{NFA(I)} = \LinSigTy q {\{1 , 2\}} {\left( \mathsf{Trace}_{NFA(I)}(q_{1} , q) \pair \mathsf{acc}(q) \right)} +% \] + +% A quick inspection of \cref{fig:emptyNFA} reveals that the +% only reasonable choice for $q$ is $q_{2}$ --- because +% state 2 is accepting while state 1 is not. Therefore, + +% \begin{align*} +% \mathsf{AccTrace}_{NFA(I)} +% & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \pair \mathsf{acc}(q_{2}) \\ +% & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \pair \top \\ +% & \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) +% \end{align*} + +% From here, to prove +% $I \cong \mathsf{AccTrace}_{NFA(I)}$ it suffices to show +% $I \cong \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})$. Below we +% give two parse transformers, one from $I$ to +% $\mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})$ and vice versa. + +% \[ +% \inferrule +% {p : I \vdash \mathsf{nil} : \mathsf{Trace}(q_{2} , q_{2}) \\ +% \exists \text{~transition~} q_{1} \overset{\varepsilon}{\to} q_{2} +% } +% {p : I \vdash \mathsf{\varepsilon cons}(\mathsf{nil}) : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2})} +% \] + +% Let $\gamma$ be the substitution $\{ g_{q_{2}} / g_{q_{1}}, I / g_{q_{2}} \}$, + +% \[ +% \inferrule +% { +% p : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \vdash p : +% \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \\ +% x_{1} : \simulsubst {g_{q_{2}}} {\gamma} = I \vdash x_{1} : I \\ +% x_{2} : \simulsubst {I} {\gamma} = I \vdash x_{2} : I +% } +% { +% p : \mathsf{Trace}_{NFA(I)}(q_{1} , q_{2}) \vdash \mathsf{mfold}(x_{1}.x_{1} , x_{2}.x_{2})(p) : I +% } +% \] + +% This concludes the proof for the case of the empty +% grammar. Let's now walk through the construction for +% literal grammars. Given a character $c$, we construct an +% NFA that recognizes only the string containing the single +% character $c$ as, + + +% \begin{figure}[h!] +% \begin{tikzpicture}[node distance = 25mm ] +% \node[state, initial] (1) {$1$}; +% \node[state, right of=1, accepting] (2) {$2$}; + +% \path[->] (1) edge[below] node{$c$} (2); +% \end{tikzpicture} +% \caption{$NFA(c)$} +% \label{fig:literalNFA} +% \end{figure} + +% The automaton in \cref{fig:literalNFA} induces the +% following type of traces from $q_{1}$ to $q_{2}$. + +% \[ +% \mathsf{Trace}_{NFA(c)}(q_{1}, q_{2}) = \mu +% \begin{pmatrix} +% g_{q_{1}} := c \otimes g_{q_{2}} \\ +% g_{q_{2}} := I +% \end{pmatrix}. 
g_{q_{1}} +% \] + +% Through the same argument as the empty grammar, the only +% state that is accepting is $q_{2}$ and thus, + +% \[ +% \mathsf{AccTrace}_{NFA(c)} \cong \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) +% \] + +% To show the desired isomorphism of $c \cong NFA(c)$ we +% make a similar argument as we did for the empty grammar +% $I$, except we leverage the $\mathsf{cons}$ rule instead +% of $\mathsf{\varepsilon cons}$. That is, the parse +% transformers in either direction are given as, + +% \[ +% \inferrule +% {\cdot \vdash \mathsf{nil} : \mathsf{Trace}(q_{2} , q_{2}) \\ +% \exists \text{~transition~} q_{1} \overset{c}{\to} q_{2} +% } +% {p : c \vdash \mathsf{cons}(\mathsf{nil}) : +% \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2})} +% \] + + +% \[ +% \inferrule +% { +% p : \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \vdash p : +% \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \\ +% x_{1} : \simulsubst {(c \otimes g_{q_{2}})} {\gamma} = c \otimes I \vdash \mathsf{unitR}(x_{1}) : c \\ +% x_{2} : \simulsubst {I} {\gamma} = I \vdash x_{2} : I +% } +% { +% p : \mathsf{Trace}_{NFA(c)}(q_{1} , q_{2}) \vdash \mathsf{mfold}(x_{1}.\mathsf{unitR}(x_{1}) , x_{2}.x_{2})(p) : c +% } +% \] + +% Where $\gamma$ is the substitution +% $\{ c \otimes g_{q_{2}} / g_{q_{1}}, I / g_{q_{2}} \}$ and +% $\mathsf{unitR}$ is a witness to the isomorphism +% $c \otimes I \cong c$. Again, we may see that these do +% indeed mutually invert each other in the Agda code. + +% It remains to show that the desired isomorphisms are +% preserved by $\otimes$, $\oplus$, and Kleene star. Here, +% we will give the argument for the disjunction case, the +% others are defined quite similarly. + +% Given two NFAs $N$ and $M$, we may define a new NFA that +% encodes the disjunction of $N$ and $M$. Denote the +% internal states of $N$ by $q_{j}$'s and the internal states +% of $M$ by $r_{k}$'s, + +% \begin{figure}[h!] +% \begin{tikzpicture}[node distance = 20mm ] +% \node[state] (2) {$q_{init}$}; +% \node[state, initial, below left of=2] (1) {$init$}; +% \node[state, below right of=1] (3) {$r_{init}$}; +% \node[right of=2] (4) {$\cdots$}; +% \node[right of=3] (5) {$\cdots$}; +% % \node[state, below right of=1] (3) {$3$}; +% % \node[state, below of=2] (4) {$4$}; + +% \path[->] (1) edge[below] node{$\varepsilon$} (2); +% \path[->] (1) edge[below] node{$\varepsilon$} (3); +% \path[->] (2) edge[below] node{} (4); +% \path[->] (3) edge[below] node{} (5); + +% \node[label={[name=l] $N$}, draw,line width=2pt,rounded corners=5pt, fit=(2)(4)] {}; +% \node[label={[name=l] $M$}, draw,line width=2pt,rounded corners=5pt, fit=(3)(5)] {}; +% \end{tikzpicture} +% \caption{$N \oplus_{NFA} M$} +% \label{fig:disjunctionNFA} +% \end{figure} + +% \Cref{fig:disjunctionNFA} shows the process of +% disjunctively combining NFAs. Precisely, we add a single +% new state and we included copies of the states from each of $N$ +% and $M$. The new state acts as the initial state and has +% $\varepsilon$-transitions to the initial states of $N$ and +% $M$. We include all of the internal transitions from $N$ +% and $M$, and the accepting states of $N \oplus_{NFA} M$ +% are exactly the accepting states in each subautomaton. + +% Let $g$ and $g'$ be two regular grammars such that +% $g \cong NFA(g)$ and $g' \cong NFA(g')$. As a matter of +% notation\footnote{We shall similarly abuse notation for +% $\otimes$ and Kleene. 
That is, for a regular grammar +% $g$, when we write $NFA(g)$ we mean the NFA inductively +% built up with the NFA-analogues to the constructors that +% built up $g$.}, we will write $NFA(g \oplus g')$ for +% $NFA(g) \oplus_{NFA} NFA(g')$. The traces of +% $NFA(g \oplus g')$ are then given by, + +% \[ +% \mathsf{Trace}_{NFA(g \oplus g')}(src , dst) = \mu +% \begin{pmatrix} +% g_{init} := g_{q_{0}} \oplus g_{r_{0}} \\ +% g_{q_{j}} := \mathsf{Trans}_{NFA(g)}(q_{j}) \oplus \mathsf{isDst}(q_{j}) \\ +% g_{r_{k}} := \mathsf{Trans}_{NFA(g')}(r_{k}) \oplus \mathsf{isDst}(r_{k}) +% \end{pmatrix}.g_{src} +% \] + +% where $\mathsf{Trans}$ is used to echo the same syntactic +% definitions that appear in the $NFA(g)$ and $NFA(g')$. +% Also, $src$ and $dst$ may take on any value in +% $Q := \{init\} \cup \{q_{j}\} \cup \{r_{k}\}$, and +% $\mathsf{isDst}(q)$ checks if $q$ is equal to $dst$. Which +% is all to say, the traces of $NFA(g \oplus g')$ comprise +% either a trace of $NFA(g)$, or a trace of $NFA(g')$, and +% the transition coming out of $g_{init}$ determines which +% subautomaton we step into. + +% The parse transformer from $g \oplus g'$ checks which side +% of the sum type we are on, then takes the appropriate step +% from $g_{init}$ in the automaton. + +% \[ +% \inferrule +% { +% u : g \vdash \iota (\phi (u)) : \mathsf{AccTrace}_{NFA(g \oplus g')} \\ +% v : g' \vdash \iota' (\psi (v)) : \mathsf{AccTrace}_{NFA(g \oplus g')} +% } +% {p : g \oplus g' \vdash \mathsf{case}~p \{ \mathsf{inl}(u) \mapsto s , \mathsf{inr}(v) \mapsto r \} : \mathsf{AccTrace}_{NFA(g \oplus g')}} +% \] + +% with $\iota$ and $\iota'$ as embeddings from $NFA(g)$ and +% $NFA(g')$, respectively, into $NFA(g \oplus g')$, +% $\phi: g \cong \mathsf{AccTrace}_{NFA(g)}$, and $\psi: g' \cong \mathsf{AccTrace}_{NFA(g')}$. On a +% high level, all this construction does is turn a parse of +% $g$ into a parse of $NFA(g)$ and then embeds that inside +% of the larger automaton $NFA(g \oplus g')$. Likewise for $g'$. + +% In the other direction, recall that the data of an +% accepting trace for $NFA(g \oplus g')$ is a pair of a +% trace and a proof that +% the end state $q'$ of that trace is accepting. By +% multifolding over the first part of that pair, we turn the +% term of type +% $\mathsf{Trace}_{NFA(g \oplus g')}(init , q')$ into a +% trace of either of the subautomata, + +% \[ +% p : \mathsf{Trace}_{NFA(g \oplus g')}(init , q') \vdash \mathsf{mfold}_{NFA(g \oplus g')} : \mathsf{Trace}_{NFA(g)}(q_{0} , q') \oplus \mathsf{Trace}_{NFA(g')}(r_{0} , q') +% \] + +% Additionally, we leverage the fact that the only accepting +% states for $NFA(g \oplus g')$ are those from the +% subautomata to extract that $q'$ must be an accepting +% state from a subautomaton. 
+ +% \[ +% x : \mathsf{acc}_{NFA(g \oplus g')}(q') \vdash M : \mathsf{acc}_{NFA(g)}(q') \oplus \mathsf{acc}_{NFA(g')}(q') +% \] + +% We then combine the trace and proof of acceptance into an +% accepting trace of one of the subautomata, + +% \[ +% p : \mathsf{AccTrace}_{NFA(g \oplus g')} \vdash N : \mathsf{AccTrace}_{NFA(g)} \oplus \mathsf{AccTrace}_{NFA(g')} +% \] + +% Lastly, we then inductively use the isomorphisms $\phi$ +% and $\psi$ to turn the accepting traces into a parse of +% $g$ or $g'$, + +% \[ +% N : \mathsf{AccTrace}_{NFA(g)} \oplus \mathsf{AccTrace}_{NFA(g')} \vdash \mathsf{case}~N~\{\mathsf{inl}(n) \mapsto \phi^{-1}(n), \mathsf{inr}(n') \mapsto \psi^{-1}(n')\} : g \oplus g' +% \] + +% When you wish to show that these actually maps invert each other to form an isomorphism\max{jumbled sentence}, we invoke the +% universal properties of the $\mathsf{Trace}$ types. That is, +% $\mathsf{Trace}_{\oplus NFA}$, $\mathsf{Trace}_{N}$, and $\mathsf{Trace}_{M}$ +% are \emph{initial} algebras to the the functors that define them. Thus, the +% embedding of traces through $N$ into $\oplus NFA$ and then mapped back down to $N$ +% compose to a map from $\mathsf{Trace}_{N}$ to itself. Such a map is unique and +% thus must be the identity. The same reasoning shows that the desired +% composite do indeed invert each other to form an isomorphism. + +% As discussed above, the other cases and +% follow through a similar argument. This +% concludes the proof of our variant of Thompson's construction. +% \end{proof} + +% \subsection{Other Automata} +% \subsubsection{Pushdown Automata} +% \label{subsubsec:pda} +% A (nondeterministic) \emph{pushdown automaton} is an automaton that employs a +% stack. Just like NFAs, they have transitions labeled with characters from a +% fixed string alphabet $\Sigma$. Additionally, they maintain a stack of +% characters drawn from a stack alphabet $\Gamma$. They are often represented +% formally as a 7-tuple $(Q, \Sigma, \Gamma, \delta, q_{0}, Z, F)$, + +% \begin{itemize} +% \item $Q$ a finite set of states +% \item $\Sigma$ a fixed, finite string alphabet +% \item $S$ a fixed, finite stack alphabet +% \item +% $\delta \subset Q \times (\Sigma \cup \{ \varepsilon \}) \times S \to \mathcal{P}(Q \times S^{*})$ +% the transition function +% \item $q_{0} \in Q$ the start state +% \item $Z \in S$ the initial stack symbol +% \item $F \subset Q$ the accepting states +% \end{itemize} + +% We encode the traces of a pushdown automaton very similarly to those of an NFA, +% except the transitions of a PDA are instead encoded via the LNL product type +% $\bigamp$. This is because when simply transitioning via character, a PDA must +% also pop and push characters onto a stack, which is used as the argument to +% these dependent functions. Thus, the appropriate formation of traces through a +% PDA is dependent on a stack. + +% Let $S$ be a non-linear type encoding the stack +% alphabet, and build lists over $S$ as the (non-linear) least fixed-point +% $\mathsf{List}(S) := \mu X . 1 + S \times X$. Then, the type of states for a +% PDA $P$ with stack alphabet $S$ are given as a functions that takes in lists $L$, +% and then makes a case distinction between possible transitions based on what was witnessed as $\mathsf{head}(L)$. The choice of transition +% will then determine which character to transition by and what word $w$ should be +% pushed onto the stack. 
The word that is added to the top of the stack is +% appended to $\mathsf{tail}(L)$ and then we recursively step into another state +% called on argument $w \cdot \mathsf{tail}(L)$. % \begin{gather*} % \mathsf{State} \in {g _{q} : q \in Q} \\ @@ -1815,515 +2483,996 @@ \subsubsection{Pushdown Automata} % That is to say, when transitioning a PDA pops off the head $hd$ of the stack -\subsubsection{Turing Machines} -\label{subsubsec:tm} +% \subsubsection{Turing Machines} +% \label{subsubsec:tm} -Above we gave a grammar presentation of -traces through a PDA by using a non-linear type $S$ -to encode the stack. We may similarly use pairs $S \times S$ -to encode the tape of a Turing machine. With two stacks we can simulate the behavior of the -infinite tape of a Turing machine. The intuition behind this correspondence is -that the left half of the tape is on one stack, the right -half of the tape the other, and we treat the tops of stacks -like the head of the tape. +% Above we gave a grammar presentation of +% traces through a PDA by using a non-linear type $S$ +% to encode the stack. We may similarly use pairs $S \times S$ +% to encode the tape of a Turing machine. With two stacks we can simulate the behavior of the +% infinite tape of a Turing machine. The intuition behind this correspondence is +% that the left half of the tape is on one stack, the right +% half of the tape the other, and we treat the tops of stacks +% like the head of the tape. -\pedro{I like the overall structure of this section! Showing how the type theory -can encode and reason about different kinds of languages is very instructive!} +% \pedro{I like the overall structure of this section! Showing how the type theory +% can encode and reason about different kinds of languages is very instructive!} +\section{Denotational Semantics} +\label{sec:semantics-and-metatheory} -\section{Categorical Semantics of \theoryabbv} -\label{sec:categorify} +To justify our assertion that \theoryabbv is a syntax for formal +grammars and parse transformers, we will now define a +\emph{denotational semantics} that makes this mathematically +precise. To interpret our types of codes we assume a universe of +\emph{small} sets. -As described in previous sections, \theoryabbv has a ``standard'' -interpretation where non-linear types denote sets, linear types denote -formal grammars, non-linear terms denote functions and linear terms -denote parse transformers. -% -Further, as is typical for type theory, every type constructor in the -calculus is interpreted by using a \emph{universal construction} in -either the category of sets or the category of grammars\footnote{the -only exception is that the axioms for Brzozowski derivatives we -considered do not follow solely from universal properties. In this -section we focus on models that do not necessarily satisfy these -axioms.}. -% -This leads to an immediate opportunity for generalization: the type -theory has in addition to the standard grammar-theoretic -interpretation, an interpretation in any categorical structure that -exhibits the same universal constructions. -% -On the one hand this gives us a structured way to formalize the -standard semantics precisely, but additionally it enables us to -consider \emph{non-standard} models in the next section that point to -further applications as well as meta-theoretic results. - -In developing this categorical semantics, we will start with the -closest analogue from formal language theory: Kleene algebra. 
Kleene -algebras are an important tool in the theory of regular languages -serving as a bridge between algebraic reasoning and equivalence of -regular expressions. More broadly, through various extensions, they -serve as a theoretical substrate to studying different kinds of formal -languages. -% -We can then see that the ``regular fragment'' of our type theory -(i.e., just characters, $\otimes$, $\oplus$, $0,1$ and a Kleene star) -has models in what we call a \emph{Kleene category} a categorification -of Kleene algebra from posets to categories. -% -We then further develop this into our final notion of model, which we -call a \emph{Chomsky category} as it can model not just regular -languages, but the full Chomsky hierarchy. - -\subsection{Kleene Algebra and Kleene Categories} -A Kleene algebra is a tuple $(A, +, \cdot, (-)^*, 1, 0)$, where $A$ is -a set, $+$ and $\cdot$ are binary operations over $A$, $(-)^*$ is a -function over $A$, and $1$ and $0$ are constants. These structures -satisfy the axioms depicted in Figure~\ref{fig:axioms}. - -\begin{figure} - \begin{align*} - x + (y + z) &= (x + y) + z & x + y &= y + x\\ - x + 0 &= x & x + x &= x\\ - x(yz) &= (xy)z & x1 &= 1x = x\\ - x(y + z) &= xy + xz & (x + y)z &= xz + yz\\ - x0 &= 0x = x & & \\ - 1 + aa^* &\leq a^* & 1 + a^*a &\leq a^*\\ - b + ax \leq x &\implies a^*b \leq x & b + xa \leq x &\implies ba^* \leq x - \end{align*} - \caption{Kleene algebra axioms} - \label{fig:axioms} -\end{figure} +\subsection{Formal Grammars and Parse Transformers} -The addition operation can be used to define the partial order -structure $a \leq b$ if, and only if, $a + b = b$. In the theory of -formal languages, this order structure can be used to model language -containment. In this section, we categorify the concept of Kleene -algebra and build on top of it in order to define an abstract theory -of parsing. We start by defining \emph{Kleene categories}. +First, we need to establish a precise definition of a formal grammar +that accounts for the possibility of ambiguity. The most common +definition of a formal grammar is as a generative grammars, defined by +a set of non-terminals, with a specified start symbol and set of +production rules. We instead use a more abstract formulation that is +closer in spirit to the standard definition of a formal language: \begin{definition} - A Kleene category is a distributive monoidal category $\cat{K}$ - such that for every objects $A$ and $B$, the endofunctors $F_{A, B} - = B + A \otimes X$ and $G_{A, B} = B + X \otimes A$ have initial - algebras (denoted $\mu X.\, F_{A, B}(X)$) such that $B \otimes (\mu - X.\, F_{A, 1}) \cong \mu X.\, F_{A, B}(X)$ and the analogous isomorphism - for $G_{A,B}$ also holds. + A \emph{formal language} $L$ is a function from strings to propositions. + A \emph{formal grammar} $A$ is a function from strings to small sets. \end{definition} -As a sanity check, note that Kleene algebras are indeed examples of -Kleene categories. - -\begin{example} - Every Kleene algebra, seen a posetal category, is a Kleene category. - The product $\cdot$ is a monoidal product and the addition is a - least-upper bound, which corresponds to a coproduct. Lastly, the - axioms of the Kleene star have a direct correspondence to the - coherence conditions postulated by the initial algebras of Kleene - categories. -\end{example} - -This example provides a neat categorical justification to how -restrictive Kleene algebras are in terms of reasoning about -languages. 
By only having at most one morphism between objects, there -is not a lot of information they can convey. In this case, the only -information you get is language containment. As demonstrated by -$\mathbf{Gr}$, the extra degrees of freedom granted by having more -morphisms gives you more algebraic structure for reasoning about -languages. - -For the next example, we see an unexpected connection with the theory -of substructural logics. - -\begin{example} - The opposite category of every Kleene category is a model of a variant of - conjunctive ordered logic, where the Kleene star plays the role of the ``of - course'' modality from substructural logics which allows hypotheses to - be discarded or duplicated. -\end{example} - -As we have seen, the proposed axioms are a direct translation of the -Kleene algebra axioms to a categorical setting. Its most unusual aspect is the -axiomatization of the Kleene star as a family of initial algebras -satisfying certain isomorphisms. If the Kleene category $\cat{K}$ has -more structure, then these isomorphisms hold ``for free''. - -\begin{theorem} - \label{th:kleeneclosed} - Let $\cat{K}$ be a Kleene category such that it is also monoidal - closed. Then, the initial algebras isomorphisms hold automatically. -\end{theorem} -\begin{proof} - We prove this by the unicity (up-to isomorphism) of initial - algebras. Let $[hd, tl]: I + (\mu X.\, F_{A, I}(X)) \otimes A \to - (\mu X.\, F_{A, I}(X))$ be the initial algebra structure of $(\mu - X.\, F_{A, I}(X))$ and consider the map $[hd, tl] : B + B \otimes - (\mu X.\, F_{A, I}(X)) \otimes A \to B\otimes (\mu X.\, F_{A, - I}(X))$. - - Now, let $[f,g] : B + A \otimes Y \to Y$ be an $F_{A,B}$-algebra and - we want to show that there is a unique algebra morphism $h : \mu X.\, F_{A,I} \to B \lto Y$. We can show existence and - uniqueness by showing that the diagram on top commutes if, and - only if, the diagram on the bottom commutes: - -% https://q.uiver.app/#q=WzAsOCxbMCwwLCJCICsgQiBcXG90aW1lcyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFsyLDAsIkIgKyBZIFxcb3RpbWVzIEEiXSxbMiwyLCJZIl0sWzAsMiwiQiBcXG90aW1lcyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFswLDMsIjEgKyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFswLDUsIlxcbXUgWC5cXCwgMSArIFggXFxvdGltZXMgQSJdLFsyLDMsIjEgKyAoIEIgXFxsdG8gWSkgXFxvdGltZXMgQSJdLFsyLDUsIkIgXFxsdG8gWSJdLFswLDEsImlkICsgKGlkIFxcb3RpbWVzIGg7IGV2KSBcXG90aW1lcyBpZF9BIl0sWzEsMiwiW2YsZ10iXSxbMywyLCJpZCBcXG90aW1lcyBoOyBldiIsMl0sWzAsMywiW2lkIFxcb3RpbWVzIGgsIGlkIFxcb3RpbWVzIHRsXSIsMl0sWzQsNiwiaWQgKyAoaCBcXG90aW1lcyBYKVxcb3RpbWVzIGlkIl0sWzUsNywiaCIsMl0sWzYsNywiW2YnLCBnJ10iXSxbNCw1LCJbaGQsIHRsXSIsMl1d -\[\begin{tikzcd} - {B + B \otimes (\mu X.\, I + X \otimes A)} && {B + Y \otimes A} \\ - \\ - {B \otimes (\mu X.\, I + X \otimes A)} && Y \\ - {I + (\mu X.\, I + X \otimes A)} && {I + ( B \lto Y) \otimes A} \\ - \\ - {\mu X.\, I + X \otimes A} && {B \lto Y} - \arrow["{id + (id \otimes h; ev) \otimes id_A}", from=1-1, to=1-3] - \arrow["{[f,g]}", from=1-3, to=3-3] - \arrow["{id \otimes h; ev}"', from=3-1, to=3-3] - \arrow["{[id \otimes h, id \otimes tl]}"', from=1-1, to=3-1] - \arrow["{id + (h \otimes X)\otimes id}", from=4-1, to=4-3] - \arrow["h"', from=6-1, to=6-3] - \arrow["{[f', g']}", from=4-3, to=6-3] - \arrow["{[hd, tl]}"', from=4-1, to=6-1] -\end{tikzcd}\] - This equivalence follows by using the adjunction structure given - by the monoidal closed structure of $\cat{K}$. A completely analogous - argument for $G_{A,B}$ also holds. 
Furthermore, by generalizing the - construction of \Cref{sec:formaltype}, we can also show that from - the monoidal closed assumption it follows that $\mu X.\, F_{A, I}(X) \cong \mu X.\, G_{A, I}(X)$ -\end{proof} - -Something surprising about this lemma is that it provides an alternative -perspective on the observation that if a Kleene algebra has an -residuation operation, also called action algebra \cite{kozen1994action}, -then the Kleene star admits a simpler axiomatization. - -Since we want Kleene categories to generalize our notion of formal -grammars as presheaves $\String \to \Set$, we prove that they do -indeed form a Kleene category. We start by presenting a well-known -construction from presheaf categories. +We think of the grammar $A$ as taking a string to the set of all parse +trees for that string. However since $A$ could be any function +whatsoever there is no requirement that an element of $A(w)$ be a +``tree'' in the usual sense. This definition provides a simple, +syntax-independent definition of a grammar that can be used for any +formalism: generative grammars, categorial grammars, or our own +type-theoretic grammars. Note that the definition of a formal grammar +is a generalization of the usual notion of formal language since a +proposition can be equivalently defined as a subset of a one-element +set. Then the difference between a formal grammar and a formal +language is that formal grammars can be \emph{ambiguous} in that there +can be more than one parse of the same string. Even for unambiguous +grammars, we care not just about \emph{whether} a string has a parse +tree, but \emph{which} parse tree it has, i.e., what the structure of +the element of $A(w)$ is. + +The linear types of \theoryabbv can all be interpreted as formal +grammars, but what about the linear terms? These can be interpreted as +\emph{parse transformers} +\begin{definition} + Let $L_1,L_2$ be formal languages. Then we say $L_1\subseteq L_2$ if + for every string $w$, $L_1(w)\Rightarrow L_2(w)$. + Let $A_1,A_2$ be formal grammars. Then a parse transformer $f$ from + $A_1$ to $A_2$ is a function assigning to each string $w$ a function + $f_w : A_1(w) \to A_2(w)$. +\end{definition} +First, note that parse transformers generalize language inclusion: if +$A_1(w), A_2(w)$ are all subsets of a one-element set, then a parse +transformer is equivalent to showing that $A_1(w) \subseteq A_2(w)$. +In our denotational semantics, linear terms will be interpreted as +such parse transformers, and the notions of unambiguous grammar, +parseability, etc, introduced in \Cref{sec:concepts} can be verified +to correspond to their intended meanings under this interpretation. + +Parse transformers have an obvious notion of composition and identity +transformer. This defines a \emph{category}. \begin{definition} - Let $\cat{C}$ be a locally small monoidal category and $F$, $G$ be - two functors $\cat{C} \to \Set$. Their Day convolution tensor - product is defined as the following coend formula: - \[ - (F \otimes_{Day} G)(x) = \int^{(y,z) \in \cat{C}\times\cat{C}}\cat{C}(y\otimes z, x) \times F(y) \times G(z) - \] - Dually, its internal hom is given by the following end formula: - \[ - (F \lto_{Day} G)(x) = \int_{y} \Set(F(y), G(x \otimes y)) - \] + Define $\Grammar$ to be the category whose objects are formal + grammars and morphisms are parse transformers. \end{definition} - -\begin{lemma}[Day \cite{day1970construction}] - Under the assumptions above, the presheaf category $\Set^{\cat{C}}$ is - monoidal closed. 
-\end{lemma} - +This category is equivalent to the slice category $\Set/\Sigma^*$ and +as such is very well-behaved. It is complete, co-complete, cartesian +closed and carries a monoidal biclosed structure. We will use these +structures to model the linear types, terms and equalities in +\theoryabbv. %% \begin{theorem} -%% Let $\cat{K}$ be a Kleene category and $A$ a discrete category. -%% The functor category $[A, \cat{K}]$. -%% (HOW GENERAL SHOULD THIS THEOREM BE? BY ASSUMING ENOUGH STRUCTURE, -%% E.G. K = Set, THIS THEOREM BECOMES SIMPLE TO PROVE) +%% The category $\Grammar$ is complete, co-complete and cartesian +%% closed. Furthermore, it is monoidal biclosed with the monoidal +%% product given by +%% \[ (A \otimes B) w = \{ (w_1,w_2,a,b) \pipe w_1w_2 = w \wedge a \in A w_1 \wedge b \in B w_2\} \] +%% and unit by +%% \[ I w = \{ () \pipe w = \varepsilon \} \] %% \end{theorem} -\begin{theorem} - If $\cat{C}$ is a locally small monoidal category, then - $\Set^{\cat{C}}$ is a Kleene category. -\end{theorem} -\begin{proof} - - By the lemma above, $\Set^{\cat{C}}$ is monoidal closed, and since it - is a presheaf category, it has coproducts. Furthermore, the tensor - is a left adjoint, i.e. it preserves colimits and, therefore, it is - a distributive category. - - As for the Kleene star, since presheaf categories admit small colimits, - the initial algebra of the functors $F_{A,B}$ and $G_{A,B}$ can be - defined as the filtered colimit of the diagrams: - - From Theorem~\ref{th:kleeneclosed} it follows that these initial - algebras satisfy the required isomorphisms and this concludes the - proof. -\end{proof} - -\begin{corollary} - For every alphabet $\Sigma$, the presheaf category $\Set^{\cat{\Sigma^*}}$ - is a Kleene category. -\end{corollary} -\begin{proof} - Note that string concatenation and the empty string make the - discrete category $\Sigma^*$ a strict monoidal category. -\end{proof} - -Much like in the posetal case, the abstract structure of a Kleene -category is expressive enough to synthetically reason about regular -languages. A significant difference between them is that while Kleene -algebras can reason about language containment, Kleene categories can -reason about \emph{ambiguity}, \emph{strong equivalence} of grammars. - -\subsection{Chomsky Categories} - -Though Kleene categories are expressive enough to reason about -concepts that are outside of reach of Kleene algebras, their -simply-typed nature makes them not so expressive from a type theoretic -point of view. This is limiting because type theories are successful -syntactic frameworks for manipulating complicated categorical -structures while avoiding some issues common in category theory, such -as coherence issues. - -With this in mind, we want to design a categorical semantics that -builds on top of Kleene categories with the goal of extending them -with dependent types and making them capable reasoning about languages -and their parsers. This leads us to the abstract notion of model we -are interested in capturing with \theoryabbv: a \emph{Chomsky -category}. - -\pedro{We should probably define some of the words in this definition} -\begin{definition} - A Chomsky category is a locally Cartesian category with two hierarchies of - universes $\{L_i\}_{i\in \nat}$ and $\{U_i\}_{i\in \nat}$ such that - every $L_i$ and $U_i$ are $U_{i+1}$-small. 
Furthermore, we require - $U_i$ to be closed under dependent products and sum, - $L_i$ to be closed under the Kleene category connectives, - dependent products, left and right closed structures, with - a type constructor $G : L_i \to U_i$ and a linear dependent sum - going the other direction. -\end{definition} - -\steven{Max suggests augmenting the definition of a Chomsky category to - something like two categories $L$ and $U$, and $U$ is Cartesian closed I don't - fully recall the rest. - - - My best guess is that you take $L$ a Kleene category with a hierarchy of universes - two, and you further require that $L$ is $Psh(U)$-enriched, except he - suggested a further adjective on these presheaves. Perhaps representability? -} - -\begin{theorem} - The presheaf category $\Grammar = \Set^{\cat{\Sigma^*}}$ is a Chomsky category. -\end{theorem} - - -Further, the syntactic category of \theoryname is manifestly a Chomsky category. - -\pedro{This is likely true, but if we explicitly say so, this warrants a proof. I think that -if we don't say anything about the syntactic category, reviewers won't mind.} - -\steven{I agree that we don't want to say anything that opens unnecessary - questions for proofs we haven't written. However, it seems hard to make the - case that we have the right categorical model of the syntax if this statement - isn't true. By restating our definition of Chomsky category, this should be - obvious or a quick proof} - -\pedro{I agree, then we should add the quick proof :) } - -\section{Concrete Models of \theoryabbv} -\label{sec:othermodels} - -One of the powers of type theories is that it can be profitable to interpret them -in various models. In this section, by using our just-defined Chomsky categories, -we show how other useful concepts from formal language theory can also be organized -as models of \theoryabbv. We illustrate this point by providing two examples that -are closely related to the theory of formal languages: language equivalence and -semantic actions. Furthermore, in order to justify how $\mathbf{Gr}$ relates to -more traditional notions of parsing, we define a glued model that proves a -canonicity property of grammar terms. - - -\subsection{Language Semantics} -Every grammar induces a language semantics. Also languages can be taken as a -propositionally truncated view of the syntax. Logical equivalence should induce -weak equivalence, and thus even give a syntactic way to reason about language equivalence. -\steven{TODO language semantics} - -\subsection{Semantic Actions} -Returning to the problem of parsing, the output of a parse usually is not the -literal parse tree. Rather, the output is the result of some \emph{semantic - actions} ran on the parse tree, which usually serve to remove some syntactic -details that are unnecessary for later processing. -Given a grammar $G : \String \to \Set$, we define a semantic action to be a set -$X$ with a function $f$ that produces a semantic element from any parse of $G$. - -\[ - f : \PiTy w \String {G w \to X} -\] - -Further, semantic actions can be arranged into a structured category. -Define $\SemAct$ as the comma -category $\Grammar / \Delta$, where $\Delta : \Set \to \Grammar$ defines a -discrete presheaf. That is, for a set $X$, $\Delta (X)(w) = X$ for all -$w \in \String$. As $\SemAct$ is defined as a comma category, it has a forgetful -functor into $\Grammar$. That is, $\SemAct$ serves as a notion of formal -grammar. Moreover, $\SemAct$ is a model of \theoryabbv. 
- -\steven{It being a notion of formal grammar is distinct from being a model. This -probably warrants a proof} - -\steven{TODO semantic actions} - -\pedro{This is a very nice opportunity of showing off the supremacy of denotational - reasoning ;) We should probably prove the gluing lemma in the previous section - and apply it here and in the canonicity section. The actual proof might have to be - moved to the appendix, though} - -\subsection{Parse Canonicity} -Canonicity is an important metatheoretic theorem in the type theory -literature. It provides insight on the normal forms of terms and, -therefore, on its computational aspects. Frequently, proving -canonicity for boolean types, i.e. every closed term of type bool -reduces to either true or false, is enough to justify that the type -theory being studied is well-behaved. In our case, however, since we -want to connect \theoryabbv to parsers, we must provide a more -detailed account of canonicity. In particular, we give a nonstandard semantics -of \theoryabbv that carries a proof of canonicity along with it. - -If $\cdot \vdash A$ is a closed linear type then there are -two obvious notions of what constitutes a ``parse'' of a string w -according to the grammar $A$: -\begin{enumerate} -\item On the one hand we have the set-theoretic semantics just - defined, $\llbracket A \rrbracket \cdot w$ -\item On the other hand, we can view the string $w = c_1c_2\cdots$ as - a linear context $\lceil w \rceil = x_1:c_1,x_2:c_2,\ldots$ and - define a parse to be a $\beta\eta$-equivalence class of linear terms $\cdot; - \lceil w \rceil \vdash e : G$. -\end{enumerate} -It is not difficult to see that at least for the ``purely positive'' -formulae (those featuring only the positive connectives -$0,+,I,\otimes,\mu, \overline\Sigma,c$) that -every element $t \in \llbracket A \rrbracket w$ is a kind of tree and -that the nodes of the tree correspond precisely to the introduction -forms of the type. However it is far less obvious that \emph{every} -linear term $\lceil w \rceil \vdash p : \phi$ is equal to some -sequence of introduction forms since proofs can include elimination -forms as well. To show that this is indeed the case we give a -\emph{canonicity} result for the calculus: that the parses for . - -\begin{definition} - A non-linear type $X$ is purely positive if it is built up using - only finite sums, finite products and least fixed points. - - A linear type is purely positive if it is built up using only finite - sums, tensor products, generators $c$, least fixed points and linear - sigma types over purely positive non-linear types. +%% \max{todo: need to decide if we need this section here or if it's in the intro already}. +% The linear typing judgment in our syntax takes on the following schematic form +% $\Gamma ; \Delta \vdash a : A$. First, $A$ represents a \emph{linear type} in +% our syntax. The intended semantics of these linear types are formal grammars. +% That is, the linear typing system is designed to syntactically reflect the +% behavior of formal grammars. For this reason, we may often interchangeably use +% the terms ``linear type'' and ``grammar''. + +% The term $a$ is an inhabitant of type $A$, which is thought of as a parse tree of +% the grammar $A$. The core idea of this entire paper follows precisely from this +% single correspondence: grammars are types, and the inhabitants of these types +% are parse trees for the grammars. 
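+
+As a small illustration (an example of ours, not a construction from
+the formal development), fix the grammars
+\[
+  A(w) = \{ (w_1,w_2) \pipe w_1w_2 = w \} \qquad B(w) = \{ * \}
+\]
+so that $A$ parses each string as a choice of two-way split and $B$
+gives every string a unique parse. $A$ is ambiguous, since a string of
+length $n$ has $n+1$ parses. The function $f_w(w_1,w_2) = *$ is a
+parse transformer from $A$ to $B$, while any parse transformer from
+$B$ to $A$ must choose a canonical split of each string, for instance
+$g_w(*) = (\varepsilon, w)$.
+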
+
+% $\Gamma$ represents a non-linear context, while $\Delta$ represents a
+% \emph{linear context} dependent on $\Gamma$. These linear contexts behave
+% substructurally. As stated earlier, they are linear --- they do not obey
+% weakening or contraction --- because a character is exhausted once read by a
+% parsing procedure. Moreover, the characters in strings appear in the order in
+% which we read them. We do not have the freedom to freely permute characters;
+% therefore, any type theory that is used to reason about formal grammars ought to
+% omit the structural rule of exchange as well. This means that every variable
+% within $\Delta$ must be used \emph{exactly once} and \emph{in order of
+% occurrence}. Thus, we can think of the linear contexts as an ordered list of
+% limited resources. Once a resource is consumed, we cannot make reference to it
+% again. Variables in a linear context then act like building
+% blocks for constructing patterns over strings.
+
+% We give the base types and type constructors for linear terms. As the
+% interpretation of types as grammars in $\Grammar$ serves as our intended
+% semantics, we simultaneously give the interpretation $\sem \cdot$ of the
+% semantics as grammars.
+
+% \pedro{The paragraphs above are a bit circular, what about the following alternative?}
+% \steven{I am in favor of cutting what's above, but we haven't introduced the
+% syntax in the main body of the paper yet, even if the figures appeared
+% earlier. So what you write needs to take that into account}
+% \pedro{In which case we should introduce it in the previous section. I suggest
+% adding this right after ``and the intuitionistic typing rules in fig 3''}
+\subsection{Semantics}
+
+We can now define our denotational semantics.
+\begin{definition}[Grammar Semantics]
+  We define the following interpretations by mutual recursion on the
+  judgments of \theoryabbv:
+  \begin{enumerate}
+  \item For each non-linear context $\Gamma \isCtx$, we define a set $\sem \Gamma$.
+  \item For each non-linear type $\Gamma \vdash X \isTy$ and element
+    $\gamma \in \sem\Gamma$, we define a set $\sem X \gamma$.
+  \item For each linear type $\Gamma \vdash A \isLinTy$ and element $\gamma
+    \in \sem\Gamma$, we define a formal grammar $\sem{A}\gamma$. We
+    similarly define a formal grammar $\sem\Delta\gamma$ for each
+    linear context $\Gamma \vdash \Delta \isLinCtx$.
+  \item For each non-linear term $\Gamma \vdash M : X$ and $\gamma \in \sem{\Gamma}$, we define an element $\sem{M}\gamma \in \sem{X}\gamma$.
+  \item For each linear term $\Gamma; \Delta \vdash e : A$ and $\gamma \in \sem{\Gamma}$, we define a parse transformer from $\sem{\Delta}\gamma$ to $\sem{A}\gamma$.
+  \end{enumerate}
+  And we verify the following conditions:
+  \begin{enumerate}
+  \item If $\Gamma \vdash X \isSmall$, then $\sem X \gamma$ is a small set.
+  \item If $\Gamma \vdash X \cong X'$ then for every $\gamma$, $\sem{X}\gamma = \sem{X'}\gamma$.
+  \item If $\Gamma \vdash A \cong A'$ then for every $\gamma$, $\sem{A}\gamma = \sem{A'}\gamma$.
+  \item If $\Gamma \vdash M \cong M' : X$ then for every $\gamma$, $\sem{M}\gamma = \sem{M'}\gamma$.
+  \item If $\Gamma;\Delta \vdash e \cong e' : A$ then for every $\gamma$, $\sem{e}\gamma = \sem{e'}\gamma$.
+  \end{enumerate}
+\end{definition}
+
+For reasons of brevity, we present only a selection of cases in
+\Cref{fig:semantics}. 
The full details are in the appendix.\max{todo:
+  put the full details in the appendix}.
+\max{TODO: how to describe $\mu$ succinctly?}\max{make sure $\lto$ and $\tol$ are right}
+\begin{figure}
+  \begin{align*}
+    \sem{c}\gamma\,w &= \{ c \pipe w = c\}\\
+    \sem{I}\gamma\,w &= \{ () \pipe w = \varepsilon \}\\
+    \sem{A \otimes B}\gamma\,w &= \{ (w_1,w_2,a,b) \pipe w_1w_2 = w \wedge a \in \sem{A}\gamma\,w_1 \wedge b \in \sem{B}\gamma\,w_2 \}\\
+    \sem{\bigoplus_{x:X} A}\gamma\,w &= \{ (x, a) \pipe x \in \sem{X}\gamma \wedge a \in \sem{A}(\gamma,x)w \}\\
+    \sem{\bigwith_{x:X} A}\gamma\,w &= \prod_{x\in \sem{X}\gamma} \sem{A}(\gamma, x)\,w\\
+    \sem{A \lto B}\gamma\,w &= \prod_{w'} \sem{A}\gamma\,w' \to \sem{B}\gamma\,ww'\\
+    \sem{B \tol A}\gamma\,w &= \prod_{w'} \sem{A}\gamma\,w' \to \sem{B}\gamma\,w'w\\
+    \sem{\uparrow A}\gamma &= \sem{A}\gamma\,\varepsilon\\
+    \sem{\SPF\,X}\gamma &= \{ F : \textrm{Functor}(\textrm{Grammar}^{\sem{X}\gamma}, \textrm{Grammar}) \pipe F \textrm{ preserves $\omega$-colimits.} \}\\
+    \sem{\el(F)}\gamma\,G &= \sem{F}\gamma\,G\\
+    \sem{\mu A}\gamma &= \mu (\sem{A}\gamma) \tag{\Cref{thm:adamek}}
+  \end{align*}
+  \caption{Grammar Semantics}
+  \label{fig:semantics}
+\end{figure}
+
+\max{TODO: prose}
+
+\begin{theorem}[Ad\'amek]
+  \label{thm:adamek}
+  If a functor $F : \mathcal C \to \mathcal C$ preserves
+  $\omega$-colimits and $\mathcal C$ has an initial object and
+  $\omega$-colimits, then $F$ has an initial algebra given by the
+  colimit of the chain
+  \[ 0 \to F(0) \to F(F(0)) \to \cdots \]
+\end{theorem}
+
+%% That is, strings match $A \lto B$ if when prepended with a parse of $A$ they
+%% complete to parses of $B$. 
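+
+As a sample calculation (ours, not part of the formal development),
+the clauses for characters and $\otimes$ give
+\[
+  \sem{c \otimes c'}\gamma\,w = \{ (w_1,w_2,c,c') \pipe w_1w_2 = w
+  \wedge w_1 = c \wedge w_2 = c' \},
+\]
+a one-element set when $w = cc'$ and empty otherwise. Similarly, for a
+Kleene star encoded as $A^{*} = \mu X.\, I \oplus (A \otimes X)$, the
+$n$-th stage of the chain $0 \to F(0) \to F(F(0)) \to \cdots$ of
+\Cref{thm:adamek} contains the parses with fewer than $n$ unrollings,
+so an element of $\sem{A^{*}}\gamma\,w$ amounts to a decomposition
+$w = w_1 \cdots w_n$ together with parses $a_i \in \sem{A}\gamma\,w_i$.
+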
In this manner, the linear function types generalize
+%% Brzozowski's notion of derivative
+%% \cite{brzozowskiDerivativesRegularExpressions1964}.
+%% Brzozowski initially only gave an accounting of this operation for
+%% generalized regular expressions, but later work from Might et al.\ demonstrates that the same
+%% construction can be generalized to context free grammars
+%% \cite{mightParsingDerivativesFunctional2011}. Here, via the linear function
+%% types, the same notion of derivative extends to the grammars of \theoryabbv.
+
+%% Note that, to ensure that the linear function types do indeed generalize Brzozowski
+%% derivatives, we must include the equalities in \cref{fig:brzozowskideriv} as axioms\max{we must? why must we}.
+%% \pedro{We should further explain why these equations are required, or at the very
+%% least give some intuition}
+
+%% Of course, all the above statements for the left function type also have
+%% corresponding analogues for the
+%% right-handed counterpart.
+
+%% \begin{figure}
+%% \begin{align*}
+%% c\lto c &\equiv I\\
+%% c\lto d &\equiv \bot\\
+%% c\lto I &\equiv \bot\\
+%% c\lto 0 &\equiv \bot\\
+%% c\lto (A \otimes B) & \equiv (c\lto A) \otimes B + (A \amp I) \otimes (c\lto B)\\
+%% c\lto A^* &\equiv (A \amp I)^{*} \otimes (c \lto A) \otimes A^* \\
+%% c\lto (A + B) &\equiv (c\lto A) + (c\lto B)
+%% \end{align*}
+%% \caption{Equality for Brzozowski Derivatives}
+%% \label{fig:brzozowskideriv}
+%% \end{figure}
+
+%% \paragraph{LNL Dependent Types}
+%% \steven{talk about how binary sum and binary with are definable over $\Bool$}
+%% Given a non-linear type $X$, we may form both the dependent product type
+%% $\LinPiTy x X A$ and the dependent pair type $\LinSigTy x X A$ as linear types
+%% where $x$ is free in $A$.
+
+%% $\sem{\LinPiTy x X A} \gamma w = \Pi(x:\sem{X}\gamma) \sem{A}(\gamma,x) w$
+
+%% $\sem{\LinSigTy x X A} \gamma w = \Sigma(x:\sem{X}\gamma) \sem{A}(\gamma,x) w$
+
+%% The grammar semantics of the linear product type is
+%% indeed a dependent function out of the
+%% semantics of $X$. Likewise, the grammar semantics of the linear dependent pair type is a dependent
+%% pair in $\Set$.\pedro{The connection to grammar semantics needs to be better explained. For instance,
+%% we should explain that disjunction corresponds to ``or'' of grammars}
+
+%% Note that even though we do not take the binary additive conjunction and
+%% disjunction as primitive, we may define them via these LNL dependent types. In
+%% particular, via a dependence on $\Bool$.
+%% %
+%% \[
+%% A \amp B := \LinPiTy b \Bool {C(b)}
+%% \]
+%% \[
+%% A \oplus B := \LinSigTy b \Bool {C(b)},
+%% \]
+%% where $C(\true) = A, C(\false) = B$.
+
+%% \paragraph{Universal Type}\max{this is absolutely not the right terminology. Universal type means multiple other completely different things}
+%% \steven{Definable as nullary product}
+%% The universal type $\top$ may be formed in any context.
+%% %
+%% \[ \sem{\top} \gamma w = \{ \ast \}\]
+%% %
+%% Its grammar semantics outputs the unit type in $\Set$ for all input strings in all contexts.
+
+%% \paragraph{Empty Type}
+%% \steven{Definable as nullary sum}
+%% The empty type $\bot$ has no inhabitants. The elimination for the empty type
+%% witnesses the principle of explosion --- i.e.\ from a term of type $\bot$ we may
+%% introduce a term $\mathsf{absurd}_{A} : A$ for any type $A$. 
+
+%% %
+%% \[ \sem {\bot} \gamma w = \emptyset \]
+%% %
+%% \paragraph{The $G$ Modality}
+%% \steven{Rename this}
+%% \steven{Compare to persistence modalities from separation logic}
+%% The $G$ modality provides the embedding of linear types in the empty context
+%% into non-linear types. The introduction and elimination forms for $G A$ obey the
+%% same rules as given in \cite{bentonMixedLinearNonlinear1995} and \cite{krishnaswami_integrating_2015}.
+
+%% The left adjoint $F$ from non-linear types back to linear types may be defined
+%% as,
+%% %
+%% \[
+%% F X := \LinSigTy x X I
+%% \]
+%% %
+%% \steven{Elaborate on $G$ more and rename $G A$ to $\ltonl A$ in this section}
+
+%% \pedro{Agreed :) maybe mention connections to persistent propositions from the
+%% separation logic literature}
+
+%% The semantics of $G A$ are exactly the semantics of $A$ at the empty word $\varepsilon$.
+%% %
+%% \[ \sem{G A} \gamma = \sem{A} \gamma \varepsilon \]
+%% %
+%% %
+%% \paragraph{Recursive Types}
+%% \steven{reflavor this section as ``indexed inductive'' instead of ``recursive'',
+%% and entirely rework with the new inductive definition}
+%% \steven{The non-indexed inductive are definable as being trivially indexed over unit}
+%% Recursive linear types may be defined via the least-fixed point operator $\mu$.
+% The grammar semantics of $\mu x. A$ is the fixed point induced by the
+% grammar semantics of $A$.
+% %
+% \[ \sem{\mu x. A} \gamma = \mu (x:\Gr_i). \sem{A}(\gamma,x) \]
+% %
+% \pedro{We should justify, even if briefly, the existence of this
+% fixed-point}
+
+% Observe that we need not take the Kleene star as a primitive
+% grammar constructor, as it is definable as a fixed point.
+% The Kleene star of a grammar $g$ is given as,
+
+% \[
+% g^{*} := \mu X . I \oplus (g \otimes X)
+% \]
+
+% \begin{figure}[h!]
+% \begin{mathpar}
+% \inferrule
+% {\Gamma ; \Delta \vdash p : I}
+% {\Gamma ; \Delta \vdash \mathsf{nil}(p) : g^{*}}

+% \and

+% \inferrule
+% {\Gamma ; \Delta \vdash p : g \\ \Gamma ; \Delta' \vdash q
+% : g^{*}}
+% {\Gamma ; \Delta , \Delta' \vdash \mathsf{cons}(p , q) : g^{*}}

+% \\

+% \inferrule
+% {
+% \Gamma ; \Delta \vdash p : g^{*} \\
+% \Gamma ; \cdot \vdash p_{\varepsilon} : h \\
+% \Gamma ; x : g , y : h \vdash p_{\ast} : h
+% }
+% {\Gamma ; \Delta \vdash \mathsf{foldr}(p_{\varepsilon} , p_{\ast})(p) : h}
+% \end{mathpar}
+% \caption{Kleene Star Rules}
+% \label{fig:star}
+% \end{figure}

+% Likewise, $g^{*}$ has admissible introduction and
+% elimination rules, shown in \cref{fig:star}. Note that this
+% definition of $g^{*}$ and these
+% rules arbitrarily assign a handedness to the Kleene star.
+% We could just as well have taken it to be a fixed point of
+% $I \oplus (X \otimes g)$. In fact, the definitions are
+% equivalent, as the existence of the $\mathsf{foldl}$ term below
+% shows that $g^{*}$ is also a fixed point of
+% $I \oplus (X \otimes g)$.
+% This is a marked point of difference from Kleene
+% algebra with recursion, where the fixed points for the left and right variants
+% of Kleene star need not agree \cite{leiss}. 
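+
+% For reference, a minimal sketch of the same left-fold-via-right-fold
+% trick for ordinary lists, in Agda (illustrative names only, not part
+% of our formalization):
+% \begin{lstlisting}
+% open import Agda.Builtin.List
+%
+% foldr : {A B : Set} → (A → B → B) → B → List A → B
+% foldr f b [] = b
+% foldr f b (x ∷ xs) = f x (foldr f b xs)
+%
+% -- Curry each element into a function extending the accumulator,
+% -- then right-fold those functions; applying the resulting function
+% -- to the initial accumulator runs them left to right, i.e. a left fold.
+% foldl : {A B : Set} → (B → A → B) → B → List A → B
+% foldl f b xs = foldr (λ a g b' → g (f b' a)) (λ b' → b') xs b
+% \end{lstlisting}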
+ +% \begin{equation} +% \label{eq:foldl} +% \inferrule +% { +% \Gamma ; \Delta \vdash p : g^{*} \\ +% \Gamma ; \cdot \vdash p_{\varepsilon} : h \\ +%% % \Gamma ; y : h , x : h \vdash p_{\ast} : h +%% % } +%% % {\Gamma ; \Delta \vdash \mathsf{foldl}(p_{\varepsilon} , p_{\ast}) : g^{*}} +%% % \end{equation} + +%% % In fact, the $\mathsf{foldl}$ term is defined using +%% % $\mathsf{foldr}$ --- much in the same way one +%% % may define a left fold over lists in terms of a right fold +%% % in a functional programming language. +%% % The underlying trick is to fold over a list of linear functions +%% % instead of the original string. We curry each character $c$ +%% % of the string into a function that concatenates $c$, and +%% % right fold over this list of linear functions. Because function +%% % application is left-associative, this results in a left +%% % fold over the original string. + +%% % We only take fixed points of a single variable as a +%% % primitive operation in the type theory, but we may apply +%% % Beki\`c's theorem \cite{Bekić1984} to define an admissible +%% % notion of multivariate fixed point. This is particularly +%% % useful for defining grammars that encode the states of an +%% % automaton, as we will see in \cref{sec:automata}. In \cref{fig:multifix} we provide the +%% % introduction and elimination principles for such a fixed +%% % point, where $\sigma$ is the substitution that unrolls the +%% % mutually recursive definitions one level. That is, + +%% % \begin{align*} +%% % \sigma = \{ & \mu(X_{1} = A_{1} \dots, X_{n} = A_{n}).X_{1} / X_{1} , \dots, \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{n} / X_{n} \} +%% % \end{align*} + +%% % \begin{figure} +%% % \begin{mathpar} +%% % \inferrule +%% % {\Gamma ; \Delta \vdash e : \simulsubst {A_{k}} {\sigma}} +%% % {\Gamma ; \Delta \vdash \mathsf{cons}~e : \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{k}} + +%% % \and + +%% % \inferrule +%% % {\Gamma ; \Delta \vdash e : \mu(X_{1} = A_{1}, \dots, X_{n} = A_{n}).X_{k} \\ +%% % \Gamma ; x_{j} : \simulsubst {A_{j}}{\sigma} \vdash e_{j} : B_{j}\quad \forall j +%% % } +%% % {\Gamma; \Delta \vdash \mathsf{mfold}(x_{1}.e_{1}, \dots, x_{n}.e_{n})(e) : B_{k}} +%% % \end{mathpar} +%% % \caption{Multi-fixed Points} +%% % \label{fig:multifix} +%% % \end{figure} + +%% %% \section{Categorical Semantics of \theoryabbv} +%% %% \label{sec:categorify} + +%% As described in previous sections, \theoryabbv has a ``standard'' +%% interpretation where non-linear types denote sets, linear types denote +%% formal grammars, non-linear terms denote functions and linear terms +%% denote parse transformers. +%% % +%% Further, as is typical for type theory, every type constructor in the +%% calculus is interpreted by using a \emph{universal construction} in +%% either the category of sets or the category of grammars\footnote{the +%% only exception is that the axioms for Brzozowski derivatives we +%% considered do not follow solely from universal properties. In this +%% section we focus on models that do not necessarily satisfy these +%% axioms.}. +%% % +%% This leads to an immediate opportunity for generalization: the type +%% theory has in addition to the standard grammar-theoretic +%% interpretation, an interpretation in any categorical structure that +%% exhibits the same universal constructions. 
+%% % +%% On the one hand this gives us a structured way to formalize the +%% standard semantics precisely, but additionally it enables us to +%% consider \emph{non-standard} models in the next section that point to +%% further applications as well as meta-theoretic results. + +%% In developing this categorical semantics, we will start with the +%% closest analogue from formal language theory: Kleene algebra. Kleene +%% algebras are an important tool in the theory of regular languages +%% serving as a bridge between algebraic reasoning and equivalence of +%% regular expressions. More broadly, through various extensions, they +%% serve as a theoretical substrate to studying different kinds of formal +%% languages. +%% % +%% We can then see that the ``regular fragment'' of our type theory +%% (i.e., just characters, $\otimes$, $\oplus$, $0,1$ and a Kleene star) +%% has models in what we call a \emph{Kleene category} a categorification +%% of Kleene algebra from posets to categories. +%% % +%% We then further develop this into our final notion of model, which we +%% call a \emph{Chomsky category} as it can model not just regular +%% languages, but the full Chomsky hierarchy. + +%% \subsection{Comparison to Kleene Algebra} +%% \subsection{Kleene Algebra and Kleene Categories} +%% A Kleene algebra is a tuple $(A, +, \cdot, (-)^*, 1, 0)$, where $A$ is +%% a set, $+$ and $\cdot$ are binary operations over $A$, $(-)^*$ is a +%% function over $A$, and $1$ and $0$ are constants. These structures +%% satisfy the axioms depicted in Figure~\ref{fig:axioms}. + +%% \begin{figure} +%% \begin{align*} +%% x + (y + z) &= (x + y) + z & x + y &= y + x\\ +%% x + 0 &= x & x + x &= x\\ +%% x(yz) &= (xy)z & x1 &= 1x = x\\ +%% x(y + z) &= xy + xz & (x + y)z &= xz + yz\\ +%% x0 &= 0x = x & & \\ +%% 1 + aa^* &\leq a^* & 1 + a^*a &\leq a^*\\ +%% b + ax \leq x &\implies a^*b \leq x & b + xa \leq x &\implies ba^* \leq x +%% \end{align*} +%% \caption{Kleene algebra axioms} +%% \label{fig:axioms} +%% \end{figure} + +%% The addition operation can be used to define the partial order +%% structure $a \leq b$ if, and only if, $a + b = b$. In the theory of +%% formal languages, this order structure can be used to model language +%% containment. In this section, we categorify the concept of Kleene +%% algebra and build on top of it in order to define an abstract theory +%% of parsing. We start by defining \emph{Kleene categories}. + +%% \begin{definition} +%% A Kleene category is a distributive monoidal category $\cat{K}$ +%% such that for every objects $A$ and $B$, the endofunctors $F_{A, B} +%% = B + A \otimes X$ and $G_{A, B} = B + X \otimes A$ have initial +%% algebras (denoted $\mu X.\, F_{A, B}(X)$) such that $B \otimes (\mu +%% X.\, F_{A, 1}) \cong \mu X.\, F_{A, B}(X)$ and the analogous isomorphism +%% for $G_{A,B}$ also holds. +%% \end{definition} + +%% As a sanity check, note that Kleene algebras are indeed examples of +%% Kleene categories. + +%% \begin{example} +%% Every Kleene algebra, seen a posetal category, is a Kleene category. +%% The product $\cdot$ is a monoidal product and the addition is a +%% least-upper bound, which corresponds to a coproduct. Lastly, the +%% axioms of the Kleene star have a direct correspondence to the +%% coherence conditions postulated by the initial algebras of Kleene +%% categories. +%% \end{example} + +%% This example provides a neat categorical justification to how +%% restrictive Kleene algebras are in terms of reasoning about +%% languages. 
By only having at most one morphism between objects, there +%% is not a lot of information they can convey. In this case, the only +%% information you get is language containment. As demonstrated by +%% $\mathbf{Gr}$, the extra degrees of freedom granted by having more +%% morphisms gives you more algebraic structure for reasoning about +%% languages. + +%% For the next example, we see an unexpected connection with the theory +%% of substructural logics. + +%% \begin{example} +%% The opposite category of every Kleene category is a model of a variant of +%% conjunctive ordered logic, where the Kleene star plays the role of the ``of +%% course'' modality from substructural logics which allows hypotheses to +%% be discarded or duplicated. +%% \end{example} + +%% As we have seen, the proposed axioms are a direct translation of the +%% Kleene algebra axioms to a categorical setting. Its most unusual aspect is the +%% axiomatization of the Kleene star as a family of initial algebras +%% satisfying certain isomorphisms. If the Kleene category $\cat{K}$ has +%% more structure, then these isomorphisms hold ``for free''. -To prove this result we will use a logical families model. We give a -brief overview of this model concretely: -\begin{enumerate} -\item A context $\Gamma$ denotes a family of sets indexed by closing substitutions $\hat\Gamma : (\cdot \vdash \Gamma) \Rightarrow \Set_i$ -\item A type $\Gamma \vdash X : U_i$ denotes a family of sets $\hat X : \Pi(\gamma:\cdot \vdash \Gamma) \hat\Gamma \Rightarrow (\cdot \vdash \simulsubst X \gamma) \Rightarrow \Set_i$ -\item A term $\Gamma \vdash e : X$ denotes a section $\hat e : \Pi(\gamma)\Pi(\hat\gamma)\hat X \gamma \hat\gamma (\simulsubst e \gamma)$ -\item A linear type $\Gamma \vdash A : L_i$ denotes a family of grammars $\hat A : \Pi(\gamma:\cdot\vdash\Gamma)\,\hat\Gamma \Rightarrow \Pi(w:\Sigma^*) (\cdot;\lceil w\rceil \vdash A[\gamma])\Rightarrow \Set_i$, and the denotation of a linear context $\Delta$ is similar. -\item A linear term $\Gamma;\Delta \vdash e : A$ denotes a function \[\hat e : \Pi(\gamma)\Pi(\hat\gamma)\Pi(w)\Pi(\delta : \lceil w \rceil \vdash \simulsubst \Delta \gamma) \hat\Delta \gamma \hat\gamma \delta \Rightarrow \hat A \gamma \hat\gamma w {(\simulsubst {\simulsubst e \gamma} \delta)}\] -\end{enumerate} -And some of the constructions: -\begin{enumerate} -\item $\widehat {(G A)} \gamma \hat\gamma e = \hat A \gamma \hat\gamma \varepsilon (G^{-1}e)$ -\item $\widehat {(A \otimes B)} \gamma \hat\gamma w e = \Sigma(w_Aw_B = w)\Sigma(e_A)\Sigma(e_B) (e_A,e_B) = e \wedge \hat A \gamma \hat \gamma w_A e_A \times \hat B \gamma \hat \gamma w_B e_B$ -\item $\widehat {(A \lto B)} \gamma \hat\gamma w e = \Pi(w_A)\Pi(e_A) \hat A \gamma\hat\gamma w_A e_A \Rightarrow \hat B \gamma\hat\gamma (ww_A) (\applto {e_A} e)$ -\end{enumerate} +%% \begin{theorem} +%% \label{th:kleeneclosed} +%% Let $\cat{K}$ be a Kleene category such that it is also monoidal +%% closed. Then, the initial algebras isomorphisms hold automatically. +%% \end{theorem} +%% \begin{proof} +%% We prove this by the unicity (up-to isomorphism) of initial +%% algebras. Let $[hd, tl]: I + (\mu X.\, F_{A, I}(X)) \otimes A \to +%% (\mu X.\, F_{A, I}(X))$ be the initial algebra structure of $(\mu +%% X.\, F_{A, I}(X))$ and consider the map $[hd, tl] : B + B \otimes +%% (\mu X.\, F_{A, I}(X)) \otimes A \to B\otimes (\mu X.\, F_{A, +%% I}(X))$. 
+ +%% Now, let $[f,g] : B + A \otimes Y \to Y$ be an $F_{A,B}$-algebra and +%% we want to show that there is a unique algebra morphism $h : \mu X.\, F_{A,I} \to B \lto Y$. We can show existence and +%% uniqueness by showing that the diagram on top commutes if, and +%% only if, the diagram on the bottom commutes: + +%% % https://q.uiver.app/#q=WzAsOCxbMCwwLCJCICsgQiBcXG90aW1lcyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFsyLDAsIkIgKyBZIFxcb3RpbWVzIEEiXSxbMiwyLCJZIl0sWzAsMiwiQiBcXG90aW1lcyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFswLDMsIjEgKyAoXFxtdSBYLlxcLCAxICsgWCBcXG90aW1lcyBBKSJdLFswLDUsIlxcbXUgWC5cXCwgMSArIFggXFxvdGltZXMgQSJdLFsyLDMsIjEgKyAoIEIgXFxsdG8gWSkgXFxvdGltZXMgQSJdLFsyLDUsIkIgXFxsdG8gWSJdLFswLDEsImlkICsgKGlkIFxcb3RpbWVzIGg7IGV2KSBcXG90aW1lcyBpZF9BIl0sWzEsMiwiW2YsZ10iXSxbMywyLCJpZCBcXG90aW1lcyBoOyBldiIsMl0sWzAsMywiW2lkIFxcb3RpbWVzIGgsIGlkIFxcb3RpbWVzIHRsXSIsMl0sWzQsNiwiaWQgKyAoaCBcXG90aW1lcyBYKVxcb3RpbWVzIGlkIl0sWzUsNywiaCIsMl0sWzYsNywiW2YnLCBnJ10iXSxbNCw1LCJbaGQsIHRsXSIsMl1d +%% \[\begin{tikzcd} +%% {B + B \otimes (\mu X.\, I + X \otimes A)} && {B + Y \otimes A} \\ +%% \\ +%% {B \otimes (\mu X.\, I + X \otimes A)} && Y \\ +%% {I + (\mu X.\, I + X \otimes A)} && {I + ( B \lto Y) \otimes A} \\ +%% \\ +%% {\mu X.\, I + X \otimes A} && {B \lto Y} +%% \arrow["{id + (id \otimes h; ev) \otimes id_A}", from=1-1, to=1-3] +%% \arrow["{[f,g]}", from=1-3, to=3-3] +%% \arrow["{id \otimes h; ev}"', from=3-1, to=3-3] +%% \arrow["{[id \otimes h, id \otimes tl]}"', from=1-1, to=3-1] +%% \arrow["{id + (h \otimes X)\otimes id}", from=4-1, to=4-3] +%% \arrow["h"', from=6-1, to=6-3] +%% \arrow["{[f', g']}", from=4-3, to=6-3] +%% \arrow["{[hd, tl]}"', from=4-1, to=6-1] +%% \end{tikzcd}\] +%% This equivalence follows by using the adjunction structure given +%% by the monoidal closed structure of $\cat{K}$. A completely analogous +%% argument for $G_{A,B}$ also holds. Furthermore, by generalizing the +%% construction of \Cref{sec:formaltype}, we can also show that from +%% the monoidal closed assumption it follows that $\mu X.\, F_{A, I}(X) \cong \mu X.\, G_{A, I}(X)$ +%% \end{proof} + +%% Something surprising about this lemma is that it provides an alternative +%% perspective on the observation that if a Kleene algebra has an +%% residuation operation, also called action algebra \cite{kozen1994action}, +%% then the Kleene star admits a simpler axiomatization. + +%% Since we want Kleene categories to generalize our notion of formal +%% grammars as presheaves $\String \to \Set$, we prove that they do +%% indeed form a Kleene category. We start by presenting a well-known +%% construction from presheaf categories. + +%% \begin{definition} +%% Let $\cat{C}$ be a locally small monoidal category and $F$, $G$ be +%% two functors $\cat{C} \to \Set$. Their Day convolution tensor +%% product is defined as the following coend formula: +%% \[ +%% (F \otimes_{Day} G)(x) = \int^{(y,z) \in \cat{C}\times\cat{C}}\cat{C}(y\otimes z, x) \times F(y) \times G(z) +%% \] +%% Dually, its internal hom is given by the following end formula: +%% \[ +%% (F \lto_{Day} G)(x) = \int_{y} \Set(F(y), G(x \otimes y)) +%% \] +%% \end{definition} + +%% \begin{lemma}[Day \cite{day1970construction}] +%% Under the assumptions above, the presheaf category $\Set^{\cat{C}}$ is +%% monoidal closed. +%% \end{lemma} + +%% %% \begin{theorem} +%% %% Let $\cat{K}$ be a Kleene category and $A$ a discrete category. +%% %% The functor category $[A, \cat{K}]$. +%% %% (HOW GENERAL SHOULD THIS THEOREM BE? BY ASSUMING ENOUGH STRUCTURE, +%% %% E.G. 
K = Set, THIS THEOREM BECOMES SIMPLE TO PROVE) +%% %% \end{theorem} +%% \begin{theorem} +%% If $\cat{C}$ is a locally small monoidal category, then +%% $\Set^{\cat{C}}$ is a Kleene category. +%% \end{theorem} +%% \begin{proof} + +%% By the lemma above, $\Set^{\cat{C}}$ is monoidal closed, and since it +%% is a presheaf category, it has coproducts. Furthermore, the tensor +%% is a left adjoint, i.e. it preserves colimits and, therefore, it is +%% a distributive category. + +%% As for the Kleene star, since presheaf categories admit small colimits, +%% the initial algebra of the functors $F_{A,B}$ and $G_{A,B}$ can be +%% defined as the filtered colimit of the diagrams: + +%% From Theorem~\ref{th:kleeneclosed} it follows that these initial +%% algebras satisfy the required isomorphisms and this concludes the +%% proof. +%% \end{proof} + +%% \begin{corollary} +%% For every alphabet $\Sigma$, the presheaf category $\Set^{\cat{\Sigma^*}}$ +%% is a Kleene category. +%% \end{corollary} +%% \begin{proof} +%% Note that string concatenation and the empty string make the +%% discrete category $\Sigma^*$ a strict monoidal category. +%% \end{proof} + +%% Much like in the posetal case, the abstract structure of a Kleene +%% category is expressive enough to synthetically reason about regular +%% languages. A significant difference between them is that while Kleene +%% algebras can reason about language containment, Kleene categories can +%% reason about \emph{ambiguity}, \emph{strong equivalence} of grammars. + +%% \subsection{Lambek Hyperdoctrines and Chomsky Hyperdoctrines} + +%% Though Kleene categories are expressive enough to reason about +%% concepts that are outside of reach of Kleene algebras, their +%% simply-typed nature makes them not so expressive from a type theoretic +%% point of view. This is limiting because type theories are successful +%% syntactic frameworks for manipulating complicated categorical +%% structures while avoiding some issues common in category theory, such +%% as coherence issues. + +%% With this in mind, we want to design a categorical semantics that +%% builds on top of Kleene categories with the goal of extending them +%% with dependent types and making them capable reasoning about languages +%% and their parsers. This leads us to the abstract notion of model we +%% are interested in capturing with \theoryabbv: a \emph{Chomsky +%% category}. + +%% We do this in two stages: first we define a \emph{Lambek hyperdoctrine} to be a +%% notion of model for the judgmental structure of \theoryabbv: that is we have +%% notions of linear and non-linear type, contexts, substitution and terms, but do +%% not assume any particular type constructions exist. Then we will define when a +%% Lambek hyperdoctrine is a Chomsky hyperdoctrine, meaning it can interpret all +%% the type formers of \theoryabbv and therefore arbitrary strength grammars. + +%% \begin{definition}[Lambek Hyperdoctrine] +%% A \emph{Lambek hyperdoctrine} consists of +%% \begin{enumerate} +%% \item A category $\mathcal C$ with a terminal object. +%% \item A category with families structure over $\mathcal C$. +%% \item A contravariant functor $L : \mathcal C^{o} \to \textrm{MultiCat}$ from +%% $\mathcal C$ to the category of Multicategories. +%% \end{enumerate} +%% \end{definition} +%% Here the objects of $\mathcal C$ model the non-linear contexts, and morphisms +%% model non-linear substitutions. The category with families structure over +%% $\mathcal C$ models the dependent non-linear types. 
Finally the +%% ``hyperdoctrine'' of multicategories $L$ models the linear types and terms. The +%% fact that this is functorial in $\mathcal C$ corresponds to the fact that linear +%% types are all relative to a non-linear context and that linear variables and +%% composition commute with non-linear substitution. + +%% We will have three main Lambek hyperdoctrines of interest in this paper: the +%% ``standard model'' of sets and grammars, the ``syntactic model'' given by our +%% type theory itself and lastly a gluing model introduced in +%% Section~\ref{sec:canonicity} to prove the canonicity theorem. +%% \begin{example} +%% \begin{enumerate} +%% \item Let $\mathcal C$ be the category of sets, equipped with its usual +%% category with families structures of families. Define $L : \mathcal C^{o} +%% \to \textrm{MultiCat}$ to map $L(X)$ to the representable multicategory +%% $(\Set^{\Sigma^*})^X$ of $X$-indexed families of grammars where the monoidal +%% structure is given pointwise by the Day convolution monoidal structure. +%% \item We can build a ``syntactic'' Lambek hyperdoctrine $\Syn(\Sigma)$ from the syntax +%% itself: the category $\mathcal C$ is given by non-linear contexts and +%% substitutions, the category with families by the non-linear types and terms +%% and the hyperdoctrine of multicategories by the linear types and terms. +%% \end{enumerate} +%% \end{example} + +%% Next each type linear and non-linear type constructor corresponds to extra data +%% on a Lambek hyperdoctrine. +%% \begin{definition} +%% \label{def:chomsky-data} +%% \begin{enumerate} +%% \item Dependent type structure: standard (say extensional type theory, give a reference) +%% \item Universes +%% \item Inductive grammars? +%% \item Non-inductive Grammar constructors: standard monoidal category stuff +%% \end{enumerate} +%% \end{definition} +%% \begin{definition}[Chomsky Hyperdoctrine] +%% A Chomsky Hyperdoctrine is a Lambek hyperdoctrine equipped with all of the data in Definition~\ref{def:chomsky-data}. +%% \end{definition} +%% \pedro{We should probably define some of the words in this definition} +%% \begin{definition} +%% A Chomsky category is a locally Cartesian category with two hierarchies of +%% universes $\{L_i\}_{i\in \nat}$ and $\{U_i\}_{i\in \nat}$ such that +%% every $L_i$ and $U_i$ are $U_{i+1}$-small. Furthermore, we require +%% $U_i$ to be closed under dependent products and sum, +%% $L_i$ to be closed under the Kleene category connectives, +%% dependent products, left and right closed structures, with +%% a type constructor $G : L_i \to U_i$ and a linear dependent sum +%% going the other direction. +%% \end{definition} + +%% \steven{Max suggests augmenting the definition of a Chomsky category to +%% something like two categories $L$ and $U$, and $U$ is Cartesian closed I don't +%% fully recall the rest. + + +%% My best guess is that you take $L$ a Kleene category with a hierarchy of universes +%% two, and you further require that $L$ is $Psh(U)$-enriched, except he +%% suggested a further adjective on these presheaves. Perhaps representability? +%% } -First, the category with families will be -the category of logical families over set contexts/types -$\Delta$/$A$. Then the propositional portion will be defined by -mapping a logical family $\hat \Gamma \to \Gamma$ - -First, let $L$ be the category of BI formulae and proofs (quotiented -by $\beta\eta$ equality). 
Define a functor $N : L \to \Set^{\Sigma^*}$ by -\[ N(\phi)(w) = L(w,\phi) \] - -Then define the gluing category $\mathcal G$ as the comma category -$\Set^{\Sigma^*}/N$. That is, an object of this category is a pair of -a formula $\phi \in L$ and an object $S \in \mathcal -P(\Sigma^*)/N(\phi)$. We can then use the equivalence $\mathcal -P(\Sigma^*)/N(\phi) \cong \mathcal P(\int N(\phi))$ to get a simple -description of such an $S$: it is simply a family of sets indexed by -proofs $L(w,\phi)$: -\[ \prod_{w\in\Sigma^*} L(w,\phi) \to \Set \] -This category clearly comes with a projection functor $\pi : \mathcal -G \to \mathcal L$ and then our goal is to define a section by using -the universal property of $\mathcal L$. - -To this end we define -\begin{enumerate} -\item $(\phi, S) \otimes (\psi, T) = (\phi \otimes \psi, S\otimes T)$ where - \[ (S \otimes T)(w, p) = (w_1w_2 = w) \times (q_1,q_2 = p) \times S\,w_1\,q_1 \times T\,w_2\,q_2\] -\item $(\phi, S) \multimap (\psi, T) = (\phi \multimap \psi, S \multimap T)$ where - \[ (S \multimap T)(w,p) = w' \to q \to S\,w'\,q \to T (ww') (p\,q) \] -\item $\mu X. ??$ ?? -\end{enumerate} +%% \begin{theorem} +%% The presheaf category $\Grammar = \Set^{\cat{\Sigma^*}}$ is a Chomsky category. +%% \end{theorem} -\pedro{We should conclude this section by explaining the relevance of the canonicity -theorem. Could also be done before stating the theorem.} +%% Further, the syntactic category of \theoryname is manifestly a Chomsky category. + +%% \pedro{This is likely true, but if we explicitly say so, this warrants a proof. I think that +%% if we don't say anything about the syntactic category, reviewers won't mind.} + +%% \steven{I agree that we don't want to say anything that opens unnecessary +%% questions for proofs we haven't written. However, it seems hard to make the +%% case that we have the right categorical model of the syntax if this statement +%% isn't true. By restating our definition of Chomsky category, this should be +%% obvious or a quick proof} + +%% \pedro{I agree, then we should add the quick proof :) } + +%% \subsection{Concrete Models of \theoryabbv} +%% \label{sec:othermodels} + +%% \steven{Because we can define a version of semantic actions internally, we +%% shouldn't put it as a separate model} + +%% One of the powers of type theories is that it can be profitable to interpret them +%% in various models. In this section, by using our just-defined Chomsky categories, +%% we show how other useful concepts from formal language theory can also be organized +%% as models of \theoryabbv. We illustrate this point by providing two examples that +%% are closely related to the theory of formal languages: language equivalence and +%% semantic actions. Furthermore, in order to justify how $\mathbf{Gr}$ relates to +%% more traditional notions of parsing, we define a glued model that proves a +%% canonicity property of grammar terms. + + +%% \subsection{Language Semantics} +%% Every grammar induces a language semantics. Also languages can be taken as a +%% propositionally truncated view of the syntax. Logical equivalence should induce +%% weak equivalence, and thus even give a syntactic way to reason about language equivalence. +%% \steven{TODO language semantics} + +%% % \subsection{Semantic Actions} +%% % \steven{Tentatively planning to cut this subsection for an internal representation +%% % of semantic actions} +%% % Returning to the problem of parsing, the output of a parse usually is not the +%% % literal parse tree. 
Rather, the output is the result of some \emph{semantic
+%% % actions} run on the parse tree, which usually serve to remove some syntactic
+%% % details that are unnecessary for later processing.
+
+%% % Given a grammar $G : \String \to \Set$, we define a semantic action to be a set
+%% % $X$ with a function $f$ that produces a semantic element from any parse of $G$.
+
+%% % \[
+%% %   f : \PiTy w \String {G w \to X}
+%% % \]
+
+%% % Further, semantic actions can be arranged into a structured category.
+%% % Define $\SemAct$ as the comma
+%% % category $\Grammar / \Delta$, where $\Delta : \Set \to \Grammar$ defines a
+%% % discrete presheaf. That is, for a set $X$, $\Delta (X)(w) = X$ for all
+%% % $w \in \String$. As $\SemAct$ is defined as a comma category, it has a forgetful
+%% % functor into $\Grammar$. That is, $\SemAct$ serves as a notion of formal
+%% % grammar. Moreover, $\SemAct$ is a model of \theoryabbv.
+
+%% % \steven{It being a notion of formal grammar is distinct from being a model. This
+%% % probably warrants a proof}
+
+%% % \steven{TODO semantic actions}
+
+%% % \pedro{This is a very nice opportunity of showing off the supremacy of denotational
+%% % reasoning ;) We should probably prove the gluing lemma in the previous section
+%% % and apply it here and in the canonicity section. The actual proof might have to be
+%% % moved to the appendix, though}
+
+%% \subsection{Parse Canonicity}
+%% Canonicity is an important metatheoretic theorem in the type theory
+%% literature. It provides insight into the normal forms of terms and,
+%% therefore, into the computational aspects of the theory. Frequently, proving
+%% canonicity for boolean types, i.e. that every closed term of type bool
+%% reduces to either true or false, is enough to justify that the type
+%% theory being studied is well-behaved. In our case, however, since we
+%% want to connect \theoryabbv to parsers, we must provide a more
+%% detailed account of canonicity. In particular, we give a nonstandard semantics
+%% of \theoryabbv that carries a proof of canonicity along with it.
+
+%% If $\cdot \vdash A$ is a closed linear type then there are
+%% two obvious notions of what constitutes a ``parse'' of a string $w$
+%% according to the grammar $A$:
+%% \begin{enumerate}
+%% \item On the one hand we have the set-theoretic semantics just
+%%   defined, $\llbracket A \rrbracket \cdot w$
+%% \item On the other hand, we can view the string $w = c_1c_2\cdots$ as
+%%   a linear context $\lceil w \rceil = x_1:c_1,x_2:c_2,\ldots$ and
+%%   define a parse to be a $\beta\eta$-equivalence class of linear terms $\cdot;
+%%   \lceil w \rceil \vdash e : A$.
+%% \end{enumerate}
+%% It is not difficult to see, at least for the ``purely positive''
+%% formulae (those featuring only the positive connectives
+%% $0,+,I,\otimes,\mu, \overline\Sigma,c$), that
+%% every element $t \in \llbracket A \rrbracket w$ is a kind of tree and
+%% that the nodes of the tree correspond precisely to the introduction
+%% forms of the type. However, it is far less obvious that \emph{every}
+%% linear term $\lceil w \rceil \vdash p : \phi$ is equal to some
+%% sequence of introduction forms since proofs can include elimination
+%% forms as well. To show that this is indeed the case we give a
+%% \emph{canonicity} result for the calculus: that these two notions of
+%% parse coincide.
+
+%% \begin{definition}
+%%   A non-linear type $X$ is purely positive if it is built up using
+%%   only finite sums, finite products and least fixed points.
+
+%%   A linear type is purely positive if it is built up using only finite
+%%   sums, tensor products, generators $c$, least fixed points and linear
+%%   sigma types over purely positive non-linear types.
+%% \end{definition}
+
+%% \begin{definition}
+%%   %% Let $X$ be a closed non-linear type. The closed elements $\textrm{Cl}(X)$ of $X$ are the definitional equivalence classes of terms $\cdot \vdash e : X$.
+
+%%   Let $A$ be a closed linear type. The nerve $N(A)$ is a presheaf on
+%%   strings that takes a string $w$ to the definitional equivalence
+%%   classes of terms $\cdot; \lceil w\rceil \vdash e : A$.
+%% \end{definition}
+
+%% \begin{theorem}[Canonicity]
+%%   Let $A$ be a closed, purely positive linear type. Then there is an
+%%   isomorphism between $\llbracket A\rrbracket$ and $N(A)$.
+%% \end{theorem}
+%% \begin{proof}
+%%   We outline the proof here; more details are in the appendix. The
+%%   proof proceeds first by a standard logical families construction
+%%   that combines canonicity arguments for dependent type theory
+%%   TODO cite coquand
+%%   % \cite{coquand,etc}
+%%   with logical relations constructions for linear
+%%   types
+%%   TODO cite hylandschalk
+%%   % \cite{hylandschalk}
+%%   . It is easy to see by induction that the
+%%   logical family $\hat A$ for $A$ is isomorphic to $\llbracket A
+%%   \rrbracket$ and the fundamental lemma proves that the projection
+%%   morphism $p : \hat A \to N(A)$ has a section, the canonicalization
+%%   procedure. Then we establish again by induction that
+%%   canonicalization is also a retraction by observing that introduction
+%%   forms are taken to constructors.
+%% \end{proof}
+
+
+%% \begin{enumerate}
+%% \item Every term $\lceil w \rceil \vdash p : G + H$ is equal to $\sigma_1q$ or $\sigma_2 r$ (but not both)
+%% \item There are no terms $\lceil w \rceil \vdash p : 0$
+%% \item If there is a term $\lceil w \rceil \vdash p : c$ then $w = c$ and $p = x$.
+%% \item Every term $\lceil w \rceil \vdash p : G \otimes H$ is equal to $(q,r)$ for some $q,r$
+%% \item Every term $\lceil w \rceil \vdash p : \varepsilon$ is equal to $()$
+%% \item Every term $\lceil w \rceil \vdash p : c$ is equal to $x:c$
+%% \item Every term $\lceil w \rceil \vdash p : \mu X. G$ is equal to $\textrm{roll}(q)$ where $q : G(\mu X.G/X)$
+%% \item Every term $\lceil w \rceil \vdash p : (x:A) \times G$ is equal
+%%   to $(M,q)$ where $\cdot \vdash M : A$
+%% \end{enumerate}
+
+%% To prove this result we will use a logical families model. We give a
+%% brief overview of this model concretely:
+%% \begin{enumerate}
+%% \item A context $\Gamma$ denotes a family of sets indexed by closing substitutions $\hat\Gamma : (\cdot \vdash \Gamma) \Rightarrow \Set_i$
+%% \item A type $\Gamma \vdash X : U_i$ denotes a family of sets $\hat X : \Pi(\gamma:\cdot \vdash \Gamma) \hat\Gamma \Rightarrow (\cdot \vdash \simulsubst X \gamma) \Rightarrow \Set_i$
+%% \item A term $\Gamma \vdash e : X$ denotes a section $\hat e : \Pi(\gamma)\Pi(\hat\gamma)\hat X \gamma \hat\gamma (\simulsubst e \gamma)$
+%% \item A linear type $\Gamma \vdash A : L_i$ denotes a family of grammars $\hat A : \Pi(\gamma:\cdot\vdash\Gamma)\,\hat\Gamma \Rightarrow \Pi(w:\Sigma^*) (\cdot;\lceil w\rceil \vdash A[\gamma])\Rightarrow \Set_i$, and the denotation of a linear context $\Delta$ is similar.
+%% \item A linear term $\Gamma;\Delta \vdash e : A$ denotes a function \[\hat e : \Pi(\gamma)\Pi(\hat\gamma)\Pi(w)\Pi(\delta : \lceil w \rceil \vdash \simulsubst \Delta \gamma) \hat\Delta \gamma \hat\gamma \delta \Rightarrow \hat A \gamma \hat\gamma w {(\simulsubst {\simulsubst e \gamma} \delta)}\]
+%% \end{enumerate}
+%% And some of the constructions:
+%% \begin{enumerate}
+%% \item $\widehat {(G A)} \gamma \hat\gamma e = \hat A \gamma \hat\gamma \varepsilon (G^{-1}e)$
+%% \item $\widehat {(A \otimes B)} \gamma \hat\gamma w e = \Sigma(w_Aw_B = w)\Sigma(e_A)\Sigma(e_B) (e_A,e_B) = e \wedge \hat A \gamma \hat \gamma w_A e_A \times \hat B \gamma \hat \gamma w_B e_B$
+%% \item $\widehat {(A \lto B)} \gamma \hat\gamma w e = \Pi(w_A)\Pi(e_A) \hat A \gamma\hat\gamma w_A e_A \Rightarrow \hat B \gamma\hat\gamma (ww_A) (\applto e {e_A})$
+%% \end{enumerate}
+
+%% First, the category with families will be
+%% the category of logical families over set contexts/types
+%% $\Delta$/$A$. Then the propositional portion will be defined by
+%% mapping a logical family $\hat \Gamma \to \Gamma$
+
+%% First, let $L$ be the category of BI formulae and proofs (quotiented
+%% by $\beta\eta$ equality). Define a functor $N : L \to \Set^{\Sigma^*}$ by
+%% \[ N(\phi)(w) = L(w,\phi) \]
+
+%% Then define the gluing category $\mathcal G$ as the comma category
+%% $\Set^{\Sigma^*}/N$. That is, an object of this category is a pair of
+%% a formula $\phi \in L$ and an object $S \in \mathcal
+%% P(\Sigma^*)/N(\phi)$. We can then use the equivalence $\mathcal
+%% P(\Sigma^*)/N(\phi) \cong \mathcal P(\int N(\phi))$ to get a simple
+%% description of such an $S$: it is simply a family of sets indexed by
+%% proofs $L(w,\phi)$:
+%% \[ \prod_{w\in\Sigma^*} L(w,\phi) \to \Set \]
+%% This category clearly comes with a projection functor $\pi : \mathcal
+%% G \to \mathcal L$ and then our goal is to define a section by using
+%% the universal property of $\mathcal L$.
+
+%% To this end we define
+%% \begin{enumerate}
+%% \item $(\phi, S) \otimes (\psi, T) = (\phi \otimes \psi, S\otimes T)$ where
+%%   \[ (S \otimes T)(w, p) = (w_1w_2 = w) \times (q_1,q_2 = p) \times S\,w_1\,q_1 \times T\,w_2\,q_2\]
+%% \item $(\phi, S) \multimap (\psi, T) = (\phi \multimap \psi, S \multimap T)$ where
+%%   \[ (S \multimap T)(w,p) = w' \to q \to S\,w'\,q \to T (ww') (p\,q) \]
+%% \item $\mu X. ??$ ??
+%% \end{enumerate}
+
+%% \pedro{We should conclude this section by explaining the relevance of the canonicity
+%% theorem. Could also be done before stating the theorem.}
 
 \section{Discussion and Future Work}
 \label{sec:discussion}
-
-The core objects of study in the theory of formal grammars have stood stalwart
-for decades. Study of interesting extensions or restrictions of particular
-grammar formalisms has remained consistent line of research over the years.
-Occasionally, researchers will either explicitly create a type system for their
-formalism, or researchers will leverage the same underlying structure of their
-systems. In this paper, we have not
-done much \emph{per se}, other than gather these ideas, abstract over their
-commonalities, and give the common language between formalisms.
-
-\paragraph{Typed Approaches to Grammars}
+\paragraph{Lambek Calculus and Categorial Grammar}
+In 1958, Joachim Lambek introduced his syntactic calculus as a logical
+system for linguistic derivations \cite{lambek1958mathematics}.
In fact, one may view
+the subtheory of our calculus generated by $\otimes, \lto, \tol$ as a
+proof-relevant presentation of Lambek's original system.
+
+\steven{Should discussion of this connection to lambek calculus be spread
+  throughout the paper?}
+
+Lambek later explicitly connected his syntactic calculus to the structure of a
+non-commutative biclosed monoidal category \cite{lambek1988categorial}. There is
+a rich line of research following these observations by Lambek, including
+the study of pregroup grammars
+\cite{coeckeMathematicalFoundationsCompositional2010}, the presentation of
+abstract categorial grammars \cite{degrooteAbstractCategorialParsing2015}, and
+even Lambek calculus with dependent types \cite{luoLambekCalculusDependent}. As
+far as the authors can tell, these works are primarily interested in natural
+language semantics in the style of Montague grammars, rather than in parsing
+formal languages. That is, they aim to give a functorial semantic
+interpretation of natural language.
+
+There appear to be some implementations built on the abstract categorial
+grammar toolkit, but they too are focused on natural language and are not up
+to the task of verified formal language parsing.
+
+In particular, \cite{luoLambekCalculusDependent} seems similar to our work at
+first glance, but is less similar than it might look: as we understand it, the
+judgements and dependent types take on a different interpretation from ours.
+
+\steven{TODO Edit the above}
+\steven{Even though the above sounds bad, are there lambek/categorial things that are missed?}
+
+%% 1. More practical: integrating it into a larger verified development
+%%    - verified imperative implementation: Lambek logic ala separation
+%%    logic?
+%% 2. Semantic actions
+%% 3. Type systems as tree grammars
+
+\paragraph{Typed Approaches to Regular Expressions}
 The usage of a simple type system to reason about regular expressions was
 introduced by Frisch and Cardelli in 2004
 \cite{frischCardelli}, and later expanded by Henglein and Nielsen in 2011
 \cite{henglein_regular_2011} to handle proofs of regular expression
-containment.
-
-In 1992, while investigating the addition of arbitrary recursion to Kleene
+containment. In 1992, while investigating the addition of arbitrary recursion to Kleene
 algebra \cite{leiss}, Lei{\ss} used
 a least fixed-point operator
 instead of the Kleene star. Although he did not explicitly make use of a type system, or have access to the view of Frisch and
-Cardelli, Lei{\ss} had added the full power of inductive types to a type system of regular expressions.
+Cardelli, Lei{\ss} had added the full power of inductive types to a type system
+of regular expressions.
 
 Pratt's work on the residuation of action algebras in
 \cite{prattActionLogicPure1991} closely mirrors our observation that Brzozowski
 derivatives correspond to linear function types.
@@ -2332,24 +3481,34 @@ \section{Discussion and Future Work}
 adjunction between linear function types and tensor product in
 \theoryabbv.
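+
+To make this parallel concrete, here is a sketch of the correspondence as we
+read it, under the orientation $(A \lto B)\,w = \forall w'.\, A\,w' \to
+B\,(ww')$ (and symmetrically for $\tol$) used in our semantics. The
+residuation laws of action algebra become the two hom-isomorphisms of the
+tensor adjunction,
+\[
+  A \otimes B \vdash C
+  \quad\Longleftrightarrow\quad
+  A \vdash B \lto C
+  \quad\Longleftrightarrow\quad
+  B \vdash C \tol A,
+\]
+and instantiating with the single-character grammar $c$ computes derivatives
+of parses: $(c \lto L)\,w \cong L\,(wc)$ and $(L \tol c)\,w \cong L\,(cw)$,
+the latter being exactly the Brzozowski derivative of $L$ by $c$.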

-In \cite{firsovCertifiedNormalizationContextFree2015}, Firsov and Uustalu
-constructed a verified normalization procedure to turn a
-context-free grammar into an equivalent grammar in Chomsky normal form.
-Combining their work with a CYK parser, they have built a verified context-free
-grammar parser, \emph{up to weak equivalence}. To upgrade from a weak
-equivalence to a strong equivalence, they suggest updating their grammar
-formalism to treat parse trees as first-class proofs of language membership.
-That is, without explicitly stating so, they suggest that they next natural step
-is to reason about grammars in a system where parse trees are terms, as we have
-given in this paper.
+Our primary observation in this paper can be summarized as follows: all of
+the works above embed naturally into a single rich, unifying theory.
+
+\steven{I'd like to synthesize the discussion of these works with the categorial
+grammar works, but I am not sure how. It seems as though there are these
+parallel tracks of research that have missed each other for decades}

-\steven{Make the above paragraph shorter. Also idk if its worth including, but I
-like that they practically ask for a type theory}

+
+% In \cite{firsovCertifiedNormalizationContextFree2015}, Firsov and Uustalu
+% constructed a verified normalization procedure to turn a
+% context-free grammar into an equivalent grammar in Chomsky normal form.
+% Combining their work with a CYK parser, they have built a verified context-free
+% grammar parser, \emph{up to weak equivalence}. To upgrade from a weak
+% equivalence to a strong equivalence, they suggest updating their grammar
+% formalism to treat parse trees as first-class proofs of language membership.
+% That is, without explicitly stating so, they suggest that the next natural step
+% is to reason about grammars in a system where parse trees are terms, as we have
+% given in this paper.
+
+% \steven{Make the above paragraph shorter. Also idk if its worth including, but I
+% like that they practically ask for a type theory}
 
 \steven{Mention Neel's typed algebraic approach paper here. Say it is similar at
   first glance, as its parsing with types, but that he actually is representing
   something quite different. Or is it so different that its not worth mentioning?}
 
+\steven{TODO add lambek calculus/categorial grammars here}
+
 \paragraph{Kleene Algebra}
 Since the early works in the theory of formal
 languages, Kleene algebras have played an important role in its
 development. They
@@ -2377,55 +3536,299 @@ \section{Discussion and Future Work}
 \steven{What can we say about internalizing Kleene algebra proofs or extensions?}
 \steven{any KA citations needed above?}
 
-\paragraph{Linearity with Dependent Types}
-\steven{I don't know if it worth discussing the different influences of
-  categorical model here? We take Neel's model and add to it as necessary}
+%% - verified imperative implementation: Lambek logic ala separation
+%% logic?
+\paragraph{Relation to Separation Logic}
+
+\theoryabbv is similar in spirit to separation logic. Semantically,
+they are closely related: linear types in \theoryabbv denote families
+of sets indexed by a monoid of strings, whereas separation logic
+formulae typically denote families of predicates indexed by an ordered
+partial commutative monoid of worlds. The monoidal structure in both
+cases is an instance of the category-theoretic notion of Day
+convolution. From a separation-logic perspective,
+our notion of memory is very primitive: a memory shape is just a
+string of characters and the state of the memory is never allowed to
+evolve.
+
+Syntactically, our type theory can be viewed as a kind of ``separation
+type theory'' in that we care not just about the provability of formulae
+but also about the constructive content of our linear terms, as these act as
+parsers.
The design of \theoryabbv is based on the $\textrm{LNL}_D$
+calculus, which was explicitly designed to be a dependent
+type-theoretic version of separation logic. Many concepts of
+separation logic then have direct analogues in our system. The
+characters $c$ are analogous to the ``points-to'' formulae $l \mapsto
+x$ of separation logic. Our $\otimes,\lto,\tol$ are analogous to the
+separating conjunction and magic wand. The analogue of the persistence
+modality $\square$ of separation logic is $\bigoplus_{\_:\uparrow A} I$,
+which ``zeros out'' all but the parses of the empty string. If we
+think of the portions of the string we are parsing as our notion of
+resource, then these make sense as the ``resource-free'' elements of
+the type $A$.
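+
+To spell this last point out (our gloss, computed in the standard
+set-theoretic semantics, where $\uparrow A$ denotes the set of parses of $A$
+for the empty string):
+\[
+  \Bigl(\bigoplus_{\_:\uparrow A} I\Bigr)\,w
+  \;=\; \sum_{\_\,:\,\uparrow A} I\,w
+  \;\cong\; {\uparrow A} \times (w = \varepsilon),
+\]
+so this type has the $\varepsilon$-parses of $A$ as its parses of the empty
+string and no parses of any other string, just as a persistent proposition in
+separation logic holds independently of the resources owned.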
+
+This semantic connection to separation logic also suggests an avenue
+of future work: rather than working with a dependent type theory, we
+could instead develop a program logic based on non-commutative separation
+logic for verifying imperative implementations of parsers. This could
+be accomplished by modifying an existing separation logic
+implementation or by embedding the logic within \theoryabbv.
+
+%% \paragraph{Linearity with Dependent Types}
+%% \steven{I don't know if it worth discussing the different influences of
+%%   categorical model here? We take Neel's model and add to it as necessary}
+
+% Vakar might be worth citing to discuss different characterizations of the
+% notion of model \cite{vakarSyntaxSemanticsLinear2015}
+
+% \cite{fu2023twolevellineardependenttype} give a compiler for a two-level
+% linear dependent type theory. This could serve as a template for a
+% compiler implementation of \theoryname that is not just inference rules
+% embedded in Agda.
 
+\paragraph{Implementations in Parsing}
+\steven{Talk about future implementation-side work}
 \cite{fu2023twolevellineardependenttype} give a compiler for a two-level
 linear dependent type theory. This could serve as a template for a
 compiler implementation of \theoryname that is not just inference rules
 embedded in Agda.
 
-\paragraph{Implementations in Parsing}
-Talk about how we have a DFA parser in Agda and want to to get out a regex
-parser. Then context free, etc
-
-future work related to building a usable correct-by-construction parser
-combinator/semantic action combinator library in this language.
-
-Contrast this
-potential library with existing verified parsers. Citations for some in prelim presentation
-
-\paragraph{Applications to Type Checking}
-While parsing typically refers to the generation of semantic
-objects from string input, many tasks in programming can be
-viewed as parsing of objects with more structure, such as
-trees with binding structure or graphs. Fundamental to the
-frontend of many
-programming language implementations are type systems. In
-particular, \emph{type checking}
---- analogous to language recognition --- and \emph{typed
- elaboration} --- analogous to parsing --- arise when
-producing a semantic object subject to some analysis. Just
-as our string grammars were given as functors from $\String$
-to $\Set$, we envision adapting the same philosophy
-to functors from $\String$ to some \emph{category of trees} to craft a syntax
-that natively captures typed elaboration. This suggests an
-unusual sort of bunched type theory, where context extension
-no longer resembles concatenation of strings but instead
-takes on the form of tree constructors.
-
-\steven{condense the future work section?}
-\steven{Put the 2 category stuff here?}
-
-\paragraph{Other Future Work}
-Backtracking parsing: via (delimited) continuations? effect handlers?
+\paragraph{Type Checking and Elaboration}
+Our focus in this work has been on the verification of parsers for
+grammars over strings, but because \theoryabbv allows for the
+definition of arbitrarily powerful grammars, the system could also be
+used for more sophisticated semantic analysis such as scope checking
+or type checking. Alternatively, we could more directly encode
+type systems as linear types in a modified version of \theoryabbv
+whose semantics is not in families of sets indexed by \emph{strings}
+but in families of sets indexed by \emph{abstract syntax trees}.
+
+%% While parsing typically refers to the generation of semantic
+%% objects from string input, many tasks in programming can be
+%% viewed as parsing of objects with more structure, such as
+%% trees with binding structure or graphs. Fundamental to the
+%% frontend of many
+%% programming language implementations are type systems. In
+%% particular, \emph{type checking}
+%% --- analogous to language recognition --- and \emph{typed
+%%   elaboration} --- analogous to parsing --- arise when
+%% producing a semantic object subject to some analysis. Just
+%% as our string grammars were given as functors from $\String$
+%% to $\Set$, we envision adapting the same philosophy
+%% to functors from a \emph{category of trees} to $\Set$ to craft a syntax
+%% that natively captures typed elaboration. This suggests an
+%% unusual sort of bunched type theory, where context extension
+%% no longer resembles concatenation of strings but instead
+%% takes on the form of tree constructors.
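+
+As a minimal type-theoretic sketch of this re-indexing (illustrative only:
+\texttt{Tree} is a hypothetical parameter standing in for a type of abstract
+syntax trees, and this snippet is not part of our Agda development):
+
+\begin{lstlisting}
+open import Agda.Builtin.String
+
+-- grammars denote string-indexed sets of parses
+Grammar : Set1
+Grammar = String -> Set
+
+-- for typed elaboration, index by abstract syntax trees instead;
+-- a ``parse'' of a tree is then e.g. a typing derivation for it
+module _ (Tree : Set) where
+  TreeGrammar : Set1
+  TreeGrammar = Tree -> Set
+\end{lstlisting}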
+
+% type system ~~ Formal Grammar
+% typing derivation ~~ parse tree
+% algorithmic type system ~~ LL(1), LR(1) grammar
+% Uniqueness of derivations ~~ unambiguous grammar
+% type elaborator ~~ semantic actions
 
 \bibliographystyle{plain}
 \bibliography{refs.bib}
 
+\appendix
+
+\section{Elided Syntax}
+
+\max{TODO: add prose}
+
+\begin{figure}
+  \begin{mathpar}
+    \inferrule{~}{\ctxwff \cdot}
+    \and
+    \inferrule{\ctxwff \Gamma \\ \ctxwffjdg \Gamma X}{\ctxwff {\Gamma, x : X}}
+    \and
+    \inferrule{\Gamma \vdash X : U_i}{\ctxwffjdg \Gamma X}
+
+    \boxed{\linctxwffjdg \Gamma A}
+
+    \inferrule{~}{\linctxwff \Gamma \cdot}
+    \and
+    \inferrule{\linctxwff \Gamma \Delta \\ \linctxwffjdg \Gamma A}{\linctxwff \Gamma {\Delta, a : A}}
+  \end{mathpar}
+  \caption{Context Rules}
+\end{figure}
+
+
+\max{TODO: add rules for non-linear types $\Pi,\Sigma$, equality etc}
+
+\begin{figure}
+  \begin{mathpar}
+    \boxed{\Gamma \vdash M : X}
+
+    \inferrule{~}{\Gamma, x : X, \Gamma' \vdash x : X}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M : Y \quad \ctxwffjdg \Gamma {X \equiv Y}}{\Gamma \vdash M : X}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash () : 1}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M : X \\ \Gamma \vdash N : \subst Y M x}{\Gamma \vdash (M, N) : \SigTy x X Y}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M : \SigTy x X Y}{\Gamma \vdash \pi_1\, M : X}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M : \SigTy x X Y}{\Gamma \vdash \pi_2\, M : \subst Y {\pi_1\, M} x}
+    \and
+    \inferrule{\Gamma, x : X \vdash M : Y}{\Gamma \vdash \lamb x M : \PiTy x X Y}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M : \PiTy x X Y \\ \Gamma \vdash N : X}{\Gamma \vdash \app M {N} : \subst Y {N} x}
+    %
+    \and
+    %
+    \inferrule{\Gamma \vdash M \equiv N : X}{\Gamma \vdash \mathsf{refl} : M =_X N}
+    \and
+    \textrm{TODO: inductive types (boolean, nat) with their eliminators allowed in }
+  \end{mathpar}
+  \caption{Intuitionistic typing}
+  \label{fig:inttyping}
+\end{figure}
+
+\begin{figure}
+  \begin{mathpar}
+    %
+    \inferrule{\Gamma ; a : A , \Delta \vdash e : B}{\Gamma ; \Delta \vdash \lambtol a e : B\tol A}
+    \and
+    \inferrule{\Gamma ; \Delta \vdash e : B \tol A \\ \Gamma ; \Delta' \vdash e' : A}{\Gamma ; \Delta', \Delta \vdash \apptol e {e'} : B}
+    %
+    \\
+
+  \end{mathpar}
+  \caption{Linear Terms, extended}
+\end{figure}
+
+\begin{figure}
+  \begin{mathpar}
+    \boxed{\Gamma \vdash M \equiv N : X}
+
+    \inferrule{\Gamma \vdash p : M =_X N}{\Gamma \vdash M \equiv N : X}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash \app {(\lamb x M)} {N} \equiv \subst M {N} x : \subst Y {N} x}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash M \equiv \lamb x {\app M x} : \PiTy x X Y}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash \pi_1\, (M, N) \equiv M : X}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash \pi_2\, (M , N) \equiv N : \subst Y {M} x}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma \vdash M \equiv (\pi_1\, M, \pi_2\, M) : \SigTy x X Y}
+    %
+    \and
+    %
+    % \inferrule{~}{}
+    %
+    \inferrule{~}{\Gamma \vdash M \equiv N : 1}
+    %
+    % \and
+    %
+    % \inferrule{~}{\Gamma \vdash G\, (G^{-1} \, t) \equiv t : G A}
+
+    \\
+
+    \boxed{\Gamma ; \Delta \vdash e \equiv e' : A}
+
+    % \inferrule{~}{\Gamma; \cdot \vdash G^{-1}\, (G \, t ) \equiv t: A}
+    %
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash \app {(\lamblto a e)} {e'} \equiv \subst e {e'} a : C}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \lamblto a {\app e a} : A \lto B}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash \apptol {(\lambtol a e)} {e'} \equiv \subst e {e'} a : C}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \lambtol a {\apptol e a} : B \tol A}
+%
+    \and
+%
+    \inferrule{~}{\Gamma; \Delta \vdash \app {(\dlamb x e)} {M} \equiv \subst {e} M x : C}
+%
+    \and
+%
+    \inferrule{~}{\Gamma; \Delta \vdash e \equiv \dlamb x {\app e x} : \LinPiTy x X A}
+%
+    \and
+%
+%     \inferrule{~}{\Gamma; \Delta \vdash e \equiv e' : \top}
+% %
+%     \and
+%
+%     \inferrule{~}{\Gamma; \Delta \vdash e_i \equiv \pi_i (e_1, e_2) : A_i}
+% %
+%     \and
+% %
+%     \inferrule{~}{\Gamma; \Delta \vdash e \equiv (\pi_1 e, \pi_2 e) : A\& B}
+%
+    \and
+%
+    \inferrule{~}{\Gamma; \Delta \vdash \letin {()} {()} e \equiv e : C}
+    %
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash \letin {()} e {\subst {e'} {()} a} \equiv \subst {e'} e a : C}
+%
+    \and
+%
+    \inferrule{~}{\Gamma; \Delta \vdash \letin {a \otimes b} {e \otimes e'} e'' \equiv e'' \{ e/a , e'/b \} : C}
+%
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash \letin {a \otimes b} e {\subst {e'} {a \otimes b} c} \equiv \subst {e'} e c : C}
+%
+    \and
+%
+    \inferrule{~}{\Gamma;\Delta \vdash \letin {(x, a)} {(M, e)} {e'} \equiv e' \{ M/x , e/a \} : C}
    %
+    \and
+    %
+    \inferrule{~}{\Gamma; \Delta \vdash \letin {(x, a)} e {\subst {e'} {(x, a)} y} \equiv \subst {e'} e y : C}
+    \and
+    %
+    \inferrule
+    {~}
+    {\Gamma ; \Delta \vdash \equalizerpi {\equalizerin {e}} \equiv e : A}
+    %
+    \and
+    %
+    \inferrule
+    {~}
+    {\Gamma ; \Delta \vdash \equalizerin {\equalizerpi {e}} \equiv e : A}
+  \end{mathpar}
+  \caption{Judgmental equality for non-linear and linear terms}
+  \label{fig:jdgeq}
+\end{figure}
+\steven{Do we need inverse for $\ltonl A$ in judgemental equality figure?}
+\steven{Do we need to say anything about the equalizer proofs being unique in
+  judgemental equality figure?}
+
 \end{document}