Skip to content

Commit

Permalink
Unicode alphabet, subalphabets, and easier tests (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
stschaef authored Sep 27, 2024
1 parent d127d1f commit 75a0633
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 0 deletions.
59 changes: 59 additions & 0 deletions code/cubical/Examples/Dyck.agda
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,62 @@ opaque
-- done = ⊗-unit-r

-- -- TODO: show this is a *strong* equivalence!

open import String.Unicode
open import String.SubAlphabet
open import Cubical.Data.Empty as Empty
open import Cubical.Functions.Embedding

pick-char : Bool ⟨ Unicode ⟩
pick-char true = '('
pick-char false = ')'

pick-char-inj : {b b' : Bool} pick-char b ≡ pick-char b' b ≡ b'
pick-char-inj {false} {false} _ = refl
pick-char-inj {false} {true} x =
Empty.rec (mkUnicodeCharPath-no ')' '(' refl x)
pick-char-inj {true} {false} x =
Empty.rec (mkUnicodeCharPath-no '(' ')' refl x)
pick-char-inj {true} {true} _ = refl

isEmbedding-pick-char : isEmbedding pick-char
isEmbedding-pick-char = injEmbedding (Unicode .snd) pick-char-inj

Bracket' : hSet ℓ-zero
Bracket' =
UnicodeSubAlphabet (Bool , isSetBool) (pick-char , isEmbedding-pick-char)
isFinSetBool


BracketIso : Iso Bracket ⟨ Bracket' ⟩
BracketIso .Iso.fun [ = '(' , (true , refl)
BracketIso .Iso.fun ] = ')' , (false , refl)
BracketIso .Iso.inv (c , true , _) = [
BracketIso .Iso.inv (c , false , _) = ]
BracketIso .Iso.rightInv (c , true , fib) i =
-- idek anymore
fib i , (true , isSetUnicodeChar _ _ (λ j fib (i ∧ j)) (λ j fib (i ∧ j)) i)
BracketIso .Iso.rightInv (c , false , fib) i =
fib i , (false , isSetUnicodeChar _ _ (λ j fib (i ∧ j)) (λ j fib (i ∧ j)) i)
BracketIso .Iso.leftInv [ = refl
BracketIso .Iso.leftInv ] = refl

test : UnicodeString
-- Also tested with a string that was about 21k characters long
-- Took 30 seconds to parse,
-- and correctly fails when the string is unbalanced
test = "()((((())))()()()()(())()())()()"

test' : List ⟨ Bracket' ⟩
test' = toString (Bool , isSetBool) (pick-char , isEmbedding-pick-char) isFinSetBool test

w : String
w = Cubical.Data.List.map (BracketIso .Iso.inv) test'

test-parse : (LinΣ[ b ∈ Bool ] BalancedStkTr 0 b) w
test-parse = (LinΠ-app 0 ∘g parseStkTr) w (⌈w⌉→string {w = w} w (internalize w))

opaque
unfolding ⟜-app _⟜_ ⟜-intro ⊸-app _⊸_ ⊸-app parseStkTr KL*r-elim ⌈w⌉→string
_ : test-parse .fst ≡ true
_ = refl
78 changes: 78 additions & 0 deletions code/cubical/String/SubAlphabet.agda
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
open import Cubical.Foundations.Prelude
open import Cubical.Foundations.HLevels

module String.SubAlphabet where

open import Cubical.Foundations.Structure
open import Cubical.Foundations.Isomorphism
open import Cubical.Foundations.Equiv

open import Cubical.Functions.Embedding

open import Cubical.Relation.Nullary.Base
open import Cubical.Relation.Nullary.Properties

open import Agda.Builtin.Char
open import Agda.Builtin.String

open import Cubical.Data.Bool
open import Cubical.Data.Maybe
open import Cubical.Data.Empty as Empty
open import Cubical.Data.Sigma
open import Cubical.Data.FinSet
open import Cubical.Data.FinSet.Constructors
open import Cubical.Data.FinSet.DecidablePredicate

open import Cubical.HITs.PropositionalTruncation as PT

open import Helper

-- Pick out a subalphabet of another alphabet
module _
(Alphabet Alphabet' : hSet ℓ-zero)
(embed : ⟨ Alphabet ⟩ ↪ ⟨ Alphabet' ⟩)
where
SubAlphabet' : Type ℓ-zero
SubAlphabet' = Σ[ c ∈ ⟨ Alphabet' ⟩ ] fiber (embed .fst) c

SubAlphabetIso :
Iso ⟨ Alphabet ⟩ (Σ ⟨ Alphabet' ⟩ (λ v fiber (λ z embed .fst z) v))
SubAlphabetIso =
iso
(λ x (embed .fst x) , (x , refl))
(λ x x .snd .fst)
(λ b
Σ≡Prop (λ x isEmbedding→hasPropFibers (embed .snd) x)
(b .snd .snd))
(λ _ refl)

open Iso
isSetSubAlphabet : isSet SubAlphabet'
isSetSubAlphabet =
isSetRetract (SubAlphabetIso .inv) (SubAlphabetIso .fun)
(SubAlphabetIso .rightInv) (Alphabet .snd)

SubAlphabet : hSet ℓ-zero
SubAlphabet .fst = SubAlphabet'
SubAlphabet .snd = isSetSubAlphabet

module _ (isFinSetAlphabet : isFinSet ⟨ Alphabet ⟩) where
isFinSetSubAlphabet : isFinSet SubAlphabet'
isFinSetSubAlphabet =
EquivPresIsFinSet (isoToEquiv SubAlphabetIso) isFinSetAlphabet

module _ (DiscreteAlphabet' : Discrete ⟨ Alphabet' ⟩) where
maybe-SubAlphabet : ⟨ Alphabet' ⟩ Maybe ⟨ SubAlphabet ⟩
maybe-SubAlphabet c' =
decRec
(λ ∣in-image∣
just
(c' ,
(PT.rec (isEmbedding→hasPropFibers (embed .snd) c')
(λ in-image in-image) ∣in-image∣)))
(λ ¬|in-image∣ nothing)
(DecProp∃
(_ , isFinSetAlphabet)
(λ c (_ , Alphabet' .snd _ _) ,
(DiscreteAlphabet' (embed .fst c) c'))
.snd )
78 changes: 78 additions & 0 deletions code/cubical/String/Unicode.agda
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
open import Cubical.Foundations.Prelude
open import Cubical.Foundations.HLevels

module String.Unicode where

open import Cubical.Foundations.Structure
open import Cubical.Foundations.Isomorphism
open import Cubical.Foundations.Equiv

open import Cubical.Functions.Embedding

open import Cubical.Relation.Nullary.Base
open import Cubical.Relation.Nullary.Properties

open import Agda.Builtin.Char
open import Agda.Builtin.String

open import Cubical.Data.Bool
open import Cubical.Data.Maybe as Maybe
open import Cubical.Data.List
open import Cubical.Data.Empty as Empty
open import Cubical.Data.Sigma
open import Cubical.Data.FinSet
open import Cubical.Data.FinSet.Constructors

open import String.SubAlphabet


UnicodeChar = Char
UnicodeString = String

-- Char equality is computed externally by JavaScript
-- Internalize a path oracle
postulate
mkUnicodeCharPath-yes : (c c' : UnicodeChar) primCharEquality c c' ≡ true c ≡ c'
mkUnicodeCharPath-no : (c c' : UnicodeChar) primCharEquality c c' ≡ false ¬ (c ≡ c')

DiscreteUnicodeChar : Discrete UnicodeChar
DiscreteUnicodeChar c c' =
decRec
(λ t yes (mkUnicodeCharPath-yes c c' t))
(λ f no (mkUnicodeCharPath-no c c' (flip (primCharEquality c c') f)))
(primCharEquality c c' ≟ true)
where
flip : (b : Bool) ¬ (b ≡ true) b ≡ false
flip false _ = refl
flip true ¬t=t = Empty.rec (¬t=t refl)

isSetUnicodeChar : isSet UnicodeChar
isSetUnicodeChar = Discrete→isSet DiscreteUnicodeChar

-- The unicode alphabet
Unicode : hSet ℓ-zero
Unicode .fst = UnicodeChar
Unicode .snd = isSetUnicodeChar

-- I thought this would be good to define alphabets, but
-- that cannot be true because you cannot pattern match on the characters
-- in an alphabet due to the paths in the fibers underlying the subalphabet
-- definitions
-- However, this should still be good to write test cases more concisely?
-- Still likely no. Maybe if Eq was used instead of path?
module _
(Alphabet : hSet ℓ-zero)
(embed : ⟨ Alphabet ⟩ ↪ ⟨ Unicode ⟩)
(isFinSetAlphabet : isFinSet ⟨ Alphabet ⟩)
where
UnicodeSubAlphabet : hSet ℓ-zero
UnicodeSubAlphabet = SubAlphabet Alphabet Unicode embed

-- This should probably be a stricter check that none of the
-- characters were from outside of the subalphabet
toString : String List ⟨ UnicodeSubAlphabet ⟩
toString w =
filterMap
(maybe-SubAlphabet Alphabet Unicode embed
isFinSetAlphabet DiscreteUnicodeChar)
(primStringToList w)

0 comments on commit 75a0633

Please sign in to comment.