From 1a92b8e8452ab1c0bbeb0a17c34b7cf4ae54a407 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 1 Nov 2023 15:14:15 -0400 Subject: [PATCH] macaw-symbolic-syntax: Concrete syntax for macaw-symbolic CFGs This code was ported from ambient-verifier. --- .github/workflows/ci.yaml | 6 + cabal.project.dist | 2 + cabal.project.werror | 2 + symbolic-syntax/LICENSE | 30 + symbolic-syntax/macaw-symbolic-syntax.cabal | 132 ++++ .../src/Data/Macaw/Symbolic/Syntax.hs | 638 ++++++++++++++++++ symbolic-syntax/test-data/copy.cbl | 6 + symbolic-syntax/test-data/copy.out | 7 + symbolic-syntax/test-data/copy.out.good | 7 + symbolic-syntax/test/Test.hs | 54 ++ 10 files changed, 884 insertions(+) create mode 100644 symbolic-syntax/LICENSE create mode 100644 symbolic-syntax/macaw-symbolic-syntax.cabal create mode 100644 symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs create mode 100644 symbolic-syntax/test-data/copy.cbl create mode 100644 symbolic-syntax/test-data/copy.out create mode 100644 symbolic-syntax/test-data/copy.out.good create mode 100644 symbolic-syntax/test/Test.hs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d4d137164..2122378b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -95,6 +95,12 @@ jobs: YICES_VERSION: "2.6.2" CVC4_VERSION: "4.1.8" + - name: Build macaw-symbolic-syntax + run: cabal build pkg:macaw-symbolic-syntax + + - name: Test macaw-symbolic-syntax + run: cabal test pkg:macaw-symbolic-syntax + - name: Build macaw-x86 run: cabal build pkg:macaw-x86 pkg:macaw-x86-symbolic diff --git a/cabal.project.dist b/cabal.project.dist index 3ff8e23e4..47b829c58 100644 --- a/cabal.project.dist +++ b/cabal.project.dist @@ -7,6 +7,7 @@ packages: base/ macaw-riscv/ x86/ symbolic/ + symbolic-syntax/ x86_symbolic/ refinement/ utils/compare-dwarfdump @@ -28,6 +29,7 @@ packages: base/ deps/crucible/crucible/ deps/crucible/crucible-llvm/ deps/crucible/crucible-symio/ + deps/crucible/crucible-syntax/ deps/what4/what4/ deps/dwarf/ deps/elf-edit/ diff --git a/cabal.project.werror b/cabal.project.werror index f3203aa20..7eadea2d6 100644 --- a/cabal.project.werror +++ b/cabal.project.werror @@ -7,6 +7,8 @@ package macaw-semmc ghc-options: -Wall package macaw-symbolic ghc-options: -Wall -Werror +package macaw-symbolic-syntax + ghc-options: -Wall -Werror -- Macaw-ppc has warnings. package macaw-ppc ghc-options: -Wall -Werror diff --git a/symbolic-syntax/LICENSE b/symbolic-syntax/LICENSE new file mode 100644 index 000000000..f01e82c96 --- /dev/null +++ b/symbolic-syntax/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2023 Galois Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Galois, Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/symbolic-syntax/macaw-symbolic-syntax.cabal b/symbolic-syntax/macaw-symbolic-syntax.cabal new file mode 100644 index 000000000..8388aa1e0 --- /dev/null +++ b/symbolic-syntax/macaw-symbolic-syntax.cabal @@ -0,0 +1,132 @@ +Cabal-version: 2.2 +Name: macaw-symbolic-syntax +Version: 0.1 +Author: Galois Inc. +Maintainer: langston@galois.com +Build-type: Simple +License: BSD-3-Clause +License-file: LICENSE +Category: Language +Synopsis: A syntax for reading and writing macaw-symbolic control-flow graphs +-- Description: + +extra-doc-files: README.md +extra-source-files: + test-data/*.cbl + test-data/*.out.good + +common shared + -- Specifying -Wall and -Werror can cause the project to fail to build on + -- newer versions of GHC simply due to new warnings being added to -Wall. To + -- prevent this from happening we manually list which warnings should be + -- considered errors. We also list some warnings that are not in -Wall, though + -- try to avoid "opinionated" warnings (though this judgement is clearly + -- subjective). + -- + -- Warnings are grouped by the GHC version that introduced them, and then + -- alphabetically. + -- + -- A list of warnings and the GHC version in which they were introduced is + -- available here: + -- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html + + -- Since GHC 8.10 or earlier: + ghc-options: + -Wall + -Werror=compat-unqualified-imports + -Werror=deferred-type-errors + -Werror=deprecated-flags + -Werror=deprecations + -Werror=deriving-defaults + -Werror=dodgy-foreign-imports + -Werror=duplicate-exports + -Werror=empty-enumerations + -Werror=identities + -Werror=inaccessible-code + -Werror=incomplete-patterns + -Werror=incomplete-record-updates + -Werror=incomplete-uni-patterns + -Werror=inline-rule-shadowing + -Werror=missed-extra-shared-lib + -Werror=missing-exported-signatures + -Werror=missing-fields + -Werror=missing-home-modules + -Werror=missing-methods + -Werror=overflowed-literals + -Werror=overlapping-patterns + -Werror=partial-fields + -Werror=partial-type-signatures + -Werror=simplifiable-class-constraints + -Werror=star-binder + -Werror=star-is-type + -Werror=tabs + -Werror=typed-holes + -Werror=unrecognised-pragmas + -Werror=unrecognised-warning-flags + -Werror=unsupported-calling-conventions + -Werror=unsupported-llvm-version + -Werror=unticked-promoted-constructors + -Werror=unused-imports + -Werror=warnings-deprecations + -Werror=wrong-do-bind + + if impl(ghc >= 9.2) + ghc-options: + -Werror=ambiguous-fields + -Werror=operator-whitespace + -Werror=operator-whitespace-ext-conflict + -Werror=redundant-bang-patterns + + if impl(ghc >= 9.4) + ghc-options: + -Werror=forall-identifier + -Werror=misplaced-pragmas + -Werror=redundant-strictness-flags + -Werror=type-equality-out-of-scope + -Werror=type-equality-requires-operators + + ghc-prof-options: -O2 -fprof-auto-top + default-language: Haskell2010 + +library + import: shared + + build-depends: + base >= 4.13, + containers, + crucible >= 0.1, + crucible-llvm, + crucible-syntax, + macaw-base, + macaw-symbolic, + mtl, + parameterized-utils >= 0.1.7, + prettyprinter, + text, + what4, + vector, + + hs-source-dirs: src + + exposed-modules: + Data.Macaw.Symbolic.Syntax + +test-suite macaw-symbolic-syntax-tests + import: shared + type: exitcode-stdio-1.0 + main-is: Test.hs + hs-source-dirs: test + build-depends: + base, + containers, + crucible >= 0.1, + crucible-syntax, + filepath, + macaw-symbolic, + macaw-symbolic-syntax, + macaw-x86, + parameterized-utils >= 0.1.7, + tasty, + tasty-golden, + text, + \ No newline at end of file diff --git a/symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs b/symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs new file mode 100644 index 000000000..19b7358c5 --- /dev/null +++ b/symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs @@ -0,0 +1,638 @@ +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ImplicitParams #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} + +-- | 'LCSC.ParserHooks' for parsing macaw-symbolic CFGs. Originally ported from: +-- +-- https://github.com/GaloisInc/ambient-verifier/blob/eab04abb9750825a25ec0cbe0379add63f05f6c6/src/Ambient/FunctionOverride/Extension.hs#L863 +module Data.Macaw.Symbolic.Syntax ( + TypeAlias(..) + , TypeLookup(..) + , x86_64LinuxTypes + , ExtensionParser + , SomeExtensionWrapper(..) + , extensionParser + , extensionTypeParser + , machineCodeParserHooks + ) where + +import Prelude + +import Control.Applicative ( Alternative(empty) ) +import Control.Monad.IO.Class ( MonadIO(..) ) +import Control.Monad.State.Class ( MonadState ) +import Control.Monad.Writer.Class ( MonadWriter ) +import qualified Data.Map.Strict as Map +import qualified Data.Parameterized.Context as Ctx +import qualified Data.Parameterized.NatRepr as PN +import Data.Parameterized.Some ( Some(..) ) +import qualified Data.Text as DT +import qualified Data.Vector as DV +import GHC.TypeNats ( KnownNat, type (<=) ) +import Numeric.Natural ( Natural ) + +import qualified Data.Macaw.CFG as DMC +import qualified Data.Macaw.Memory as DMM +import qualified Data.Macaw.Symbolic as DMS +import qualified Lang.Crucible.CFG.Expr as LCE +import qualified Lang.Crucible.CFG.Extension as LCCE +import qualified Lang.Crucible.CFG.Reg as LCCR +import qualified Lang.Crucible.LLVM.MemModel as LCLM +import qualified Lang.Crucible.Syntax.Atoms as LCSA +import qualified Lang.Crucible.Syntax.Concrete as LCSC +import qualified Lang.Crucible.Syntax.ExprParse as LCSE +import qualified Lang.Crucible.Types as LCT + +import qualified What4.Interface as WI +import qualified What4.ProgramLoc as WP + +-- | The additional types beyond those built into crucible-syntax. +data TypeAlias = Byte | Int | Long | PidT | Pointer | Short | SizeT | UidT + deriving (Show, Eq, Enum, Bounded) + +-- | Lookup function from a 'TypeAlias' to the underlying crucible type it +-- represents. +newtype TypeLookup = TypeLookup (TypeAlias -> (Some LCT.TypeRepr)) + +-- | A lookup function from 'TypeAlias' to types with the appropriate width +-- on X86_64 Linux. +x86_64LinuxTypes :: TypeLookup +x86_64LinuxTypes = TypeLookup $ \tp -> + case tp of + Byte -> Some (LCT.BVRepr (PN.knownNat @8)) + Int -> Some (LCT.BVRepr (PN.knownNat @32)) + Long -> Some (LCT.BVRepr (PN.knownNat @64)) + PidT -> Some (LCT.BVRepr (PN.knownNat @32)) + Pointer -> Some (LCLM.LLVMPointerRepr (PN.knownNat @64)) + Short -> Some (LCT.BVRepr (PN.knownNat @16)) + SizeT -> Some (LCT.BVRepr (PN.knownNat @64)) + UidT -> Some (LCT.BVRepr (PN.knownNat @32)) +-- | The constraints on the abstract parser monad +type ExtensionParser m ext s = ( LCSE.MonadSyntax LCSA.Atomic m + , MonadWriter [WP.Posd (LCCR.Stmt ext s)] m + , MonadState (LCSC.SyntaxState s) m + , MonadIO m + , LCCE.IsSyntaxExtension ext + ) + +-- | A wrapper for a syntax extension statement +-- +-- Note that these are pure and are intended to guide the substitution of syntax +-- extensions for operations in the 'MDS.MacawExt' syntax. +data ExtensionWrapper arch args ret = + ExtensionWrapper { extName :: LCSA.AtomName + -- ^ Name of the syntax extension + , extArgTypes :: LCT.CtxRepr args + -- ^ Types of the arguments to the syntax extension + , extWrapper :: forall s m + . ( ExtensionParser m (DMS.MacawExt arch) s) + => Ctx.Assignment (LCCR.Atom s) args + -> m (LCCR.AtomValue (DMS.MacawExt arch) s ret) + -- ^ Syntax extension wrapper that takes the arguments passed + -- to the extension operator, returning a suitable crucible + -- 'LCCR.AtomValue' to represent it in the syntax extension. + } + +data SomeExtensionWrapper arch = + forall args ret. SomeExtensionWrapper (ExtensionWrapper arch args ret) + +-- | A list of every type alias. +allTypeAliases :: [TypeAlias] +allTypeAliases = [minBound .. maxBound] + +-- | Parser for type extensions to crucible syntax +extensionTypeParser + :: (LCSE.MonadSyntax LCSA.Atomic m) + => Map.Map LCSA.AtomName (Some LCT.TypeRepr) + -- ^ A mapping from additional type names to the crucible types they + -- represent + -> m (Some LCT.TypeRepr) +extensionTypeParser types = do + name <- LCSC.atomName + case Map.lookup name types of + Just someTypeRepr -> return someTypeRepr + Nothing -> empty + +-- | Check that a 'WI.NatRepr' containing a value in bits is a multiple of 8. +-- If so, pass the result of the value divided by 8 (i.e., as bytes) to the +-- continuation. Otherwise, return a default result. +bitsToBytes :: + WI.NatRepr bits -> + a -> + -- ^ Default value to use if @bits@ is not a multiple of 8 + (forall bytes. ((8 WI.* bytes) ~ bits) => WI.NatRepr bytes -> a) -> + -- ^ Continuation to invoke is @bits@ is a multiple of 8 + a +bitsToBytes bitsRep nonMultipleOf8 multipleOf8 = + PN.withDivModNat bitsRep w8 $ \bytesRep modRep -> + case PN.isZeroNat modRep of + PN.NonZeroNat -> nonMultipleOf8 + PN.ZeroNat + | PN.Refl <- PN.mulComm bytesRep w8 + -> multipleOf8 bytesRep + where + w8 = PN.knownNat @8 + +-- | Perform a case analysis on the type argument to @pointer-read@ or +-- @pointer-write@. If the supplied type is not supported, return 'empty'. +-- This function packages factors out all of the grungy pattern-matching and +-- constraint wrangling that @pointer-read@ and @pointer-write@ share in +-- common. +withSupportedPointerReadWriteTypes :: + Alternative f => + LCT.TypeRepr tp -> + -- ^ The type argument + (forall bits bytes. + ( tp ~ LCT.BVType bits + , (8 WI.* bytes) ~ bits + , 1 <= bits + , 1 <= bytes + ) => + WI.NatRepr bits -> + WI.NatRepr bytes -> + f a) -> + -- ^ Continuation to use if the argument is @'LCT.BVRepr (8 'WI.*' bytes)@ + -- (for some positive number @bytes@). + (forall bits bytes. + ( tp ~ LCLM.LLVMPointerType bits + , (8 WI.* bytes) ~ bits + , 1 <= bits + , 1 <= bytes + ) => + WI.NatRepr bits -> + WI.NatRepr bytes -> + f a) -> + -- ^ Continuation to use if the argument is + -- @'LCLM.LLVMPointerRepr (8 'WI.*' bytes)@ (for some positive number + -- @bytes@). + f a +withSupportedPointerReadWriteTypes tp bvK ptrK = + case tp of + LCT.BVRepr bits -> + bitsToBytes bits empty $ \bytes -> + case PN.isPosNat bytes of + Nothing -> empty + Just PN.LeqProof -> bvK bits bytes + LCLM.LLVMPointerRepr bits -> + bitsToBytes bits empty $ \bytes -> + case PN.isPosNat bytes of + Nothing -> empty + Just PN.LeqProof -> ptrK bits bytes + _ -> empty + +-- | Parser for syntax extensions to crucible syntax +-- +-- Note that the return value is a 'Pair.Pair', which seems a bit strange +-- because the 'LCT.TypeRepr' in the first of the pair is redundant (it can be +-- recovered by inspecting the 'LCCR.Atom'). The return value is set up this way +-- because the use site of the parser in crucible-syntax expects the 'Pair.Pair' +-- for all of the parsers that it attempts; returning the 'Pair.Pair' here +-- simplifies the use site. +extensionParser :: forall s m ext arch w + . ( ExtensionParser m ext s + , ext ~ (DMS.MacawExt arch) + , w ~ DMC.ArchAddrWidth arch + , 1 <= w + , KnownNat w + , DMM.MemWidth w + ) + => Map.Map LCSA.AtomName (SomeExtensionWrapper arch) + -- ^ Mapping from names to syntax extensions + -> LCSC.ParserHooks ext + -- ^ ParserHooks for the desired syntax extension + -> m (Some (LCCR.Atom s)) + -- ^ A pair containing a result type and an atom of that type +extensionParser wrappers hooks = + let ?parserHooks = hooks in + LCSE.depCons LCSC.atomName $ \name -> + case name of + LCSA.AtomName "pointer-read" -> do + -- Pointer reads are a special case because we must parse the type of + -- the value to read as well as the endianness of the read before + -- parsing the additional arguments as Atoms. + LCSE.depCons LCSC.isType $ \(Some tp) -> + LCSE.depCons LCSC.atomName $ \endiannessName -> + case endiannessFromAtomName endiannessName of + Just endianness -> + let readWrapper = + buildPointerReadWrapper tp endianness in + go (SomeExtensionWrapper readWrapper) + Nothing -> empty + LCSA.AtomName "pointer-write" -> do + -- Pointer writes are a special case because we must parse the type of + -- the value to write out as well as the endianness of the write before + -- parsing the additional arguments as Atoms. + LCSE.depCons LCSC.isType $ \(Some tp) -> + LCSE.depCons LCSC.atomName $ \endiannessName -> + case endiannessFromAtomName endiannessName of + Just endianness -> + let writeWrapper = + buildPointerWriteWrapper tp endianness in + go (SomeExtensionWrapper writeWrapper) + Nothing -> empty + LCSA.AtomName "bv-typed-literal" -> do + -- Bitvector literals with a type argument are a special case. We must + -- parse the type argument separately before parsing the remaining + -- argument as an Atom. + LCSE.depCons LCSC.isType $ \(Some tp) -> + case tp of + LCT.BVRepr{} -> + go (SomeExtensionWrapper (buildBvTypedLitWrapper tp)) + _ -> empty + LCSA.AtomName "fresh-vec" -> do + -- Generating fresh vectors are a special case. We must parse the + -- name and length arguments separately due to their need to be + -- concrete, and we must parse the type argument separately before we + -- can know the return type. + LCSE.depCons LCSC.string $ \nmPrefix -> + LCSE.depCons LCSC.isType $ \(Some tp) -> + LCSE.depCons LCSC.nat $ \len -> + go (SomeExtensionWrapper (buildFreshVecWrapper nmPrefix tp len)) + _ -> + case Map.lookup name wrappers of + Nothing -> empty + Just w -> go w + where + go :: (?parserHooks :: LCSC.ParserHooks ext) + => SomeExtensionWrapper arch + -> m (Some (LCCR.Atom s)) + go (SomeExtensionWrapper wrapper) = do + loc <- LCSE.position + -- Generate atoms for the arguments to this extension + operandAtoms <- LCSC.operands (extArgTypes wrapper) + -- Pass these atoms to the extension wrapper and return an atom for the + -- resulting value + atomVal <- extWrapper wrapper operandAtoms + endAtom <- LCSC.freshAtom loc atomVal + return (Some endAtom) + + -- Parse an 'LCSA.AtomName' representing an endianness into a + -- 'Maybe DMM.Endianness' + endiannessFromAtomName endianness = + case endianness of + LCSA.AtomName "le" -> Just DMM.LittleEndian + LCSA.AtomName "be" -> Just DMM.BigEndian + _ -> Nothing + +-- | Wrap a statement extension binary operator +binop :: (KnownNat w, Monad m) + => ( WI.NatRepr w + -> lhsType + -> rhsType + -> LCCE.StmtExtension ext (LCCR.Atom s) tp ) + -- ^ Binary operator + -> lhsType + -- ^ Left-hand side operand + -> rhsType + -- ^ Right-hand side operand + -> m (LCCR.AtomValue ext s tp) +binop op lhs rhs = + return (LCCR.EvalExt (op WI.knownNat lhs rhs)) + + +-- | Wrap a syntax extension binary operator, converting a bitvector in the +-- right-hand side position to an 'LLVMPointerType' before wrapping. +binopRhsBvToPtr :: ( ext ~ DMS.MacawExt arch + , ExtensionParser m ext s + , 1 <= w + , KnownNat w ) + => ( WI.NatRepr w + -> lhsType + -> LCCR.Atom s (LCLM.LLVMPointerType w) + -> LCCE.StmtExtension ext (LCCR.Atom s) tp) + -- ^ Binary operator taking an 'LLVMPointerType' in the + -- right-hand side position + -> lhsType + -- ^ Left-hand side operand + -> LCCR.Atom s (LCT.BVType w) + -- ^ Right-hand side operand as a bitvector to convert to an + -- 'LLVMPointerType' + -> m (LCCR.AtomValue ext s tp) +binopRhsBvToPtr op p1 p2 = do + toPtrAtom <- LCSC.freshAtom WP.InternalPos (LCCR.EvalApp (LCE.ExtensionApp (DMS.BitsToPtr WI.knownNat p2))) + binop op p1 toPtrAtom + +-- | Wrapper for the 'DMS.PtrAdd' syntax extension that enables users to add +-- integer offsets to pointers: +-- +-- > pointer-add ptr offset +-- +-- Note that the underlying macaw primitive allows the offset to be in either +-- position. This user-facing interface is more restrictive. +wrapPointerAdd + :: ( 1 <= w + , KnownNat w + , DMC.MemWidth w + , w ~ DMC.ArchAddrWidth arch ) + => ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w + Ctx.::> LCT.BVType w) + (LCLM.LLVMPointerType w) +wrapPointerAdd = ExtensionWrapper + { extName = LCSA.AtomName "pointer-add" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + Ctx.:> LCT.BVRepr LCT.knownNat + , extWrapper = \args -> Ctx.uncurryAssignment (binopRhsBvToPtr DMS.PtrAdd) args } + +-- | Wrapper for the 'DMS.PtrSub' syntax extension that enables users to +-- subtract integer offsets from pointers: +-- +-- > pointer-sub ptr offset +-- +-- Note that the underlying macaw primitive allows the offset to be in either +-- position. This user-facing interface is more restrictive. +wrapPointerSub + :: ( 1 <= w + , KnownNat w + , DMC.MemWidth w + , w ~ DMC.ArchAddrWidth arch ) + => ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w + Ctx.::> LCT.BVType w) + (LCLM.LLVMPointerType w) +wrapPointerSub = ExtensionWrapper + { extName = LCSA.AtomName "pointer-sub" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + Ctx.:> LCT.BVRepr LCT.knownNat + , extWrapper = \args -> Ctx.uncurryAssignment (binopRhsBvToPtr (DMS.PtrSub . DMM.addrWidthRepr)) args } + +-- | Compute the difference between two pointers in bytes using 'DMS.PtrSub' +pointerDiff :: ( w ~ DMC.ArchAddrWidth arch + , ext ~ DMS.MacawExt arch + , ExtensionParser m ext s + , 1 <= w + , KnownNat w + , DMM.MemWidth w ) + => LCCR.Atom s (LCLM.LLVMPointerType w) + -> LCCR.Atom s (LCLM.LLVMPointerType w) + -> m (LCCR.AtomValue ext s (LCT.BVType w)) +pointerDiff lhs rhs = do + ptrRes <- binop (DMS.PtrSub . DMM.addrWidthRepr) lhs rhs + ptrAtom <- LCSC.freshAtom WP.InternalPos ptrRes + -- 'DMS.PtrSub' of two pointers produces a bitvector (LLVMPointer with region + -- 0) so 'DMS.PtrToBits' is safe here. + return (LCCR.EvalApp (LCE.ExtensionApp (DMS.PtrToBits LCT.knownNat ptrAtom))) + +-- | Wrapper for the 'DMS.PtrSub' syntax extension that enables users to +-- subtract pointers from pointers: +-- +-- > pointer-diff ptr1 ptr2 +wrapPointerDiff + :: ( w ~ DMC.ArchAddrWidth arch + , 1 <= w + , KnownNat w + , DMC.MemWidth w ) + => ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w + Ctx.::> LCLM.LLVMPointerType w ) + (LCT.BVType w) +wrapPointerDiff = ExtensionWrapper + { extName = LCSA.AtomName "pointer-diff" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + , extWrapper = \args -> Ctx.uncurryAssignment pointerDiff args } + +-- | Wrapper for 'DMS.MacawNullPtr' to construct a null pointer. +-- +-- > make-null +wrapMakeNull + :: ( w ~ DMC.ArchAddrWidth arch + , 1 <= w + , KnownNat w + , DMC.MemWidth w ) + => ExtensionWrapper arch + Ctx.EmptyCtx + (LCLM.LLVMPointerType w) +wrapMakeNull = ExtensionWrapper + { extName = LCSA.AtomName "make-null" + , extArgTypes = Ctx.empty + , extWrapper = \_ -> + let nullptr = DMS.MacawNullPtr (DMC.addrWidthRepr WI.knownNat) in + return (LCCR.EvalApp (LCE.ExtensionApp nullptr)) } + +-- | Wrapper for the 'DMS.PtrEq' syntax extension that enables users to test +-- the equality of two pointers. +-- +-- > pointer-eq ptr1 ptr2 +wrapPointerEq + :: ( 1 <= w + , KnownNat w + , DMC.MemWidth w + , w ~ DMC.ArchAddrWidth arch ) + => ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w + Ctx.::> LCLM.LLVMPointerType w) + LCT.BoolType +wrapPointerEq = ExtensionWrapper + { extName = LCSA.AtomName "pointer-eq" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + , extWrapper = \args -> Ctx.uncurryAssignment (binop (DMS.PtrEq . DMM.addrWidthRepr)) args } + +-- | Wrapper for the 'DMS.MacawReadMem' syntax extension that enables users to +-- read through a pointer to retrieve data at the underlying memory location. +-- +-- > pointer-read type endianness ptr +buildPointerReadWrapper + :: ( 1 <= w + , KnownNat w + , DMC.MemWidth w + , w ~ DMC.ArchAddrWidth arch + ) + => LCT.TypeRepr tp + -> DMM.Endianness + -> ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w) + tp +buildPointerReadWrapper tp endianness = ExtensionWrapper + { extName = LCSA.AtomName "pointer-read" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + , extWrapper = + \args -> Ctx.uncurryAssignment (pointerRead tp endianness) args } + +-- | Read through a pointer using 'DMS.MacawReadMem'. +pointerRead :: ( w ~ DMC.ArchAddrWidth arch + , DMM.MemWidth w + , KnownNat w + , ExtensionParser m ext s + , ext ~ DMS.MacawExt arch + ) + => LCT.TypeRepr tp + -> DMM.Endianness + -> LCCR.Atom s (LCLM.LLVMPointerType w) + -> m (LCCR.AtomValue ext s tp) +pointerRead tp endianness ptr = + withSupportedPointerReadWriteTypes tp + (\bits bytes -> do -- `Bitvector w` case + let readInfo = DMC.BVMemRepr bytes endianness + let readExt = DMS.MacawReadMem (DMC.addrWidthRepr WI.knownNat) readInfo ptr + readAtom <- LCSC.freshAtom WP.InternalPos (LCCR.EvalExt readExt) + return (LCCR.EvalApp (LCE.ExtensionApp (DMS.PtrToBits bits readAtom)))) + (\_bits bytes -> do -- `Pointer` case + let readInfo = DMC.BVMemRepr bytes endianness + let readExt = DMS.MacawReadMem (DMC.addrWidthRepr WI.knownNat) readInfo ptr + return (LCCR.EvalExt readExt)) + +-- | Wrapper for the 'DMS.MacawWriteMem' syntax extension that enables users to +-- write data through a pointer to the underlying memory location. +-- +-- > pointer-write type endianness ptr (val :: type) +buildPointerWriteWrapper + :: ( w ~ DMC.ArchAddrWidth arch + , DMM.MemWidth w + , KnownNat w + , ext ~ DMS.MacawExt arch + ) + => LCT.TypeRepr tp + -> DMM.Endianness + -> ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w + Ctx.::> tp) + LCT.UnitType +buildPointerWriteWrapper tp endianness = ExtensionWrapper + { extName = LCSA.AtomName "pointer-write" + , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat + Ctx.:> tp + , extWrapper = + \args -> Ctx.uncurryAssignment (pointerWrite tp endianness) args } + +-- | Read through a pointer using 'DMS.MacawWriteMem'. +pointerWrite :: ( w ~ DMC.ArchAddrWidth arch + , DMM.MemWidth w + , KnownNat w + , ExtensionParser m ext s + , ext ~ DMS.MacawExt arch + ) + => LCT.TypeRepr tp + -> DMM.Endianness + -> LCCR.Atom s (LCLM.LLVMPointerType w) + -> LCCR.Atom s tp + -> m (LCCR.AtomValue ext s LCT.UnitType) +pointerWrite tp endianness ptr val = + withSupportedPointerReadWriteTypes tp + (\bits bytes -> do -- `Bitvector w` case + toPtrAtom <- LCSC.freshAtom WP.InternalPos + (LCCR.EvalApp (LCE.ExtensionApp (DMS.BitsToPtr bits val))) + let writeInfo = DMC.BVMemRepr bytes endianness + let writeExt = DMS.MacawWriteMem (DMC.addrWidthRepr WI.knownNat) + writeInfo + ptr + toPtrAtom + return (LCCR.EvalExt writeExt)) + (\_bits bytes -> do -- `Pointer` case + let writeInfo = DMC.BVMemRepr bytes endianness + let writeExt = DMS.MacawWriteMem (DMC.addrWidthRepr WI.knownNat) + writeInfo + ptr + val + return (LCCR.EvalExt writeExt)) + +-- | Wrapper for constructing bitvector literals matching the size of an +-- 'LCT.BVRepr'. This enables users to instantiate literals with portable +-- types (such as SizeT). +-- +-- > bv-typed-literal type integer +buildBvTypedLitWrapper + :: LCT.TypeRepr (LCT.BVType w) + -> ExtensionWrapper arch + (Ctx.EmptyCtx Ctx.::> LCT.IntegerType) + (LCT.BVType w) +buildBvTypedLitWrapper tp = ExtensionWrapper + { extName = LCSA.AtomName "bv-typed-literal" + , extArgTypes = Ctx.empty Ctx.:> LCT.IntegerRepr + , extWrapper = \args -> Ctx.uncurryAssignment (bvTypedLit tp) args } + +-- | Create a bitvector literal matching the size of an 'LCT.BVRepr' +bvTypedLit :: forall s ext w m + . ( ExtensionParser m ext s ) + => LCT.TypeRepr (LCT.BVType w) + -> LCCR.Atom s LCT.IntegerType + -> m (LCCR.AtomValue ext s (LCT.BVType w)) +bvTypedLit (LCT.BVRepr w) val = return (LCCR.EvalApp (LCE.IntegerToBV w val)) + +-- | Wrapper for generating a vector of the given length, where each element is +-- a fresh constant of the supplied type whose name is derived from the given +-- string. This is a convenience for users who wish to quickly generate +-- symbolic data (e.g., for use with @write-bytes@). +-- +-- > fresh-vec string type integer +buildFreshVecWrapper :: + DT.Text + -> LCT.TypeRepr tp + -> Natural + -> ExtensionWrapper arch + Ctx.EmptyCtx + (LCT.VectorType tp) +buildFreshVecWrapper nmPrefix tp len = ExtensionWrapper + { extName = LCSA.AtomName "fresh-vec" + , extArgTypes = Ctx.empty + , extWrapper = \_ -> freshVec nmPrefix tp len } + +-- | Generate a vector of the given length, where each element is a fresh +-- constant of the supplied type whose name is derived from the given string. +freshVec :: forall s ext tp m. + ExtensionParser m ext s + => DT.Text + -> LCT.TypeRepr tp + -> Natural + -> m (LCCR.AtomValue ext s (LCT.VectorType tp)) +freshVec nmPrefix tp len = do + case tp of + LCT.FloatRepr fi -> + mkVec (LCCR.FreshFloat fi) + LCT.NatRepr -> + mkVec LCCR.FreshNat + _ | LCT.AsBaseType bt <- LCT.asBaseType tp -> + mkVec (LCCR.FreshConstant bt) + | otherwise -> + empty + where + -- Construct an expression that looks roughly like: + -- + -- (vector (fresh _0) ... (fresh _)) + -- + -- Where the implementation of `fresh` is determined by the first argument. + mkVec :: (Maybe WI.SolverSymbol -> LCCR.AtomValue ext s tp) + -> m (LCCR.AtomValue ext s (LCT.VectorType tp)) + mkVec mkFresh = do + vec <- DV.generateM (fromIntegral len) $ \i -> + LCSC.freshAtom WP.InternalPos $ mkFresh $ Just $ WI.safeSymbol $ + DT.unpack nmPrefix ++ "_" ++ show i + pure $ LCCR.EvalApp $ LCE.VectorLit tp vec + +-- | Syntax extension wrappers +extensionWrappers + :: (w ~ DMC.ArchAddrWidth arch, 1 <= w, KnownNat w, DMC.MemWidth w) + => Map.Map LCSA.AtomName (SomeExtensionWrapper arch) +extensionWrappers = Map.fromList + [ (LCSA.AtomName "pointer-add", SomeExtensionWrapper wrapPointerAdd) + , (LCSA.AtomName "pointer-diff", SomeExtensionWrapper wrapPointerDiff) + , (LCSA.AtomName "pointer-sub", SomeExtensionWrapper wrapPointerSub) + , (LCSA.AtomName "pointer-eq", SomeExtensionWrapper wrapPointerEq) + , (LCSA.AtomName "make-null", SomeExtensionWrapper wrapMakeNull) + ] + +-- | All of the crucible syntax extensions to support machine code +machineCodeParserHooks + :: forall w arch proxy + . (w ~ DMC.ArchAddrWidth arch, 1 <= w, KnownNat w, DMC.MemWidth w) + => proxy arch + -> TypeLookup + -- ^ A lookup function from a 'TypeAlias' to the underlying Crucible type + -- it represents. + -> LCSC.ParserHooks (DMS.MacawExt arch) +machineCodeParserHooks proxy typeLookup = + let TypeLookup lookupFn = typeLookup + types = Map.fromList [ (LCSA.AtomName (DT.pack (show t)), lookupFn t) + | t <- allTypeAliases ] in + LCSC.ParserHooks (extensionTypeParser types) + (extensionParser extensionWrappers (machineCodeParserHooks proxy typeLookup)) diff --git a/symbolic-syntax/test-data/copy.cbl b/symbolic-syntax/test-data/copy.cbl new file mode 100644 index 000000000..2e0cf995c --- /dev/null +++ b/symbolic-syntax/test-data/copy.cbl @@ -0,0 +1,6 @@ +; A basic function that copies the data pointed to by one pointer to another +(defun @copy ((src Pointer) (dest Pointer)) Unit + (start start: + (let src-data (pointer-read Int le src)) + (pointer-write Int le dest src-data) + (return ()))) \ No newline at end of file diff --git a/symbolic-syntax/test-data/copy.out b/symbolic-syntax/test-data/copy.out new file mode 100644 index 000000000..ddb2bdcaf --- /dev/null +++ b/symbolic-syntax/test-data/copy.out @@ -0,0 +1,7 @@ +(defun @copy ((src Pointer) (dest Pointer)) Unit + (start start: + (let src-data (pointer-read Int le src)) + (pointer-write Int le dest src-data) + (return ()))) + +At test-data/copy.cbl:2:20, expected Any or Bool or Char or ComplexReal or Integer or Nat or Real or Unit or atomic type but got Pointer diff --git a/symbolic-syntax/test-data/copy.out.good b/symbolic-syntax/test-data/copy.out.good new file mode 100644 index 000000000..ddb2bdcaf --- /dev/null +++ b/symbolic-syntax/test-data/copy.out.good @@ -0,0 +1,7 @@ +(defun @copy ((src Pointer) (dest Pointer)) Unit + (start start: + (let src-data (pointer-read Int le src)) + (pointer-write Int le dest src-data) + (return ()))) + +At test-data/copy.cbl:2:20, expected Any or Bool or Char or ComplexReal or Integer or Nat or Real or Unit or atomic type but got Pointer diff --git a/symbolic-syntax/test/Test.hs b/symbolic-syntax/test/Test.hs new file mode 100644 index 000000000..3c4c49c78 --- /dev/null +++ b/symbolic-syntax/test/Test.hs @@ -0,0 +1,54 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ImplicitParams #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} + +module Main (main) where + +import Data.List (sort) +import Data.Proxy (Proxy(Proxy)) +import Data.Text.IO qualified as T +import System.FilePath +import System.IO + +import Test.Tasty (defaultMain, TestTree, testGroup) +import Test.Tasty.Golden + +import Lang.Crucible.Syntax.Prog (doParseCheck) + +import Data.Macaw.X86 (X86_64) + +import Data.Macaw.Symbolic.Syntax (machineCodeParserHooks, x86_64LinuxTypes) + +main :: IO () +main = do + parseTests <- findTests "Parse tests" "test-data" testParser + defaultMain $ testGroup "Tests" [parseTests] + +findTests :: String -> FilePath -> (FilePath -> FilePath -> IO ()) -> IO TestTree +findTests groupName testDir testAction = + do inputs <- findByExtension [".cbl"] testDir + return $ testGroup groupName + [ goldenFileTestCase input testAction + | input <- sort inputs + ] + +goldenFileTestCase :: FilePath -> (FilePath -> FilePath -> IO ()) -> TestTree +goldenFileTestCase input testAction = + goldenVsFileDiff + (takeBaseName input) -- test name + (\x y -> ["diff", "-u", x, y]) + goodFile -- golden file path + outFile + (testAction input outFile) -- action whose result is tested + where + outFile = replaceExtension input ".out" + goodFile = replaceExtension input ".out.good" + +testParser :: FilePath -> FilePath -> IO () +testParser inFile outFile = + do contents <- T.readFile inFile + let ?parserHooks = machineCodeParserHooks (Proxy @X86_64) x86_64LinuxTypes + withFile outFile WriteMode $ doParseCheck inFile contents True +