From 9b1b120121ccdd2ec27fedc7081b1b261c349c38 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 15 Dec 2023 12:28:08 -0800 Subject: [PATCH] Data.Macaw.PPC: add classifier to handle pc-relative loads for relocatable code To do PC-relative reads for relocatable code, PPC binaries will use the `bl` instruction to jump to the next address in order to read the PC into the link register. This is incorrectly classified as a function call by the default classifier. This adds an additional classifier to PPC that specifically checks for this case (i.e. a `bl` to the subsequent address) and instead treats it like a normal jump, where the LR is concretely known to be the PC at the jump target. It is unlikely that binaries would have this pattern but intend it to be treated like a normal function call. Nevertheless, this would be a breaking change for such a binary if it existed and would require refining this classifier further in order to distinguish this case. --- base/src/Data/Macaw/Discovery.hs | 1 + base/src/Data/Macaw/Discovery/Classifier.hs | 1 + macaw-ppc/src/Data/Macaw/PPC.hs | 44 ++++++++++++++++++++- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 8027f07d..19adca53 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -75,6 +75,7 @@ module Data.Macaw.Discovery , jumpTableClassifier -- * Simplification , eliminateDeadStmts + , identifyCallTargets -- * Re-exports , ArchAddrWidth ) where diff --git a/base/src/Data/Macaw/Discovery/Classifier.hs b/base/src/Data/Macaw/Discovery/Classifier.hs index 8e1ab266..89a32c2e 100644 --- a/base/src/Data/Macaw/Discovery/Classifier.hs +++ b/base/src/Data/Macaw/Discovery/Classifier.hs @@ -27,6 +27,7 @@ module Data.Macaw.Discovery.Classifier ( -- * Reusable helpers , branchBlockState , classifierEndBlock + , identifyCallTargets ) where import Control.Applicative ( Alternative(empty) ) diff --git 
a/macaw-ppc/src/Data/Macaw/PPC.hs b/macaw-ppc/src/Data/Macaw/PPC.hs index f4549e09..c490c140 100644 --- a/macaw-ppc/src/Data/Macaw/PPC.hs +++ b/macaw-ppc/src/Data/Macaw/PPC.hs @@ -10,6 +10,7 @@ module Data.Macaw.PPC ( -- * Macaw configurations ppc32_linux_info, ppc64_linux_info, + ppcReadPCClassifier, -- * Type-level tags AnyPPC, Variant, @@ -29,6 +30,7 @@ module Data.Macaw.PPC ( import Control.Lens ( (^.) ) import Data.Maybe import Data.Proxy ( Proxy(..) ) +import Control.Applicative ( (<|>) ) import qualified Data.Macaw.Architecture.Info as MI import qualified Data.Macaw.CFG as MC @@ -64,6 +66,7 @@ import Data.Macaw.PPC.Identify ( identifyCall import qualified Data.Macaw.PPC.PPCReg as R import qualified Data.Macaw.PPC.Semantics.PPC32 as PPC32 import qualified Data.Macaw.PPC.Semantics.PPC64 as PPC64 +import qualified Control.Monad.Reader as CMR -- | The constructor for type tags for PowerPC type AnyPPC = PPC.AnyPPC @@ -111,7 +114,7 @@ ppc64_linux_info binData = , MI.initialBlockRegs = PPC.Eval.ppcInitialBlockRegs , MI.archCallParams = PPC.Eval.ppcCallParams (preserveRegAcrossSyscall proxy) , MI.extractBlockPrecond = PPC.Eval.ppcExtractBlockPrecond - , MI.archClassifier = MD.defaultClassifier + , MI.archClassifier = ppcReadPCClassifier <|> MD.defaultClassifier } where proxy = Proxy @PPC.V64 @@ -137,7 +140,44 @@ ppc32_linux_info = , MI.initialBlockRegs = PPC.Eval.ppcInitialBlockRegs , MI.archCallParams = PPC.Eval.ppcCallParams (preserveRegAcrossSyscall proxy) , MI.extractBlockPrecond = PPC.Eval.ppcExtractBlockPrecond - , MI.archClassifier = MD.defaultClassifier + , MI.archClassifier = ppcReadPCClassifier <|> MD.defaultClassifier } where proxy = Proxy @PPC.V32 + + +-- | This classifier handles a ppc-specific pattern used to read the pc +-- in order to do pc-relative loads for relocatable code. +-- +-- To do this, the pc is loaded into the link register by "calling" a +-- function at the immediate next address, causing the LR and +-- the pc to be equal. 
The original LR value
+-- is then restored before the function returns.
+--
+-- e.g.
+--   0x000018f8 mflr r0
+--   0x000018fc bdnzl 0x1900
+--   0x00001900 mflr r30
+--   0x00001904 lwz r9, -0x20(r30)
+--   ...
+--   0x00001bd4 mtlr r0
+--   0x00001bd8 blr
+--
+-- This stashes the original LR in r0, and then the pc (i.e. 0x1900) in the
+-- LR, which is then used to do a pc-relative load into r9.
+-- Before the function returns, the original LR is restored from r0.
+
+ppcReadPCClassifier :: PPCArchConstraints v => MD.BlockClassifier (AnyPPC v) ids
+ppcReadPCClassifier = MI.classifierName "PPCReadPC" $ do
+  env <- CMR.ask
+  let pctx = MI.classifierParseContext env
+      memory = MI.pctxMemory pctx
+      endRegs = MI.classifierFinalRegState env
+      callInfo =
+        MI.identifyCall (MI.pctxArchInfo pctx) memory (MI.classifierStmts env) endRegs
+  -- Give up unless the architecture's call identification matches this block.
+  returnAddr <- maybe (fail "no call identified") (pure . snd) callInfo
+  -- A pc read "calls" exactly one target, and that target is its own return address.
+  case MD.identifyCallTargets memory (MI.classifierAbsState env) endRegs of
+    [callee] | callee == returnAddr -> MD.directJumpClassifier
+    callees -> fail ("call is not a pc read: " ++ show callees)
\ No newline at end of file