Skip to content

Commit

Permalink
Add support for concolic execution
Browse files Browse the repository at this point in the history
  • Loading branch information
samalws-tob committed Apr 29, 2024
1 parent 76088ea commit db6e1a8
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 50 deletions.
30 changes: 26 additions & 4 deletions lib/Echidna/Campaign.hs
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,31 @@ runSymWorker callback vm dict workerId initialCorpus name cs = do
symexecTxs transactions
listenerFunc _ = pure ()

symexecTxs txs = do
cfg <- asks (.cfg)
symexecTxs txs = mapM_ symexecTx =<< txsToTxAndVms txs

-- | Turn a list of transactions into inputs for symexecTx:
-- (maybe txn to concolic execute on, vm to symexec on, list of txns we're on top of)
txsToTxAndVms txs = do
isConc <- asks (.cfg.campaignConf.symExecConcolic)
if isConc
then txsToTxAndVmsConc txs vm []
else txsToTxAndVmsSym txs

txsToTxAndVmsConc [] _ _ = pure []
txsToTxAndVmsConc (h:t) vm' txsBase = do
(_, vm'') <- execTx vm' h
rest <- txsToTxAndVmsConc t vm'' (txsBase <> [h])
pure $ case h of
(Tx { call = SolCall _ }) -> (Just h,vm',txsBase):rest
_ -> rest

txsToTxAndVmsSym txs = do
vm' <- foldlM (\vm' tx -> snd <$> execTx vm' tx) vm txs
(threadId, symTxsChan) <- liftIO $ createSymTx cfg name cs vm'
pure [(Nothing,vm',txs)]

symexecTx (tx, vm', txsBase) = do
cfg <- asks (.cfg)
(threadId, symTxsChan) <- liftIO $ createSymTx cfg name cs tx vm'

modify' (\ws -> ws { runningThreads = [threadId] })
lift callback
Expand All @@ -161,7 +182,8 @@ runSymWorker callback vm dict workerId initialCorpus name cs = do
modify' (\ws -> ws { runningThreads = [] })
lift callback

newCoverage <- or <$> mapM (\symTx -> snd <$> callseq vm' [symTx]) symTxs
-- We can't do callseq vm' [symTx] because callseq might post the full call sequence as an event
newCoverage <- or <$> mapM (\symTx -> snd <$> callseq vm (txsBase <> [symTx])) symTxs

unless newCoverage (pushWorkerEvent SymNoNewCoverage)

Expand Down
1 change: 1 addition & 0 deletions lib/Echidna/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ instance FromJSON EConfigWithUsage where
<*> v ..:? "workers"
<*> v ..:? "server"
<*> v ..:? "symExec" ..!= False
<*> v ..:? "symExecConcolic" ..!= True
<*> v ..:? "symExecTimeout" ..!= defaultSymExecTimeout
<*> v ..:? "symExecNSolvers" ..!= defaultSymExecNWorkers
<*> v ..:? "symExecMaxIters" ..!= defaultSymExecMaxIters
Expand Down
163 changes: 126 additions & 37 deletions lib/Echidna/SymExec.hs
Original file line number Diff line number Diff line change
@@ -1,35 +1,42 @@
{-# LANGUAGE DataKinds #-}
{-# OPTIONS_GHC -Wno-gadt-mono-local-binds #-}

module Echidna.SymExec (createSymTx) where

import Control.Applicative ((<|>))
import Control.Concurrent.Async (mapConcurrently)
import Control.Concurrent (ThreadId, forkIO)
import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar, takeMVar)
import Control.Monad (forM)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (runReaderT)
import Data.ByteString.Lazy qualified as BS
import Data.Foldable (fold)
import Data.Function ((&))
import Data.Functor ((<&>))
import Data.List (singleton)
import Data.Map qualified as Map
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Maybe (fromMaybe, mapMaybe, isJust, fromJust)
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Text (Text)
import Data.Text qualified as T
import Data.Vector (toList, fromList)
import Optics.Core ((.~), (%))
import Echidna.Solidity (chooseContract)
import Echidna.Types (fromEVM)
import Echidna.Types.Campaign (CampaignConf(..))
import Echidna.Types.Config (EConfig(..))
import Echidna.Types.Solidity (SolConf(..))
import EVM.ABI (Sig(..), decodeAbiValue)
import EVM.ABI (AbiValue(..), AbiType(..), Sig(..), decodeAbiValue)
import EVM.Expr (simplify)
import EVM.Fetch qualified as Fetch
import EVM.SMT (SMTCex(..))
import EVM.SMT (SMTCex(..), SMT2, assertProps)
import EVM (loadContract, resetState)
import EVM.Effects (defaultEnv)
import EVM.Effects (defaultEnv, defaultConfig)
import EVM.Solidity (SolcContract(..), Method(..))
import EVM.Solvers (withSolvers, Solver(Z3), CheckSatResult(Sat))
import EVM.SymExec (interpret, runExpr, abstractVM, mkCalldata, produceModels, LoopHeuristic (Naive))
import EVM.Types (Addr, VM(..), Frame(..), FrameState(..), VMType(..), Env(..), Expr(..), EType(..), BaseState(..), word256Bytes)
import EVM.Solvers (withSolvers, Solver(Z3), CheckSatResult(Sat), SolverGroup, checkSat)
import EVM.SymExec (interpret, runExpr, abstractVM, mkCalldata, LoopHeuristic (Naive), flattenExpr, extractProps)
import EVM.Types (Addr, VM(..), Frame(..), FrameState(..), VMType(..), Env(..), Expr(..), EType(..), Query(..), Prop(..), BranchCondition(..), W256, word256Bytes, word)
import EVM.Traversals (mapExpr)
import Control.Monad.ST (stToIO, RealWorld)
import Control.Monad.State.Strict (execState, runStateT)
import Echidna.Types.Tx (Tx(..), TxCall(..), maxGasPerBlock)
Expand All @@ -39,15 +46,28 @@ import Echidna.Types.Tx (Tx(..), TxCall(..), maxGasPerBlock)
-- Spawns a new thread; returns its thread ID as the first return value.
-- The second return value is an MVar which is populated with transactions
-- once the symbolic execution is finished.
createSymTx :: EConfig -> Maybe Text -> [SolcContract] -> VM Concrete RealWorld -> IO (ThreadId, MVar [Tx])
createSymTx cfg name cs vm = do
mainContract <- chooseContract cs name
exploreContract cfg mainContract vm
-- Also takes an optional Tx argument; this is used as the transaction
-- to follow during concolic execution. If none is provided, we do full
-- symbolic execution.
-- The Tx argument, if present, must have a .call value of type SolCall.
createSymTx :: EConfig -> Maybe Text -> [SolcContract] -> Maybe Tx -> VM Concrete RealWorld -> IO (ThreadId, MVar [Tx])
createSymTx cfg name cs tx vm = do
mainContract <- chooseContract cs name
exploreContract cfg mainContract tx vm

exploreContract :: EConfig -> SolcContract -> VM Concrete RealWorld -> IO (ThreadId, MVar [Tx])
exploreContract conf contract vm = do
let methods = Map.elems contract.abiMap
timeout = Just (fromIntegral conf.campaignConf.symExecTimeout)
exploreContract :: EConfig -> SolcContract -> Maybe Tx -> VM Concrete RealWorld -> IO (ThreadId, MVar [Tx])
exploreContract conf contract tx vm = do
let
isConc = isJust tx
allMethods = Map.elems contract.abiMap
concMethods (Tx { call = SolCall (methodName, _) }) = filter (\method -> method.name == methodName) methods
concMethods _ = error "`exploreContract` should only be called with Nothing or Just Tx{call=SolCall _} for its tx argument"
methods = maybe allMethods concMethods tx
timeout = Just (fromIntegral conf.campaignConf.symExecTimeout)
maxIters = if isConc then Nothing else Just conf.campaignConf.symExecMaxIters
askSmtIters = if isConc then 0 else conf.campaignConf.symExecAskSMTIters
rpcInfo = Nothing
defaultSender = fromJust $ fmap (.dst) tx <|> Set.lookupMin conf.solConf.sender <|> Just 0

threadIdChan <- newEmptyMVar
doneChan <- newEmptyMVar
Expand All @@ -57,36 +77,41 @@ exploreContract conf contract vm = do
threadId <- liftIO $ forkIO $ flip runReaderT defaultEnv $ do
res <- forM methods $ \method -> do
let
fetcher = concOrSymFetcher tx solvers rpcInfo
dst = conf.solConf.contractAddr
calldata@(cd, constraints) = mkCalldata (Just (Sig method.methodSignature (snd <$> method.inputs))) []
vmSym = abstractVM calldata contract.runtimeCode Nothing False
maxIters = Just conf.campaignConf.symExecMaxIters
askSmtIters = conf.campaignConf.symExecAskSMTIters
rpcInfo = Nothing
vmSym' <- liftIO $ stToIO vmSym
vmReset <- liftIO $ snd <$> runStateT (fromEVM resetState) vm
let vm' = vmReset & execState (loadContract (LitAddr dst))
& vmMakeSymbolic
& #frames .~ []
& #constraints .~ constraints
& #state % #callvalue .~ TxValue
& #state % #caller .~ SymAddr "caller"
& #state % #calldata .~ cd
& #state % #pc .~ 0
& #state % #stack .~ []
& #config % #baseState .~ AbstractBase
& #env % #contracts .~ (Map.union vmSym'.env.contracts vm.env.contracts)
exprInter <- interpret (Fetch.oracle solvers rpcInfo) maxIters askSmtIters Naive vm' runExpr
models <- produceModels solvers (simplify exprInter)
pure $ mapMaybe (modelToTx dst method) models
liftIO $ putMVar resultChan (mconcat res)
-- TODO we might want to switch vm's state.baseState value to to AbstractBase eventually.
-- Doing so might mess up concolic execution.
exprInter <- interpret fetcher maxIters askSmtIters Naive vm' runExpr
models <- liftIO $ mapConcurrently (checkSat solvers) $ manipulateExprInter isConc exprInter
pure $ mapMaybe (modelToTx dst method conf.solConf.sender defaultSender) models
liftIO $ putMVar resultChan $ concat res
liftIO $ putMVar doneChan ()
liftIO $ putMVar threadIdChan threadId
liftIO $ takeMVar doneChan

threadId <- takeMVar threadIdChan
pure (threadId, resultChan)

-- | Turn the expression returned by `interpret` into into SMT2 values to feed into the solver
manipulateExprInter :: Bool -> Expr End -> [SMT2]
manipulateExprInter isConc = map (assertProps defaultConfig) . middleStep . map (extractProps . simplify) . flattenExpr . simplify where
middleStep = if isConc then middleStepConc else id
middleStepConc = map singleton . concatMap (go (PBool True))
go :: Prop -> [Prop] -> [Prop]
go _ [] = []
go acc (h:t) = (PNeg h `PAnd` acc) : go (h `PAnd` acc) t

-- | Sets result to Nothing, and sets gas to ()
vmMakeSymbolic :: VM Concrete s -> VM Symbolic s
vmMakeSymbolic vm
Expand Down Expand Up @@ -129,23 +154,42 @@ frameStateMakeSymbolic fs
frameMakeSymbolic :: Frame Concrete s -> Frame Symbolic s
frameMakeSymbolic fr = Frame { context = fr.context, state = frameStateMakeSymbolic fr.state }

modelToTx :: Addr -> Method -> (Expr 'End, CheckSatResult) -> Maybe Tx
modelToTx dst method (_end, result) =
modelToTx :: Addr -> Method -> Set Addr -> Addr -> CheckSatResult -> Maybe Tx
modelToTx dst method senders fallbackSender result =
case result of
Sat cex ->
let
args = (zip [1..] method.inputs) <&> \(i::Int, (_argName, argType)) ->
case Map.lookup (Var ("arg" <> T.pack (show i))) cex.vars of
Just w ->
decodeAbiValue argType (BS.fromStrict (word256Bytes w))
Nothing -> -- put a placeholder
decodeAbiValue argType (BS.repeat 0)
args = zipWith grabArg (snd <$> method.inputs) ["arg" <> T.pack (show n) | n <- [1..] :: [Int]]

grabArg t
= case t of
AbiUIntType _ -> grabNormalArg t
AbiIntType _ -> grabNormalArg t
AbiBoolType -> grabNormalArg t
AbiBytesType _ -> grabNormalArg t
AbiAddressType -> grabAddressArg
AbiArrayType n mt -> grabArrayArg n mt
_ -> error "Unexpected ABI type in `modelToTx`"

grabNormalArg argType name
= case Map.lookup (Var name) cex.vars of
Just w ->
decodeAbiValue argType (BS.fromStrict (word256Bytes w))
Nothing -> -- put a placeholder
decodeAbiValue argType (BS.repeat 0)

grabAddressArg name = AbiAddress $ fromMaybe 0 $ Map.lookup (SymAddr name) cex.addrs

grabArrayArg nElem memberType name = AbiArray nElem memberType $ fromList [grabArg memberType $ name <> T.pack (show n) | n <- [0..nElem] :: [Int]]

src_ = fromMaybe 0 $ Map.lookup (SymAddr "sender") cex.addrs
src = if Set.member src_ senders then src_ else fallbackSender

value = fromMaybe 0 $ Map.lookup TxValue cex.txContext

in Just Tx
{ call = SolCall (method.name, args)
, src = 0
, src = src
, dst = dst
, gasprice = 0
, gas = maxGasPerBlock
Expand All @@ -154,3 +198,48 @@ modelToTx dst method (_end, result) =
}

_ -> Nothing

-- | Symbolic variable -> concrete value mapping used during concolic execution.
-- The third member in the tuple is the transaction value.
type Substs = ([(Text, W256)], [(Text, Addr)], W256)

-- | Mirrors hevm's `symAbiArg` function; whenever that changes, we need to change this too
genSubsts :: Tx -> Substs
genSubsts (Tx { call = SolCall (_, abiVals), src, value }) = addOnFinalValues $ fold $ zipWith genVal abiVals (T.pack . ("arg" <>) . show <$> ([1..] :: [Int])) where
addOnFinalValues (a, b) = (a, ("sender", src):b, value)
genVal (AbiUInt _ i) name = ([(name, fromIntegral i)], [])
genVal (AbiInt _ i) name = ([(name, fromIntegral i)], [])
genVal (AbiBool b) name = ([(name, if b then 1 else 0)], [])
genVal (AbiAddress addr) name = ([], [(name, addr)])
genVal (AbiBytes n b) name | n > 0 && n <= 32 = ([(name, word b)], [])
genVal (AbiArray _ _ vals) name = fold $ zipWith genVal (toList vals) [name <> T.pack (show n) | n <- [0..] :: [Int]]
genVal _ _ = error "`genSubsts` is not implemented for all API types, mirroring hevm's `symAbiArg` function"
genSubsts _ = error "`genSubsts` should only be called with a `SolCall` transaction argument"

-- | Apply substitutions into an expression
substExpr :: Substs -> Expr a -> Expr a
substExpr (sw, sa, val) = mapExpr go where
go v@(Var t) = maybe v Lit (lookup t sw)
go v@(SymAddr t) = maybe v LitAddr (lookup t sa)
go TxValue = Lit val
go e = e

-- | Fetcher used during concolic exeuction.
-- This is the most important function for concolic execution;
-- it determines what branch `interpret` should take.
-- We ensure that this fetcher is always used by setting askSMTIter to 0.
-- We determine what branch to take by substituting concrete values into
-- the provided `Prop`, and then simplifying.
-- We fall back on `Fetch.oracle`.
concFetcher :: Substs -> SolverGroup -> Fetch.RpcInfo -> Fetch.Fetcher t m s
concFetcher substs s r (PleaseAskSMT branchcondition pathconditions continue) =
case simplify (substExpr substs branchcondition) of
Lit n -> pure (continue (Case (n/=0)))
simplifiedExpr -> Fetch.oracle s r (PleaseAskSMT simplifiedExpr pathconditions continue)
concFetcher _ s r q = Fetch.oracle s r q

-- | Depending on whether we're doing concolic or full symbolic execution,
-- choose a fetcher to be used in `interpret` (either `concFetcher` or `Fetch.oracle`).
concOrSymFetcher :: Maybe Tx -> SolverGroup -> Fetch.RpcInfo -> Fetch.Fetcher t m s
concOrSymFetcher (Just c) = concFetcher $ genSubsts c
concOrSymFetcher Nothing = Fetch.oracle
9 changes: 6 additions & 3 deletions lib/Echidna/Types/Campaign.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ data CampaignConf = CampaignConf
-- ^ Server-Sent Events HTTP port number, if missing server is not ran
, symExec :: Bool
-- ^ Whether to add an additional symbolic execution worker
, symExecConcolic :: Bool
-- ^ Whether symbolic execution will be concolic (vs full symbolic execution)
-- Only relevant if symExec is True
, symExecTimeout :: Int
-- ^ Timeout for symbolic execution SMT solver.
-- Only relevant if symExec is True
Expand All @@ -54,11 +57,11 @@ data CampaignConf = CampaignConf
-- Only relevant if symExec is True
, symExecMaxIters :: Integer
-- ^ Number of times we may revisit a particular branching point.
-- Only relevant if symExec is True
-- Only relevant if symExec is True and symExecConcolic is False
, symExecAskSMTIters :: Integer
-- ^ Number of times we may revisit a particular branching point
-- before we consult the smt solver to check reachability.
-- Only relevant if symExec is True
-- before we consult the SMT solver to check reachability.
-- Only relevant if symExec is True and symExecConcolic is False
}

data WorkerType = FuzzWorker | SymbolicWorker deriving (Eq)
Expand Down
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ ghc-options:
dependencies:
- base
- aeson
- async
- base16-bytestring
- binary
- bytestring
Expand Down
4 changes: 2 additions & 2 deletions stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ packages:
- '.'

extra-deps:
- git: https://github.com/ethereum/hevm.git
commit: a39b1c07a3f643330f920042eb94a43d7e6454b5
- git: https://github.com/samalws-tob/hevm.git
commit: ccb3d403a95d08105afe2a64cfe0a4f7efff57c8

- restless-git-0.7@sha256:346a5775a586f07ecb291036a8d3016c3484ccdc188b574bcdec0a82c12db293,968
- s-cargot-0.1.4.0@sha256:61ea1833fbb4c80d93577144870e449d2007d311c34d74252850bb48aa8c31fb,3525
Expand Down
11 changes: 7 additions & 4 deletions tests/solidity/basic/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,18 @@ workers: 1
server: null
# whether to add an additional symbolic execution worker
symExec: false
# timeout for symbolic execution SMT solver
# whether symbolic execution will be concolic (vs full symbolic execution)
# only relevant if symExec is true
symExecNSolvers: 1
symExecConcolic: true
# number of SMT solvers used in symbolic execution
# only relevant if symExec is true
symExecNSolvers: 1
# timeout for symbolic execution SMT solver
# only relevant if symExec is true
symExecTimeout: 30
# Number of times we may revisit a particular branching point
# only relevant if symExec is true
# only relevant if symExec is true and symExecConcolic is false
symExecMaxIters: 10
# Number of times we may revisit a particular branching point before we consult the smt solver to check reachability
# only relevant if symExec is true
# only relevant if symExec is true and symExecConcolic is false
symExecAskSMTIters: 1
1 change: 1 addition & 0 deletions tests/solidity/symbolic/sym-assert.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
testMode: assertion
symExec: true
symExecConcolic: false
workers: 0
1 change: 1 addition & 0 deletions tests/solidity/symbolic/sym.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
symExec: true
workers: 0
symExecConcolic: false

0 comments on commit db6e1a8

Please sign in to comment.