-- (stray coverage-annotation residue: "never executed / always true / always false")
    1 {-# LANGUAGE BangPatterns #-}
    2 {-# LANGUAGE CPP #-}
    3 {-# LANGUAGE FlexibleContexts #-}
    4 {-# LANGUAGE FlexibleInstances #-}
    5 {-# LANGUAGE GADTs #-}
    6 {-# LANGUAGE MultiParamTypeClasses #-}
    7 {-# LANGUAGE ScopedTypeVariables #-}
    8 {-# LANGUAGE StandaloneDeriving #-}
    9 {-# LANGUAGE UndecidableInstances #-}
   10 
   11 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
   12 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns   #-}
   13 
   14 -- CmmNode type for representation using Hoopl graphs.
   15 
   16 module GHC.Cmm.Node (
   17      CmmNode(..), CmmFormal, CmmActual, CmmTickish,
   18      UpdFrameOffset, Convention(..),
   19      ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
   20      CmmReturnInfo(..),
   21      mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
   22      mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors,
   23 
   24      -- * Tick scopes
   25      CmmTickScope(..), isTickSubScope, combineTickScopes,
   26   ) where
   27 
   28 import GHC.Prelude hiding (succ)
   29 
   30 import GHC.Platform.Regs
   31 import GHC.Cmm.Expr
   32 import GHC.Cmm.Switch
   33 import GHC.Data.FastString
   34 import GHC.Types.ForeignCall
   35 import GHC.Utils.Outputable
   36 import GHC.Runtime.Heap.Layout
   37 import GHC.Types.Tickish (CmmTickish)
   38 import qualified GHC.Types.Unique as U
   39 
   40 import GHC.Cmm.Dataflow.Block
   41 import GHC.Cmm.Dataflow.Graph
   42 import GHC.Cmm.Dataflow.Collections
   43 import GHC.Cmm.Dataflow.Label
   44 import Data.Maybe
   45 import Data.List (tails,sortBy)
   46 import GHC.Types.Unique (nonDetCmpUnique)
   47 import GHC.Utils.Misc
   48 
   49 
   50 ------------------------
   51 -- CmmNode
   52 
   53 #define ULabel {-# UNPACK #-} !Label
   54 
   55 data CmmNode e x where
   56   CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O
   57 
   58   CmmComment :: FastString -> CmmNode O O
   59 
   60     -- Tick annotation, covering Cmm code in our tick scope. We only
   61     -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
   62     -- See Note [CmmTick scoping details]
   63   CmmTick :: !CmmTickish -> CmmNode O O
   64 
   65     -- Unwind pseudo-instruction, encoding stack unwinding
   66     -- instructions for a debugger. This describes how to reconstruct
   67     -- the "old" value of a register if we want to navigate the stack
   68     -- up one frame. Having unwind information for @Sp@ will allow the
   69     -- debugger to "walk" the stack.
   70     --
   71     -- See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
   72   CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O
   73 
   74   CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
   75     -- Assign to register
   76 
   77   CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
   78     -- Assign to memory location.  Size is
   79     -- given by cmmExprType of the rhs.
   80 
   81   CmmUnsafeForeignCall ::       -- An unsafe foreign call;
   82                                 -- see Note [Foreign calls]
   83                                 -- Like a "fat machine instruction"; can occur
   84                                 -- in the middle of a block
   85       ForeignTarget ->          -- call target
   86       [CmmFormal] ->            -- zero or more results
   87       [CmmActual] ->            -- zero or more arguments
   88       CmmNode O O
   89       -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
   90       -- See Note [Unsafe foreign calls clobber caller-save registers]
   91       --
   92       -- Invariant: the arguments and the ForeignTarget must not
   93       -- mention any registers for which GHC.Platform.callerSaves
   94       -- is True.  See Note [Register parameter passing].
   95 
   96   CmmBranch :: ULabel -> CmmNode O C
   97                                    -- Goto another block in the same procedure
   98 
   99   CmmCondBranch :: {                 -- conditional branch
  100       cml_pred :: CmmExpr,
  101       cml_true, cml_false :: ULabel,
  102       cml_likely :: Maybe Bool       -- likely result of the conditional,
  103                                      -- if known
  104   } -> CmmNode O C
  105 
  106   CmmSwitch
  107     :: CmmExpr       -- Scrutinee, of some integral type
  108     -> SwitchTargets -- Cases. See [Note SwitchTargets]
  109     -> CmmNode O C
  110 
  111   CmmCall :: {                -- A native call or tail call
  112       cml_target :: CmmExpr,  -- never a CmmPrim to a CallishMachOp!
  113 
  114       cml_cont :: Maybe Label,
  115           -- Label of continuation (Nothing for return or tail call)
  116           --
  117           -- Note [Continuation BlockIds]: these BlockIds are called
  118           -- Continuation BlockIds, and are the only BlockIds that can
  119           -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
  120           -- (CmmStackSlot (Young b) _).
  121 
  122       cml_args_regs :: [GlobalReg],
  123           -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
  124           -- to the call.  This is essential information for the
  125           -- native code generator's register allocator; without
  126           -- knowing which GlobalRegs are live it has to assume that
  127           -- they are all live.  This list should only include
  128           -- GlobalRegs that are mapped to real machine registers on
  129           -- the target platform.
  130 
  131       cml_args :: ByteOff,
  132           -- Byte offset, from the *old* end of the Area associated with
  133           -- the Label (if cml_cont = Nothing, then Old area), of
  134           -- youngest outgoing arg.  Set the stack pointer to this before
  135           -- transferring control.
  136           -- (NB: an update frame might also have been stored in the Old
  137           --      area, but it'll be in an older part than the args.)
  138 
  139       cml_ret_args :: ByteOff,
  140           -- For calls *only*, the byte offset for youngest returned value
  141           -- This is really needed at the *return* point rather than here
  142           -- at the call, but in practice it's convenient to record it here.
  143 
  144       cml_ret_off :: ByteOff
  145         -- For calls *only*, the byte offset of the base of the frame that
  146         -- must be described by the info table for the return point.
  147         -- The older words are an update frames, which have their own
  148         -- info-table and layout information
  149 
  150         -- From a liveness point of view, the stack words older than
  151         -- cml_ret_off are treated as live, even if the sequel of
  152         -- the call goes into a loop.
  153   } -> CmmNode O C
  154 
  155   CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
  156                                 -- Always the last node of a block
  157       tgt   :: ForeignTarget,   -- call target and convention
  158       res   :: [CmmFormal],     -- zero or more results
  159       args  :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing]
  160       succ  :: ULabel,          -- Label of continuation
  161       ret_args :: ByteOff,      -- same as cml_ret_args
  162       ret_off :: ByteOff,       -- same as cml_ret_off
  163       intrbl:: Bool             -- whether or not the call is interruptible
  164   } -> CmmNode O C
  165 
  166 {- Note [Foreign calls]
  167 ~~~~~~~~~~~~~~~~~~~~~~~
  168 A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
  169 a CmmForeignCall call is used for *safe* foreign calls.
  170 
  171 Unsafe ones are mostly easy: think of them as a "fat machine
  172 instruction".  In particular, they do *not* kill all live registers,
  173 just the registers they return to (there was a bit of code in GHC that
  174 conservatively assumed otherwise.)  However, see [Register parameter passing].
  175 
  176 Safe ones are trickier.  A safe foreign call
  177      r = f(x)
  178 ultimately expands to
  179      push "return address"      -- Never used to return to;
                                -- just points to an info table
  181      save registers into TSO
  182      call suspendThread
  183      r = f(x)                   -- Make the call
  184      call resumeThread
  185      restore registers
  186      pop "return address"
  187 We cannot "lower" a safe foreign call to this sequence of Cmms, because
  188 after we've saved Sp all the Cmm optimiser's assumptions are broken.
  189 
  190 Note that a safe foreign call needs an info table.
  191 
  192 So Safe Foreign Calls must remain as last nodes until the stack is
  193 made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above
  194 sequence.
  195 -}
  196 
  197 {- Note [Unsafe foreign calls clobber caller-save registers]
  198 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  199 
  200 A foreign call is defined to clobber any GlobalRegs that are mapped to
  201 caller-saves machine registers (according to the prevailing C ABI).
  202 GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
  203 
  204 This is a design choice that makes it easier to generate code later.
  205 We could instead choose to say that foreign calls do *not* clobber
  206 caller-saves regs, but then we would have to figure out which regs
  207 were live across the call later and insert some saves/restores.
  208 
  209 Furthermore when we generate code we never have any GlobalRegs live
  210 across a call, because they are always copied-in to LocalRegs and
  211 copied-out again before making a call/jump.  So all we have to do is
  212 avoid any code motion that would make a caller-saves GlobalReg live
  213 across a foreign call during subsequent optimisations.
  214 -}
  215 
  216 {- Note [Register parameter passing]
  217 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  218 On certain architectures, some registers are utilized for parameter
  219 passing in the C calling convention.  For example, in x86-64 Linux
  220 convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
  221 argument passing.  These are registers R3-R6, which our generated
  222 code may also be using; as a result, it's necessary to save these
  223 values before doing a foreign call.  This is done during initial
  224 code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils.
  225 
  226 However, one result of doing this is that the contents of these registers may
  227 mysteriously change if referenced inside the arguments.  This is dangerous, so
-- you'll need to disable inlining in much the same way as is done in
-- GHC.Cmm.Sink
  229 currently.  We should fix this!
  230 -}
  231 
  232 ---------------------------------------------
  233 -- Eq instance of CmmNode
  234 
-- Structural equality; standalone deriving is required because CmmNode is a GADT.
deriving instance Eq (CmmNode e x)
  236 
  237 ----------------------------------------------
  238 -- Hoopl instances of CmmNode
  239 
instance NonLocal CmmNode where
  -- CmmEntry is the only (C O)-shaped node, so this match is complete
  -- at that shape despite looking partial.
  entryLabel (CmmEntry l _) = l

  -- Successors of the block-terminating ((O C)-shaped) nodes.
  successors (CmmBranch l) = [l]
  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
  successors (CmmSwitch _ ids) = switchTargetsToList ids
  successors (CmmCall {cml_cont=l}) = maybeToList l
  successors (CmmForeignCall {succ=l}) = [l]
  248 
  249 
  250 --------------------------------------------------
  251 -- Various helper types
  252 
-- | An actual argument of a call: an arbitrary Cmm expression.
type CmmActual = CmmExpr
-- | A formal result of a call: a local register to assign into.
type CmmFormal = LocalReg

-- | Byte offset of an update frame on the stack.
type UpdFrameOffset = ByteOff
  257 
-- | A convention maps a list of values (function arguments or return
-- values) to registers or stack locations.  Which convention applies
-- depends on how the code being called was generated.
data Convention
  = NativeDirectCall
       -- ^ top-level Haskell functions use @NativeDirectCall@, which
       -- maps arguments to registers starting with R2, according to
       -- how many registers are available on the platform.  This
       -- convention ignores R1, because for a top-level function call
       -- the function closure is implicit, and doesn't need to be passed.
  | NativeNodeCall
       -- ^ non-top-level Haskell functions, which pass the address of
       -- the function closure in R1 (regardless of whether R1 is a
       -- real register or not), and the rest of the arguments in
       -- registers or on the stack.
  | NativeReturn
       -- ^ a native return.  The convention for returns depends on
       -- how many values are returned: for just one value returned,
       -- the appropriate register is used (R1, F1, etc.). regardless
       -- of whether it is a real register or not.  For multiple
       -- values returned, they are mapped to registers or the stack.
  | Slow
       -- ^ Slow entry points: all args pushed on the stack
  | GC
       -- ^ Entry to the garbage collector: uses the node reg!
       -- (TODO: I don't think we need this --SDM)
  deriving( Eq )
  284 
-- | A foreign calling convention, together with hints for passing the
-- arguments and the result, and whether the call can return.
data ForeignConvention
  = ForeignConvention
        CCallConv               -- Which foreign-call convention
        [ForeignHint]           -- Extra info about the args
        [ForeignHint]           -- Extra info about the result
        CmmReturnInfo
  deriving Eq

-- | Whether a foreign call may return to its caller at all.
data CmmReturnInfo
  = CmmMayReturn
  | CmmNeverReturns
  deriving ( Eq )

data ForeignTarget        -- The target of a foreign call
  = ForeignTarget                -- A foreign procedure
        CmmExpr                  -- Its address
        ForeignConvention        -- Its calling convention
  | PrimTarget            -- A possibly-side-effecting machine operation
        CallishMachOp            -- Which one
  deriving Eq
  305 
  306 foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
  307 foreignTargetHints target
  308   = ( res_hints ++ repeat NoHint
  309     , arg_hints ++ repeat NoHint )
  310   where
  311     (res_hints, arg_hints) =
  312        case target of
  313           PrimTarget op -> callishMachOpHints op
  314           ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) ->
  315              (res_hints, arg_hints)
  316 
  317 --------------------------------------------------
  318 -- Instances of register and slot users / definers
  319 
-- Fold over the LocalRegs read by a node.  For stores, the address is
-- folded before the stored value; for calls, the target before the args.
instance UserOfRegs LocalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsUsed #-}
  foldRegsUsed platform f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt} -> fold f z tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z -- no registers are folded for the remaining node shapes
    where fold :: forall a b. UserOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed platform f z n
  334 
-- Fold over the GlobalRegs read by a node.  Unlike the LocalReg
-- instance above, CmmCall also folds over cml_args_regs, the
-- GlobalRegs passed to the call.
instance UserOfRegs GlobalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsUsed #-}
  foldRegsUsed platform f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z -- no registers are folded for the remaining node shapes
    where fold :: forall a b.  UserOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed platform f z n
  349 
instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
  -- The (Ord r) in the context is necessary here
  -- See Note [Recursive superclasses] in GHC.Tc.TyCl.Instance
  {-# INLINEABLE foldRegsUsed #-}
  -- A PrimTarget carries no expression, hence nothing to fold.
  foldRegsUsed _        _ !z (PrimTarget _)      = z
  foldRegsUsed platform f !z (ForeignTarget e _) = foldRegsUsed platform f z e
  356 
-- Fold over the LocalRegs written by a node: assignment targets and
-- the result formals of foreign calls.
instance DefinerOfRegs LocalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsDefd #-}
  foldRegsDefd platform f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall _ fs _ -> fold f z fs
    CmmForeignCall {res=res} -> fold f z res
    _ -> z -- the remaining node shapes define no LocalRegs in this instance
    where fold :: forall a b. DefinerOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd platform f z n
  367 
-- Fold over the GlobalRegs clobbered ("defined") by a node.  Calls
-- conservatively clobber all active STG registers; unsafe foreign
-- calls clobber only the caller-saves ones (none at all if the
-- convention says the call never returns).
instance DefinerOfRegs GlobalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsDefd #-}
  foldRegsDefd platform f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall tgt _ _  -> fold f z (foreignTargetRegs tgt)
    CmmCall        {} -> fold f z activeRegs
    CmmForeignCall {} -> fold f z activeRegs
                      -- See Note [Safe foreign calls clobber STG registers]
    _ -> z
    where fold :: forall a b. DefinerOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd platform f z n

          activeRegs = activeStgRegs platform
          activeCallerSavesRegs = filter (callerSaves platform) activeRegs

          -- A call that never returns cannot clobber anything observable.
          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
          foreignTargetRegs _ = activeCallerSavesRegs
  386 
  387 -- Note [Safe foreign calls clobber STG registers]
  388 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  389 --
  390 -- During stack layout phase every safe foreign call is expanded into a block
  391 -- that contains unsafe foreign call (instead of safe foreign call) and ends
  392 -- with a normal call (See Note [Foreign calls]). This means that we must
  393 -- treat safe foreign call as if it was a normal call (because eventually it
  394 -- will be). This is important if we try to run sinking pass before stack
  395 -- layout phase. Consider this example of what might go wrong (this is cmm
  396 -- code from stablename001 test). Here is code after common block elimination
  397 -- (before stack layout):
  398 --
  399 --  c1q6:
  400 --      _s1pf::P64 = R1;
  401 --      _c1q8::I64 = performMajorGC;
  402 --      I64[(young<c1q9> + 8)] = c1q9;
  403 --      foreign call "ccall" arg hints:  []  result hints:  [] (_c1q8::I64)(...)
  404 --                   returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
  405 --  c1q9:
  406 --      I64[(young<c1qb> + 8)] = c1qb;
  407 --      R1 = _s1pc::P64;
  408 --      call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
  409 --
  410 -- If we run sinking pass now (still before stack layout) we will get this:
  411 --
  412 --  c1q6:
  413 --      I64[(young<c1q9> + 8)] = c1q9;
  414 --      foreign call "ccall" arg hints:  []  result hints:  [] performMajorGC(...)
  415 --                   returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
  416 --  c1q9:
  417 --      I64[(young<c1qb> + 8)] = c1qb;
  418 --      _s1pf::P64 = R1;         <------ _s1pf sunk past safe foreign call
  419 --      R1 = _s1pc::P64;
  420 --      call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
  421 --
  422 -- Notice that _s1pf was sunk past a foreign call. When we run stack layout
  423 -- safe call to performMajorGC will be turned into:
  424 --
  425 --  c1q6:
  426 --      _s1pc::P64 = P64[Sp + 8];
  427 --      I64[Sp - 8] = c1q9;
  428 --      Sp = Sp - 8;
  429 --      I64[I64[CurrentTSO + 24] + 16] = Sp;
  430 --      P64[CurrentNursery + 8] = Hp + 8;
  431 --      (_u1qI::I64) = call "ccall" arg hints:  [PtrHint,]
  432 --                           result hints:  [PtrHint] suspendThread(BaseReg, 0);
  433 --      call "ccall" arg hints:  []  result hints:  [] performMajorGC();
  434 --      (_u1qJ::I64) = call "ccall" arg hints:  [PtrHint]
  435 --                           result hints:  [PtrHint] resumeThread(_u1qI::I64);
  436 --      BaseReg = _u1qJ::I64;
  437 --      _u1qK::P64 = CurrentTSO;
  438 --      _u1qL::P64 = I64[_u1qK::P64 + 24];
  439 --      Sp = I64[_u1qL::P64 + 16];
  440 --      SpLim = _u1qL::P64 + 192;
  441 --      HpAlloc = 0;
  442 --      Hp = I64[CurrentNursery + 8] - 8;
  443 --      HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
  444 --      call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
  445 --  c1q9:
  446 --      I64[(young<c1qb> + 8)] = c1qb;
  447 --      _s1pf::P64 = R1;         <------ INCORRECT!
  448 --      R1 = _s1pc::P64;
  449 --      call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
  450 --
  451 -- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
  452 -- call is clearly incorrect. This is what would happen if we assumed that
  453 -- safe foreign call has the same semantics as unsafe foreign call. To prevent
-- this we need to treat a safe foreign call as if it were a normal call.
  455 
  456 -----------------------------------
  457 -- mapping Expr in GHC.Cmm.Node
  458 
  459 mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
  460 mapForeignTarget exp   (ForeignTarget e c) = ForeignTarget (exp e) c
  461 mapForeignTarget _   m@(PrimTarget _)      = m
  462 
-- | Take a transformer on expressions and apply it recursively.
-- @(wrapRecExp f e)@ first recursively applies itself to
-- sub-expressions of @e@, then uses @f@ to rewrite the resulting
-- expression (bottom-up rewriting).
wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
wrapRecExp f (CmmMachOp op es)    = f (CmmMachOp op $ map (wrapRecExp f) es)
wrapRecExp f (CmmLoad addr ty)    = f (CmmLoad (wrapRecExp f addr) ty)
wrapRecExp f e                    = f e
  470 
-- | Apply a function to every 'CmmExpr' occurring directly in a node.
-- Sub-expressions are NOT visited; use 'mapExpDeep' for that.
mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{})                          = f
mapExp _ m@(CmmComment _)                        = m
mapExp _ m@(CmmTick _)                           = m
mapExp f   (CmmUnwind regs)                      = CmmUnwind (map (fmap (fmap f)) regs)
mapExp f   (CmmAssign r e)                       = CmmAssign r (f e)
mapExp f   (CmmStore addr e)                     = CmmStore (f addr) (f e)
mapExp f   (CmmUnsafeForeignCall tgt fs as)      = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _)                         = l
mapExp f   (CmmCondBranch e ti fi l)             = CmmCondBranch (f e) ti fi l
mapExp f   (CmmSwitch e ids)                     = CmmSwitch (f e) ids
mapExp f   n@CmmCall {cml_target=tgt}            = n{cml_target = f tgt}
mapExp f   (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
  484 
  485 mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
  486 mapExpDeep f = mapExp $ wrapRecExp f
  487 
  488 ------------------------------------------------------------------------
  489 -- mapping Expr in GHC.Cmm.Node, but not performing allocation if no changes
  490 
  491 mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
  492 mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
  493 mapForeignTargetM _ (PrimTarget _)      = Nothing
  494 
-- | @(wrapRecExpM f e)@ first recursively applies itself to
-- sub-expressions of @e@, then gives @f@ a chance to rewrite the
-- resulting expression.  'Nothing' means "no change anywhere", which
-- lets callers keep sharing the original expression.
wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
wrapRecExpM f n@(CmmMachOp op es)  = maybe (f n) (f . CmmMachOp op)    (mapListM (wrapRecExpM f) es)
wrapRecExpM f n@(CmmLoad addr ty)  = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
wrapRecExpM f e                    = f e
  501 
-- | Like 'mapExp', but with an optional rewrite: 'Nothing' means the
-- node is unchanged, so callers can share the original node (and its
-- sub-structure) when no rewrite fired.
mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{})              = Nothing
mapExpM _ (CmmComment _)            = Nothing
mapExpM _ (CmmTick _)               = Nothing
mapExpM f (CmmUnwind regs)          = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
mapExpM f (CmmAssign r e)           = CmmAssign r `fmap` f e
-- mapListM preserves list length, so the two-element lambda pattern is safe
mapExpM f (CmmStore addr e)         = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _)             = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl)         = (\x -> CmmSwitch x tbl)       `fmap` f e
mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
mapExpM f (CmmUnsafeForeignCall tgt fs as)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
        Nothing   -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
        Nothing   -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as
  521 
  522 -- share as much as possible
  523 mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
  524 mapListM f xs = let (b, r) = mapListT f xs
  525                 in if b then Just r else Nothing
  526 
  527 mapListJ :: (a -> Maybe a) -> [a] -> [a]
  528 mapListJ f xs = snd (mapListT f xs)
  529 
  530 mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
  531 mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs))
  532     where g (_,   y, Nothing) (True, ys)  = (True,  y:ys)
  533           g (_,   _, Just y)  (True, ys)  = (True,  y:ys)
  534           g (ys', _, Nothing) (False, _)  = (False, ys')
  535           g (_,   _, Just y)  (False, ys) = (True,  y:ys)
  536 
  537 mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
  538 mapExpDeepM f = mapExpM $ wrapRecExpM f
  539 
  540 -----------------------------------
  541 -- folding Expr in GHC.Cmm.Node
  542 
  543 foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
  544 foldExpForeignTarget exp (ForeignTarget e _) z = exp e z
  545 foldExpForeignTarget _   (PrimTarget _)      z = z
  546 
-- | Take a folder on expressions and apply it recursively.
-- Specifically @(wrapRecExpf f e z)@ deals with 'CmmMachOp' and
-- 'CmmLoad' itself, delegating all the other 'CmmExpr' forms to @f@.
-- Note that the compound expression itself is also passed to @f@
-- before its sub-expressions are visited.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
wrapRecExpf f e                  z = f e z
  554 
-- | Fold over every 'CmmExpr' occurring directly in a node.
-- Sub-expressions are not visited; use 'foldExpDeep' for that.
foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z                         = z
foldExp _ (CmmComment {}) z                       = z
foldExp _ (CmmTick {}) z                          = z
foldExp f (CmmUnwind xs) z                        = foldr (maybe id f) z (map snd xs)
foldExp f (CmmAssign _ e) z                       = f e z
foldExp f (CmmStore addr e) z                     = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z         = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z                         = z
foldExp f (CmmCondBranch e _ _ _) z               = f e z
foldExp f (CmmSwitch e _) z                       = f e z
foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
  568 
  569 foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
  570 foldExpDeep f = foldExp (wrapRecExpf f)
  571 
  572 -- -----------------------------------------------------------------------------
  573 
-- | Apply a label renaming to the successors of a final node.
-- NOTE(review): 'CmmCall' ('cml_cont') and 'CmmForeignCall' ('succ')
-- fall through the wildcard and keep their continuation labels
-- unchanged — presumably intentional; confirm against callers.
mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
mapSuccessors f (CmmBranch bid)         = CmmBranch (f bid)
mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l
mapSuccessors f (CmmSwitch e ids)       = CmmSwitch e (mapSwitchTargets f ids)
mapSuccessors _ n = n
  579 
-- | Like 'mapSuccessors', but also collects the auxiliary values
-- produced by the renaming function, one per (distinct, for switches)
-- successor label.  Call nodes fall through the catch-all unchanged,
-- as in 'mapSuccessors'.
mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C
                     -> (CmmNode O C, [a])
mapCollectSuccessors f (CmmBranch bid)
  = let (bid', acc) = f bid in (CmmBranch bid', [acc])
mapCollectSuccessors f (CmmCondBranch p y n l)
  = let (bidt, acct) = f y
        (bidf, accf) = f n
    in  (CmmCondBranch p bidt bidf l, [accf, acct])
mapCollectSuccessors f (CmmSwitch e ids)
  = let lbls = switchTargetsToList ids :: [Label]
        lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a)
        -- every switch target is a key of lblMap by construction,
        -- so the default is genuinely unreachable
    in ( CmmSwitch e
          (mapSwitchTargets
            (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids)
          , map snd (mapElems lblMap)
        )
mapCollectSuccessors _ n = (n, [])
  597 
  598 -- -----------------------------------------------------------------------------
  599 
-- | Tick scope identifier, allowing us to reason about what
-- annotations in a Cmm block should scope over. We especially take
-- care to allow optimisations to reorganise blocks without losing
-- tick association in the process.
data CmmTickScope
  = GlobalScope
    -- ^ The global scope is the "root" of the scope graph. Every
    -- scope is a sub-scope of the global scope. It doesn't make sense
    -- to add ticks to this scope. On the other hand, this means that
    -- setting this scope on a block means no ticks apply to it.

  | SubScope !U.Unique CmmTickScope
    -- ^ Constructs a new sub-scope to an existing scope. This allows
    -- us to translate Core-style scoping rules (see @tickishScoped@)
    -- into the Cmm world. Suppose the following code:
    --
    --   tick<1> case ... of
    --             A -> tick<2> ...
    --             B -> tick<3> ...
    --
    -- We want the top-level tick annotation to apply to blocks
    -- generated for the A and B alternatives. We can achieve that by
    -- generating tick<1> into a block with scope a, while the code
    -- for alternatives A and B gets generated into sub-scopes a/b and
    -- a/c respectively.

  | CombinedScope CmmTickScope CmmTickScope
    -- ^ A combined scope scopes over everything that the two given
    -- scopes cover. It is therefore a sub-scope of either scope. This
    -- is required for optimisations. Consider common block elimination:
    --
    --   A -> tick<2> case ... of
    --     C -> [common]
    --   B -> tick<3> case ... of
    --     D -> [common]
    --
    -- We will generate code for the C and D alternatives, and figure
    -- out afterwards that it's actually common code. Scoping rules
    -- dictate that the resulting common block needs to be covered by
    -- both tick<2> and tick<3>, therefore we need to construct a
    -- scope that is a child to *both* scope. Now we can do that - if
    -- we assign the scopes a/c and b/d to the common-ed up blocks,
    -- the new block could have a combined tick scope a/c+b/d, which
    -- both tick<2> and tick<3> apply to.
  644 
  645 -- Note [CmmTick scoping details]:
  646 --
  647 -- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
  648 -- same block. Note that as a result of this, optimisations making
  649 -- tick scopes more specific can *reduce* the amount of code a tick
  650 -- scopes over. Fixing this would require a separate @CmmTickScope@
  651 -- field for @CmmTick@. Right now we do not do this simply because I
  652 -- couldn't find an example where it actually mattered -- multiple
  653 -- blocks within the same scope generally jump to each other, which
  654 -- prevents common block elimination from happening in the first
  655 -- place. But this is no strong reason, so if Cmm optimisations become
  656 -- more involved in future this might have to be revisited.
  657 
  658 -- | Output all scope paths.
  659 scopeToPaths :: CmmTickScope -> [[U.Unique]]
  660 scopeToPaths GlobalScope           = [[]]
  661 scopeToPaths (SubScope u s)        = map (u:) (scopeToPaths s)
  662 scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2
  663 
  664 -- | Returns the head uniques of the scopes. This is based on the
  665 -- assumption that the @Unique@ of @SubScope@ identifies the
  666 -- underlying super-scope. Used for efficient equality and comparison,
  667 -- see below.
  668 scopeUniques :: CmmTickScope -> [U.Unique]
  669 scopeUniques GlobalScope           = []
  670 scopeUniques (SubScope u _)        = [u]
  671 scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2
  672 
  673 -- Equality and order is based on the head uniques defined above. We
  674 -- take care to short-cut the (extremely) common cases.
  675 instance Eq CmmTickScope where
  676   GlobalScope    == GlobalScope     = True
  677   GlobalScope    == _               = False
  678   _              == GlobalScope     = False
  679   (SubScope u _) == (SubScope u' _) = u == u'
  680   (SubScope _ _) == _               = False
  681   _              == (SubScope _ _)  = False
  682   scope          == scope'          =
  683     sortBy nonDetCmpUnique (scopeUniques scope) ==
  684     sortBy nonDetCmpUnique (scopeUniques scope')
  685     -- This is still deterministic because
  686     -- the order is the same for equal lists
  687 
  688 -- This is non-deterministic but we do not currently support deterministic
  689 -- code-generation. See Note [Unique Determinism and code generation]
  690 -- See Note [No Ord for Unique]
  691 instance Ord CmmTickScope where
  692   compare GlobalScope    GlobalScope     = EQ
  693   compare GlobalScope    _               = LT
  694   compare _              GlobalScope     = GT
  695   compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
  696   compare scope scope'                   = cmpList nonDetCmpUnique
  697      (sortBy nonDetCmpUnique $ scopeUniques scope)
  698      (sortBy nonDetCmpUnique $ scopeUniques scope')
  699 
  700 instance Outputable CmmTickScope where
  701   ppr GlobalScope     = text "global"
  702   ppr (SubScope us GlobalScope)
  703                       = ppr us
  704   ppr (SubScope us s) = ppr s <> char '/' <> ppr us
  705   ppr combined        = parens $ hcat $ punctuate (char '+') $
  706                         map (hcat . punctuate (char '/') . map ppr . reverse) $
  707                         scopeToPaths combined
  708 
  709 -- | Checks whether two tick scopes are sub-scopes of each other. True
  710 -- if the two scopes are equal.
  711 isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
  712 isTickSubScope = cmp
  713   where cmp _              GlobalScope             = True
  714         cmp GlobalScope    _                       = False
  715         cmp (CombinedScope s1 s2) s'               = cmp s1 s' && cmp s2 s'
  716         cmp s              (CombinedScope s1' s2') = cmp s s1' || cmp s s2'
  717         cmp (SubScope u s) s'@(SubScope u' _)      = u == u' || cmp s s'
  718 
  719 -- | Combine two tick scopes. The new scope should be sub-scope of
  720 -- both parameters. We simplify automatically if one tick scope is a
  721 -- sub-scope of the other already.
  722 combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
  723 combineTickScopes s1 s2
  724   | s1 `isTickSubScope` s2 = s1
  725   | s2 `isTickSubScope` s1 = s2
  726   | otherwise              = CombinedScope s1 s2