never executed always true always false
    1 {-# LANGUAGE FlexibleInstances #-}
    2 {-# LANGUAGE GADTs #-}
    3 {-# LANGUAGE MultiParamTypeClasses #-}
    4 {-# LANGUAGE PatternSynonyms #-}
    5 
    6 -----------------------------------------------------------------------------
    7 --
    8 -- Monad for Stg to C-- code generation
    9 --
   10 -- (c) The University of Glasgow 2004-2006
   11 --
   12 -----------------------------------------------------------------------------
   13 
   14 module GHC.StgToCmm.Monad (
   15         FCode,        -- type
   16 
   17         initC, runC, fixC,
   18         newUnique,
   19 
   20         emitLabel,
   21 
   22         emit, emitDecl,
   23         emitProcWithConvention, emitProcWithStackFrame,
   24         emitOutOfLine, emitAssign, emitStore,
   25         emitComment, emitTick, emitUnwind,
   26 
   27         newTemp,
   28 
   29         getCmm, aGraphToGraph, getPlatform, getProfile,
   30         getCodeR, getCode, getCodeScoped, getHeapUsage,
   31         getCallOpts, getPtrOpts,
   32 
   33         mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
   34         mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto',
   35 
   36         mkCall, mkCmmCall,
   37 
   38         forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly,
   39 
   40         ConTagZ,
   41 
   42         Sequel(..), ReturnKind(..),
   43         withSequel, getSequel,
   44 
   45         setTickyCtrLabel, getTickyCtrLabel,
   46         tickScope, getTickScope,
   47 
   48         withUpdFrameOff, getUpdFrameOff, initUpdFrameOff,
   49 
   50         HeapUsage(..), VirtualHpOffset,        initHpUsage,
   51         getHpUsage,  setHpUsage, heapHWM,
   52         setVirtHp, getVirtHp, setRealHp,
   53 
   54         getModuleName,
   55 
   56         -- ideally we wouldn't export these, but some other modules access internal state
   57         getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags,
   58 
   59         -- more localised access to monad state
   60         CgIdInfo(..),
   61         getBinds, setBinds,
   62         -- out of general friendliness, we also export ...
   63         CgInfoDownwards(..), CgState(..) -- non-abstract
   64     ) where
   65 
   66 import GHC.Prelude hiding( sequence, succ )
   67 
   68 import GHC.Platform
   69 import GHC.Platform.Profile
   70 import GHC.Cmm
   71 import GHC.StgToCmm.Closure
   72 import GHC.Driver.Session
   73 import GHC.Cmm.Dataflow.Collections
   74 import GHC.Cmm.Graph as CmmGraph
   75 import GHC.Cmm.BlockId
   76 import GHC.Cmm.CLabel
   77 import GHC.Cmm.Info
   78 import GHC.Runtime.Heap.Layout
   79 import GHC.Unit
   80 import GHC.Types.Id
   81 import GHC.Types.Var.Env
   82 import GHC.Data.OrdList
   83 import GHC.Types.Basic( ConTagZ )
   84 import GHC.Types.Unique
   85 import GHC.Types.Unique.Supply
   86 import GHC.Data.FastString
   87 import GHC.Utils.Outputable
   88 import GHC.Utils.Panic
   89 import GHC.Utils.Constants (debugIsOn)
   90 import GHC.Exts (oneShot)
   91 
   92 import Control.Monad
   93 import Data.List (mapAccumL)
   94 
   95 
   96 --------------------------------------------------------
   97 -- The FCode monad and its types
   98 --
   99 -- FCode is the monad plumbed through the Stg->Cmm code generator, and
  100 -- the Cmm parser.  It contains the following things:
  101 --
  102 --  - A writer monad, collecting:
  103 --    - code for the current function, in the form of a CmmAGraph.
  104 --      The function "emit" appends more code to this.
  105 --    - the top-level CmmDecls accumulated so far
  106 --
  107 --  - A state monad with:
  108 --    - the local bindings in scope
  109 --    - the current heap usage
  110 --    - a UniqSupply
  111 --
  112 --  - A reader monad, for CgInfoDownwards, containing
  113 --    - DynFlags,
  114 --    - the current Module
  115 --    - the update-frame offset
  116 --    - the ticky counter label
  117 --    - the Sequel (the continuation to return to)
  118 --    - the self-recursive tail call information
  119 
  120 --------------------------------------------------------
  121 
  122 newtype FCode a = FCode' { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) }
  123 
  124 -- Not derived because of #18202.
  125 -- See Note [The one-shot state monad trick] in GHC.Utils.Monad
  126 instance Functor FCode where
  127   fmap f (FCode m) =
  128     FCode $ \info_down state ->
  129       case m info_down state of
  130         (x, state') -> (f x, state')
  131 
  132 -- This pattern synonym makes the simplifier monad eta-expand,
  133 -- which as a very beneficial effect on compiler performance
  134 -- See #18202.
  135 -- See Note [The one-shot state monad trick] in GHC.Utils.Monad
  136 {-# COMPLETE FCode #-}
  137 pattern FCode :: (CgInfoDownwards -> CgState -> (a, CgState))
  138               -> FCode a
  139 pattern FCode m <- FCode' m
  140   where
  141     FCode m = FCode' $ oneShot (\cgInfoDown -> oneShot (\state ->m cgInfoDown state))
  142 
  143 instance Applicative FCode where
  144     pure val = FCode (\_info_down state -> (val, state))
  145     {-# INLINE pure #-}
  146     (<*>) = ap
  147 
  148 instance Monad FCode where
  149     FCode m >>= k = FCode $
  150         \info_down state ->
  151             case m info_down state of
  152               (m_result, new_state) ->
  153                  case k m_result of
  154                    FCode kcode -> kcode info_down new_state
  155     {-# INLINE (>>=) #-}
  156 
  157 instance MonadUnique FCode where
  158   getUniqueSupplyM = cgs_uniqs <$> getState
  159   getUniqueM = FCode $ \_ st ->
  160     let (u, us') = takeUniqFromSupply (cgs_uniqs st)
  161     in (u, st { cgs_uniqs = us' })
  162 
  163 initC :: IO CgState
  164 initC  = do { uniqs <- mkSplitUniqSupply 'c'
  165             ; return (initCgState uniqs) }
  166 
  167 runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
  168 runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st
  169 
  170 fixC :: (a -> FCode a) -> FCode a
  171 fixC fcode = FCode $
  172     \info_down state -> let (v, s) = doFCode (fcode v) info_down state
  173                         in (v, s)
  174 
  175 --------------------------------------------------------
  176 --        The code generator environment
  177 --------------------------------------------------------
  178 
  179 -- This monadery has some information that it only passes
  180 -- *downwards*, as well as some ``state'' which is modified
  181 -- as we go along.
  182 
  183 data CgInfoDownwards        -- information only passed *downwards* by the monad
  184   = MkCgInfoDown {
  185         cgd_dflags    :: DynFlags,
  186         cgd_mod       :: Module,            -- Module being compiled
  187         cgd_updfr_off :: UpdFrameOffset,    -- Size of current update frame
  188         cgd_ticky     :: CLabel,            -- Current destination for ticky counts
  189         cgd_sequel    :: Sequel,            -- What to do at end of basic block
  190         cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled
  191                                             -- as local jumps? See Note
  192                                             -- [Self-recursive tail calls] in
  193                                             -- GHC.StgToCmm.Expr
  194         cgd_tick_scope:: CmmTickScope       -- Tick scope for new blocks & ticks
  195   }
  196 
  197 type CgBindings = IdEnv CgIdInfo
  198 
  199 data CgIdInfo
  200   = CgIdInfo
  201         { cg_id :: Id   -- Id that this is the info for
  202         , cg_lf  :: LambdaFormInfo
  203         , cg_loc :: CgLoc                     -- CmmExpr for the *tagged* value
  204         }
  205 
  206 instance OutputableP Platform CgIdInfo where
  207   pdoc env (CgIdInfo { cg_id = id, cg_loc = loc })
  208     = ppr id <+> text "-->" <+> pdoc env loc
  209 
  210 -- Sequel tells what to do with the result of this expression
  211 data Sequel
  212   = Return              -- Return result(s) to continuation found on the stack.
  213 
  214   | AssignTo
  215         [LocalReg]      -- Put result(s) in these regs and fall through
  216                         -- NB: no void arguments here
  217                         --
  218         Bool            -- Should we adjust the heap pointer back to
  219                         -- recover space that's unused on this path?
  220                         -- We need to do this only if the expression
  221                         -- may allocate (e.g. it's a foreign call or
  222                         -- allocating primOp)
  223 
  224 instance Outputable Sequel where
  225     ppr Return = text "Return"
  226     ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b
  227 
  228 -- See Note [sharing continuations] below
  229 data ReturnKind
  230   = AssignedDirectly
  231   | ReturnedTo BlockId ByteOff
  232 
  233 -- Note [sharing continuations]
  234 --
  235 -- ReturnKind says how the expression being compiled returned its
  236 -- results: either by assigning directly to the registers specified
  237 -- by the Sequel, or by returning to a continuation that does the
  238 -- assignments.  The point of this is we might be able to re-use the
  239 -- continuation in a subsequent heap-check.  Consider:
  240 --
  241 --    case f x of z
  242 --      True  -> <True code>
  243 --      False -> <False code>
  244 --
  245 -- Naively we would generate
  246 --
  247 --    R2 = x   -- argument to f
  248 --    Sp[young(L1)] = L1
  249 --    call f returns to L1
  250 --  L1:
  251 --    z = R1
  252 --    if (z & 1) then Ltrue else Lfalse
  253 --  Ltrue:
  254 --    Hp = Hp + 24
  255 --    if (Hp > HpLim) then L4 else L7
  256 --  L4:
  257 --    HpAlloc = 24
  258 --    goto L5
  259 --  L5:
  260 --    R1 = z
  261 --    Sp[young(L6)] = L6
  262 --    call stg_gc_unpt_r1 returns to L6
  263 --  L6:
  264 --    z = R1
  265 --    goto L1
  266 --  L7:
  267 --    <True code>
  268 --  Lfalse:
  269 --    <False code>
  270 --
  271 -- We want the gc call in L4 to return to L1, and discard L6.  Note
  272 -- that not only can we share L1 and L6, but the assignment of the
  273 -- return address in L4 is unnecessary because the return address for
  274 -- L1 is already on the stack.  We used to catch the sharing of L1 and
  275 -- L6 in the common-block-eliminator, but not the unnecessary return
  276 -- address assignment.
  277 --
  278 -- Since this case is so common I decided to make it more explicit and
  279 -- robust by programming the sharing directly, rather than relying on
  280 -- the common-block eliminator to catch it.  This makes
  281 -- common-block-elimination an optional optimisation, and furthermore
  282 -- generates less code in the first place that we have to subsequently
  283 -- clean up.
  284 --
  285 -- There are some rarer cases of common blocks that we don't catch
  286 -- this way, but that's ok.  Common-block-elimination is still available
  287 -- to catch them when optimisation is enabled.  Some examples are:
  288 --
  289 --   - when both the True and False branches do a heap check, we
  290 --     can share the heap-check failure code L4a and maybe L4
  291 --
  292 --   - in a case-of-case, there might be multiple continuations that
  293 --     we can common up.
  294 --
  295 -- It is always safe to use AssignedDirectly.  Expressions that jump
  296 -- to the continuation from multiple places (e.g. case expressions)
  297 -- fall back to AssignedDirectly.
  298 --
  299 
  300 
  301 initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
  302 initCgInfoDown dflags mod
  303   = MkCgInfoDown { cgd_dflags    = dflags
  304                  , cgd_mod       = mod
  305                  , cgd_updfr_off = initUpdFrameOff (targetPlatform dflags)
  306                  , cgd_ticky     = mkTopTickyCtrLabel
  307                  , cgd_sequel    = initSequel
  308                  , cgd_self_loop = Nothing
  309                  , cgd_tick_scope= GlobalScope }
  310 
  311 initSequel :: Sequel
  312 initSequel = Return
  313 
  314 initUpdFrameOff :: Platform -> UpdFrameOffset
  315 initUpdFrameOff platform = platformWordSizeInBytes platform -- space for the RA
  316 
  317 
  318 --------------------------------------------------------
  319 --        The code generator state
  320 --------------------------------------------------------
  321 
  322 data CgState
  323   = MkCgState {
  324      cgs_stmts :: CmmAGraph,          -- Current procedure
  325 
  326      cgs_tops  :: OrdList CmmDecl,
  327         -- Other procedures and data blocks in this compilation unit
  328         -- Both are ordered only so that we can
  329         -- reduce forward references, when it's easy to do so
  330 
  331      cgs_binds :: CgBindings,
  332 
  333      cgs_hp_usg  :: HeapUsage,
  334 
  335      cgs_uniqs :: UniqSupply }
  336 -- If you are wondering why you have to be careful forcing CgState then
  337 -- the reason is the knot-tying in 'getHeapUsage'. This problem is tracked
  338 -- in #19245
  339 
  340 data HeapUsage   -- See Note [Virtual and real heap pointers]
  341   = HeapUsage {
  342         virtHp :: VirtualHpOffset,       -- Virtual offset of highest-allocated word
  343                                          --   Incremented whenever we allocate
  344         realHp :: VirtualHpOffset        -- realHp: Virtual offset of real heap ptr
  345                                          --   Used in instruction addressing modes
  346     }
  347 
  348 type VirtualHpOffset = WordOff
  349 
  350 
  351 {- Note [Virtual and real heap pointers]
  352 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  353 The code generator can allocate one or more objects contiguously, performing
  354 one heap check to cover allocation of all the objects at once.  Let's call
  355 this little chunk of heap space an "allocation chunk".  The code generator
  356 will emit code to
  357   * Perform a heap-exhaustion check
  358   * Move the heap pointer to the end of the allocation chunk
  359   * Allocate multiple objects within the chunk
  360 
  361 The code generator uses VirtualHpOffsets to address words within a
  362 single allocation chunk; these start at one and increase positively.
  363 The first word of the chunk has VirtualHpOffset=1, the second has
  364 VirtualHpOffset=2, and so on.
  365 
  366  * The field realHp tracks (the VirtualHpOffset) where the real Hp
  367    register is pointing.  Typically it'll be pointing to the end of the
  368    allocation chunk.
  369 
  370  * The field virtHp gives the VirtualHpOffset of the highest-allocated
  371    word so far.  It starts at zero (meaning no word has been allocated),
  372    and increases whenever an object is allocated.
  373 
  374 The difference between realHp and virtHp gives the offset from the
  375 real Hp register of a particular word in the allocation chunk. This
  376 is what getHpRelOffset does.  Since the returned offset is relative
  377 to the real Hp register, it is valid only until you change the real
  378 Hp register.  (Changing virtHp doesn't matter.)
  379 -}
  380 
  381 
  382 initCgState :: UniqSupply -> CgState
  383 initCgState uniqs
  384   = MkCgState { cgs_stmts  = mkNop
  385               , cgs_tops   = nilOL
  386               , cgs_binds  = emptyVarEnv
  387               , cgs_hp_usg = initHpUsage
  388               , cgs_uniqs  = uniqs }
  389 
  390 stateIncUsage :: CgState -> CgState -> CgState
  391 -- stateIncUsage@ e1 e2 incorporates in e1
  392 -- the heap high water mark found in e2.
  393 stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg })
  394      = s1 { cgs_hp_usg  = cgs_hp_usg  s1 `maxHpHw`  virtHp hp_usg }
  395        `addCodeBlocksFrom` s2
  396 
  397 addCodeBlocksFrom :: CgState -> CgState -> CgState
  398 -- Add code blocks from the latter to the former
  399 -- (The cgs_stmts will often be empty, but not always; see codeOnly)
  400 s1 `addCodeBlocksFrom` s2
  401   = s1 { cgs_stmts = cgs_stmts s1 CmmGraph.<*> cgs_stmts s2,
  402          cgs_tops  = cgs_tops  s1 `appOL` cgs_tops  s2 }
  403 
  404 -- The heap high water mark is the larger of virtHp and hwHp.  The latter is
  405 -- only records the high water marks of forked-off branches, so to find the
  406 -- heap high water mark you have to take the max of virtHp and hwHp.  Remember,
  407 -- virtHp never retreats!
  408 --
  409 -- Note Jan 04: ok, so why do we only look at the virtual Hp??
  410 
  411 heapHWM :: HeapUsage -> VirtualHpOffset
  412 heapHWM = virtHp
  413 
  414 initHpUsage :: HeapUsage
  415 initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
  416 
  417 maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
  418 hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
  419 
  420 --------------------------------------------------------
  421 -- Operators for getting and setting the state and "info_down".
  422 --------------------------------------------------------
  423 
  424 getState :: FCode CgState
  425 getState = FCode $ \_info_down state -> (state, state)
  426 
  427 setState :: CgState -> FCode ()
  428 setState state = FCode $ \_info_down _ -> ((), state)
  429 
  430 getHpUsage :: FCode HeapUsage
  431 getHpUsage = do
  432         state <- getState
  433         return $ cgs_hp_usg state
  434 
  435 setHpUsage :: HeapUsage -> FCode ()
  436 setHpUsage new_hp_usg = do
  437         state <- getState
  438         setState $ state {cgs_hp_usg = new_hp_usg}
  439 
  440 setVirtHp :: VirtualHpOffset -> FCode ()
  441 setVirtHp new_virtHp
  442   = do  { hp_usage <- getHpUsage
  443         ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
  444 
  445 getVirtHp :: FCode VirtualHpOffset
  446 getVirtHp
  447   = do  { hp_usage <- getHpUsage
  448         ; return (virtHp hp_usage) }
  449 
  450 setRealHp ::  VirtualHpOffset -> FCode ()
  451 setRealHp new_realHp
  452   = do  { hp_usage <- getHpUsage
  453         ; setHpUsage (hp_usage {realHp = new_realHp}) }
  454 
  455 getBinds :: FCode CgBindings
  456 getBinds = do
  457         state <- getState
  458         return $ cgs_binds state
  459 
  460 setBinds :: CgBindings -> FCode ()
  461 setBinds new_binds = do
  462         state <- getState
  463         setState $ state {cgs_binds = new_binds}
  464 
  465 withState :: FCode a -> CgState -> FCode (a,CgState)
  466 withState (FCode fcode) newstate = FCode $ \info_down state ->
  467   case fcode info_down newstate of
  468     (retval, state2) -> ((retval,state2), state)
  469 
  470 newUniqSupply :: FCode UniqSupply
  471 newUniqSupply = do
  472         state <- getState
  473         let (us1, us2) = splitUniqSupply (cgs_uniqs state)
  474         setState $ state { cgs_uniqs = us1 }
  475         return us2
  476 
  477 newUnique :: FCode Unique
  478 newUnique = do
  479         state <- getState
  480         let (u,us') = takeUniqFromSupply (cgs_uniqs state)
  481         setState $ state { cgs_uniqs = us' }
  482         return u
  483 
  484 newTemp :: MonadUnique m => CmmType -> m LocalReg
  485 newTemp rep = do { uniq <- getUniqueM
  486                  ; return (LocalReg uniq rep) }
  487 
  488 ------------------
  489 getInfoDown :: FCode CgInfoDownwards
  490 getInfoDown = FCode $ \info_down state -> (info_down,state)
  491 
  492 getSelfLoop :: FCode (Maybe SelfLoopInfo)
  493 getSelfLoop = do
  494         info_down <- getInfoDown
  495         return $ cgd_self_loop info_down
  496 
  497 withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
  498 withSelfLoop self_loop code = do
  499         info_down <- getInfoDown
  500         withInfoDown code (info_down {cgd_self_loop = Just self_loop})
  501 
  502 instance HasDynFlags FCode where
  503     getDynFlags = liftM cgd_dflags getInfoDown
  504 
  505 getProfile :: FCode Profile
  506 getProfile = targetProfile <$> getDynFlags
  507 
  508 getPlatform :: FCode Platform
  509 getPlatform = profilePlatform <$> getProfile
  510 
  511 getCallOpts :: FCode CallOpts
  512 getCallOpts = do
  513    dflags <- getDynFlags
  514    profile <- getProfile
  515    pure $ CallOpts
  516     { co_profile       = profile
  517     , co_loopification = gopt Opt_Loopification dflags
  518     , co_ticky         = gopt Opt_Ticky dflags
  519     }
  520 
  521 getPtrOpts :: FCode PtrOpts
  522 getPtrOpts = do
  523    dflags <- getDynFlags
  524    profile <- getProfile
  525    pure $ PtrOpts
  526       { po_profile     = profile
  527       , po_align_check = gopt Opt_AlignmentSanitisation dflags
  528       }
  529 
  530 
  531 withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
  532 withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
  533 
  534 -- ----------------------------------------------------------------------------
  535 -- Get the current module name
  536 
  537 getModuleName :: FCode Module
  538 getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
  539 
  540 -- ----------------------------------------------------------------------------
  541 -- Get/set the end-of-block info
  542 
  543 withSequel :: Sequel -> FCode a -> FCode a
  544 withSequel sequel code
  545   = do  { info  <- getInfoDown
  546         ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) }
  547 
  548 getSequel :: FCode Sequel
  549 getSequel = do  { info <- getInfoDown
  550                 ; return (cgd_sequel info) }
  551 
  552 -- ----------------------------------------------------------------------------
  553 -- Get/set the size of the update frame
  554 
  555 -- We keep track of the size of the update frame so that we
  556 -- can set the stack pointer to the proper address on return
  557 -- (or tail call) from the closure.
  558 -- There should be at most one update frame for each closure.
  559 -- Note: I'm including the size of the original return address
  560 -- in the size of the update frame -- hence the default case on `get'.
  561 
  562 withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a
  563 withUpdFrameOff size code
  564   = do  { info  <- getInfoDown
  565         ; withInfoDown code (info {cgd_updfr_off = size }) }
  566 
  567 getUpdFrameOff :: FCode UpdFrameOffset
  568 getUpdFrameOff
  569   = do  { info  <- getInfoDown
  570         ; return $ cgd_updfr_off info }
  571 
  572 -- ----------------------------------------------------------------------------
  573 -- Get/set the current ticky counter label
  574 
  575 getTickyCtrLabel :: FCode CLabel
  576 getTickyCtrLabel = do
  577         info <- getInfoDown
  578         return (cgd_ticky info)
  579 
  580 setTickyCtrLabel :: CLabel -> FCode a -> FCode a
  581 setTickyCtrLabel ticky code = do
  582         info <- getInfoDown
  583         withInfoDown code (info {cgd_ticky = ticky})
  584 
  585 -- ----------------------------------------------------------------------------
  586 -- Manage tick scopes
  587 
  588 -- | The current tick scope. We will assign this to generated blocks.
  589 getTickScope :: FCode CmmTickScope
  590 getTickScope = do
  591         info <- getInfoDown
  592         return (cgd_tick_scope info)
  593 
  594 -- | Places blocks generated by the given code into a fresh
  595 -- (sub-)scope. This will make sure that Cmm annotations in our scope
  596 -- will apply to the Cmm blocks generated therein - but not the other
  597 -- way around.
  598 tickScope :: FCode a -> FCode a
  599 tickScope code = do
  600         info <- getInfoDown
  601         if debugLevel (cgd_dflags info) == 0 then code else do
  602           u <- newUnique
  603           let scope' = SubScope u (cgd_tick_scope info)
  604           withInfoDown code info{ cgd_tick_scope = scope' }
  605 
  606 
  607 --------------------------------------------------------
  608 --                 Forking
  609 --------------------------------------------------------
  610 
  611 forkClosureBody :: FCode () -> FCode ()
  612 -- forkClosureBody compiles body_code in environment where:
  613 --   - sequel, update stack frame and self loop info are
  614 --     set to fresh values
  615 --   - state is set to a fresh value, except for local bindings
  616 --     that are passed in unchanged. It's up to the enclosed code to
  617 --     re-bind the free variables to a field of the closure.
  618 
  619 forkClosureBody body_code
  620   = do  { platform <- getPlatform
  621         ; info   <- getInfoDown
  622         ; us     <- newUniqSupply
  623         ; state  <- getState
  624         ; let body_info_down = info { cgd_sequel    = initSequel
  625                                     , cgd_updfr_off = initUpdFrameOff platform
  626                                     , cgd_self_loop = Nothing }
  627               fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
  628               ((),fork_state_out) = doFCode body_code body_info_down fork_state_in
  629         ; setState $ state `addCodeBlocksFrom` fork_state_out }
  630 
  631 forkLneBody :: FCode a -> FCode a
  632 -- 'forkLneBody' takes a body of let-no-escape binding and compiles
  633 -- it in the *current* environment, returning the graph thus constructed.
  634 --
  635 -- The current environment is passed on completely unchanged to
  636 -- the successor.  In particular, any heap usage from the enclosed
  637 -- code is discarded; it should deal with its own heap consumption.
  638 forkLneBody body_code
  639   = do  { info_down <- getInfoDown
  640         ; us        <- newUniqSupply
  641         ; state     <- getState
  642         ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
  643               (result, fork_state_out) = doFCode body_code info_down fork_state_in
  644         ; setState $ state `addCodeBlocksFrom` fork_state_out
  645         ; return result }
  646 
  647 codeOnly :: FCode () -> FCode ()
  648 -- Emit any code from the inner thing into the outer thing
  649 -- Do not affect anything else in the outer state
  650 -- Used in almost-circular code to prevent false loop dependencies
  651 codeOnly body_code
  652   = do  { info_down <- getInfoDown
  653         ; us        <- newUniqSupply
  654         ; state     <- getState
  655         ; let   fork_state_in = (initCgState us) { cgs_binds   = cgs_binds state
  656                                                  , cgs_hp_usg  = cgs_hp_usg state }
  657                 ((), fork_state_out) = doFCode body_code info_down fork_state_in
  658         ; setState $ state `addCodeBlocksFrom` fork_state_out }
  659 
  660 forkAlts :: [FCode a] -> FCode [a]
  661 -- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and
  662 -- an fcode for the default case 'd', and compiles each in the current
  663 -- environment.  The current environment is passed on unmodified, except
  664 -- that the virtual Hp is moved on to the worst virtual Hp for the branches
  665 
  666 forkAlts branch_fcodes
  667   = do  { info_down <- getInfoDown
  668         ; us <- newUniqSupply
  669         ; state <- getState
  670         ; let compile us branch
  671                 = (us2, doFCode branch info_down branch_state)
  672                 where
  673                   (us1,us2) = splitUniqSupply us
  674                   branch_state = (initCgState us1) {
  675                                         cgs_binds  = cgs_binds state
  676                                       , cgs_hp_usg = cgs_hp_usg state }
  677               (_us, results) = mapAccumL compile us branch_fcodes
  678               (branch_results, branch_out_states) = unzip results
  679         ; setState $ foldl' stateIncUsage state branch_out_states
  680                 -- NB foldl.  state is the *left* argument to stateIncUsage
  681         ; return branch_results }
  682 
  683 forkAltPair :: FCode a -> FCode a -> FCode (a,a)
  684 -- Most common use of 'forkAlts'; having this helper function avoids
  685 -- accidental use of failible pattern-matches in @do@-notation
  686 forkAltPair x y = do
  687   xy' <- forkAlts [x,y]
  688   case xy' of
  689     [x',y'] -> return (x',y')
  690     _ -> panic "forkAltPair"
  691 
  692 -- collect the code emitted by an FCode computation
  693 getCodeR :: FCode a -> FCode (a, CmmAGraph)
  694 getCodeR fcode
  695   = do  { state1 <- getState
  696         ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop })
  697         ; setState $ state2 { cgs_stmts = cgs_stmts state1  }
  698         ; return (a, cgs_stmts state2) }
  699 
  700 getCode :: FCode a -> FCode CmmAGraph
  701 getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts }
  702 
  703 -- | Generate code into a fresh tick (sub-)scope and gather generated code
  704 getCodeScoped :: FCode a -> FCode (a, CmmAGraphScoped)
  705 getCodeScoped fcode
  706   = do  { state1 <- getState
  707         ; ((a, tscope), state2) <-
  708             tickScope $
  709             flip withState state1 { cgs_stmts = mkNop } $
  710             do { a   <- fcode
  711                ; scp <- getTickScope
  712                ; return (a, scp) }
  713         ; setState $ state2 { cgs_stmts = cgs_stmts state1  }
  714         ; return (a, (cgs_stmts state2, tscope)) }
  715 
  716 
  717 -- 'getHeapUsage' applies a function to the amount of heap that it uses.
  718 -- It initialises the heap usage to zeros, and passes on an unchanged
  719 -- heap usage.
  720 --
  721 -- It is usually a prelude to performing a GC check, so everything must
  722 -- be in a tidy and consistent state.
  723 --
  724 -- Note the slightly subtle fixed point behaviour needed here
  725 
  726 getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a
  727 getHeapUsage fcode
  728   = do  { info_down <- getInfoDown
  729         ; state <- getState
  730         ; let   fstate_in = state { cgs_hp_usg  = initHpUsage }
  731                 (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in
  732                 hp_hw = heapHWM (cgs_hp_usg fstate_out)        -- Loop here!
  733 
  734         ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state }
  735         ; return r }
  736 
  737 -- ----------------------------------------------------------------------------
  738 -- Combinators for emitting code
  739 
  740 emitCgStmt :: CgStmt -> FCode ()
  741 emitCgStmt stmt
  742   = do  { state <- getState
  743         ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
  744         }
  745 
  746 emitLabel :: BlockId -> FCode ()
  747 emitLabel id = do tscope <- getTickScope
  748                   emitCgStmt (CgLabel id tscope)
  749 
  750 emitComment :: FastString -> FCode ()
  751 emitComment s
  752   | debugIsOn = emitCgStmt (CgStmt (CmmComment s))
  753   | otherwise = return ()
  754 
  755 emitTick :: CmmTickish -> FCode ()
  756 emitTick = emitCgStmt . CgStmt . CmmTick
  757 
  758 emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode ()
  759 emitUnwind regs = do
  760   dflags <- getDynFlags
  761   when (debugLevel dflags > 0) $
  762      emitCgStmt $ CgStmt $ CmmUnwind regs
  763 
  764 emitAssign :: CmmReg  -> CmmExpr -> FCode ()
  765 emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
  766 
  767 emitStore :: CmmExpr  -> CmmExpr -> FCode ()
  768 emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
  769 
  770 emit :: CmmAGraph -> FCode ()
  771 emit ag
  772   = do  { state <- getState
  773         ; setState $ state { cgs_stmts = cgs_stmts state CmmGraph.<*> ag } }
  774 
  775 emitDecl :: CmmDecl -> FCode ()
  776 emitDecl decl
  777   = do  { state <- getState
  778         ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
  779 
  780 emitOutOfLine :: BlockId -> CmmAGraphScoped -> FCode ()
  781 emitOutOfLine l (stmts, tscope) = emitCgStmt (CgFork l stmts tscope)
  782 
  783 emitProcWithStackFrame
  784    :: Convention                        -- entry convention
  785    -> Maybe CmmInfoTable                -- info table?
  786    -> CLabel                            -- label for the proc
  787    -> [CmmFormal]                       -- stack frame
  788    -> [CmmFormal]                       -- arguments
  789    -> CmmAGraphScoped                   -- code
  790    -> Bool                              -- do stack layout?
  791    -> FCode ()
  792 
  793 emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False
  794   = do  { platform <- getPlatform
  795         ; emitProc mb_info lbl [] blocks (widthInBytes (wordWidth platform)) False
  796         }
  797 emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True
  798         -- do layout
  799   = do  { profile <- getProfile
  800         ; let (offset, live, entry) = mkCallEntry profile conv args stk_args
  801               graph' = entry CmmGraph.<*> graph
  802         ; emitProc mb_info lbl live (graph', tscope) offset True
  803         }
  804 emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame"
  805 
  806 emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel
  807                        -> [CmmFormal]
  808                        -> CmmAGraphScoped
  809                        -> FCode ()
  810 emitProcWithConvention conv mb_info lbl args blocks
  811   = emitProcWithStackFrame conv mb_info lbl [] args blocks True
  812 
  813 emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped
  814          -> Int -> Bool -> FCode ()
  815 emitProc mb_info lbl live blocks offset do_layout
  816   = do  { l <- newBlockId
  817         ; let
  818               blks :: CmmGraph
  819               blks = labelAGraph l blocks
  820 
  821               infos | Just info <- mb_info = mapSingleton (g_entry blks) info
  822                     | otherwise            = mapEmpty
  823 
  824               sinfo = StackInfo { arg_space = offset
  825                                 , do_layout = do_layout }
  826 
  827               tinfo = TopInfo { info_tbls = infos
  828                               , stack_info=sinfo}
  829 
  830               proc_block = CmmProc tinfo lbl live blks
  831 
  832         ; state <- getState
  833         ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
  834 
  835 getCmm :: FCode a -> FCode (a, CmmGroup)
  836 -- Get all the CmmTops (there should be no stmts)
  837 -- Return a single Cmm which may be split from other Cmms by
  838 -- object splitting (at a later stage)
  839 getCmm code
  840   = do  { state1 <- getState
  841         ; (a, state2) <- withState code (state1 { cgs_tops  = nilOL })
  842         ; setState $ state2 { cgs_tops = cgs_tops state1 }
  843         ; return (a, fromOL (cgs_tops state2)) }
  844 
  845 
  846 mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
  847 mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing
  848 
  849 mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph
  850                  -> Maybe Bool -> FCode CmmAGraph
  851 mkCmmIfThenElse' e tbranch fbranch likely = do
  852   tscp  <- getTickScope
  853   endif <- newBlockId
  854   tid   <- newBlockId
  855   fid   <- newBlockId
  856 
  857   let
  858     (test, then_, else_, likely') = case likely of
  859       Just False | Just e' <- maybeInvertCmmExpr e
  860         -- currently NCG doesn't know about likely
  861         -- annotations. We manually switch then and
  862         -- else branch so the likely false branch
  863         -- becomes a fallthrough.
  864         -> (e', fbranch, tbranch, Just True)
  865       _ -> (e, tbranch, fbranch, likely)
  866 
  867   return $ catAGraphs [ mkCbranch test tid fid likely'
  868                       , mkLabel tid tscp, then_, mkBranch endif
  869                       , mkLabel fid tscp, else_, mkLabel endif tscp ]
  870 
  871 mkCmmIfGoto :: CmmExpr -> BlockId -> FCode CmmAGraph
  872 mkCmmIfGoto e tid = mkCmmIfGoto' e tid Nothing
  873 
  874 mkCmmIfGoto' :: CmmExpr -> BlockId -> Maybe Bool -> FCode CmmAGraph
  875 mkCmmIfGoto' e tid l = do
  876   endif <- newBlockId
  877   tscp  <- getTickScope
  878   return $ catAGraphs [ mkCbranch e tid endif l, mkLabel endif tscp ]
  879 
  880 mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
  881 mkCmmIfThen e tbranch = mkCmmIfThen' e tbranch Nothing
  882 
  883 mkCmmIfThen' :: CmmExpr -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph
  884 mkCmmIfThen' e tbranch l = do
  885   endif <- newBlockId
  886   tid   <- newBlockId
  887   tscp  <- getTickScope
  888   return $ catAGraphs [ mkCbranch e tid endif l
  889                       , mkLabel tid tscp, tbranch, mkLabel endif tscp ]
  890 
  891 mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmExpr]
  892        -> UpdFrameOffset -> [CmmExpr] -> FCode CmmAGraph
  893 mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
  894   profile <- getProfile
  895   k       <- newBlockId
  896   tscp    <- getTickScope
  897   let area = Young k
  898       (off, _, copyin) = copyInOflow profile retConv area results []
  899       copyout = mkCallReturnsTo profile f callConv actuals k off updfr_off extra_stack
  900   return $ catAGraphs [copyout, mkLabel k tscp, copyin]
  901 
  902 mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmExpr] -> UpdFrameOffset
  903           -> FCode CmmAGraph
  904 mkCmmCall f results actuals updfr_off
  905    = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off []
  906 
  907 
  908 -- ----------------------------------------------------------------------------
  909 -- turn CmmAGraph into CmmGraph, for making a new proc.
  910 
  911 aGraphToGraph :: CmmAGraphScoped -> FCode CmmGraph
  912 aGraphToGraph stmts
  913   = do  { l <- newBlockId
  914         ; return (labelAGraph l stmts) }