-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

module GHC.StgToCmm.Heap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

import GHC.Prelude hiding ((<*>))

import GHC.Stg.Syntax
import GHC.Cmm.CLabel
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Prof (profDynAlloc, dynProfHdr, staticProfHdr)
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Closure

import GHC.Cmm.Graph

import GHC.Cmm.Dataflow.Label
import GHC.Runtime.Heap.Layout
import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Types.CostCentre
import GHC.Types.Id.Info( CafInfo(..), mayHaveCafRefs )
import GHC.Types.Id ( Id )
import GHC.Unit
import GHC.Driver.Session
import GHC.Platform
import GHC.Platform.Profile
import GHC.Data.FastString( mkFastString, fsLit )
import GHC.Utils.Panic( sorry )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
--              Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr -- returns Hp+n

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
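--
-- As an illustration only (a hypothetical caller, not code from this
-- module; it assumes newTemp, bWord and emitAssign from
-- GHC.StgToCmm.Utils, GHC.Cmm.Utils and GHC.StgToCmm.Monad), a caller
-- that needs the address to survive later allocation would copy it into
-- a LocalReg itself:
--
--      do { hp_rel   <- allocDynClosure mb_id info_tbl lf_info use_cc blame_cc args
--         ; platform <- getPlatform
--         ; reg      <- newTemp (bWord platform)   -- keep the address alive
--         ; emitAssign (CmmLocal reg) hp_rel       -- before Hp is bumped again
--         ; ... }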


allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmExpr,ByteOff)]              -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  profile <- getProfile
  setVirtHp (virt_hp + heapClosureSizeW profile rep)

  return base
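
-- A worked example of the bookkeeping above (illustrative numbers only):
-- if virt_hp is 5 on entry and heapClosureSizeW reports 3 words for the
-- object (header plus payload), then info_offset = 6, so `base` is the
-- Hp-relative address of word 6; emitSetDynHdr writes the info pointer
-- (and any profiling header) starting at base+0, hpStore fills in the
-- payload at its byte offsets, and setVirtHp leaves virt_hp at 8, again
-- pointing at the last allocated word.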


emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do profile <- getProfile
       hpStore base (zip (header profile) [0, profileWordSizeInBytes profile ..])
  where
    header :: Profile -> [CmmExpr]
    header profile = [info_ptr] ++ dynProfHdr profile ccs
        -- ToDo: Parallel stuff
        -- No ticky header

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  platform <- getPlatform
  sequence_ $
    [ emitStore (cmmOffsetB platform base off) val | (val,off) <- vals ]

-----------------------------------------------------------
--              Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: Profile
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields profile info_tbl ccs caf_refs payload
  = mkStaticClosure profile info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    platform = profilePlatform profile
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit platform 0]
        | otherwise = []

    static_link_field
        | is_caf
        = [mkIntCLit platform 0]
        | staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf     = [mkIntCLit platform 0]
        | otherwise  = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs  = mkIntCLit platform 0
        | otherwise                = mkIntCLit platform 3  -- No CAF refs
                                      -- See Note [STATIC_LINK fields]
                                      -- in rts/sm/Storage.h

mkStaticClosure :: Profile -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr profile ccs
  ++ payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field
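
-- For example (illustrative, not generated output): a static CAF with an
-- empty payload, compiled without profiling, comes out of the guards in
-- mkStaticClosureFields as the word list
--
--      [ CmmLabel info_lbl       -- info pointer
--      , mkIntCLit platform 0    -- padding, standing in for the indirectee slot
--      , mkIntCLit platform 0    -- static_link_field
--      , mkIntCLit platform 0    -- saved_info_field
--      ]
--
-- matching the four-word CAF layout pictured above.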


-----------------------------------------------------------
--              Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

   (b) No canned sequence for results of f
       Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code = do
  platform <- getPlatform
  let
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel platform cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

  entryHeapCheck' is_fastf node arity args code

-- | lower-level version for "GHC.Cmm.Parser"
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do profile <- getProfile
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump profile NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump profile NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump profile Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newBlockId
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    profile <- getProfile
    platform <- getPlatform
    case cannedGCEntryPoint platform regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newBlockId
        let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) regs []
        lcont <- newBlockId
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do platform <- getPlatform
       case cannedGCEntryPoint platform regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do profile <- getProfile
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call profile gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call profile label sp
      | cont_on_stack
      = mkJumpReturnsTo profile label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo profile label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: Platform -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint platform regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32       -> Just (mkGcLabel "stg_gc_f1")
                                  W64       -> Just (mkGcLabel "stg_gc_d1")
                                  _         -> Nothing

          | width == wordWidth platform -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64                -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                   -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing
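
-- For instance: a case alternative returning one GC pointer gets
-- stg_gc_unpt_r1, two GC pointers get stg_gc_pp, a single Double# (a W64
-- float) gets stg_gc_d1, and any combination not matched above yields
-- Nothing, so the caller falls back to the generic stg_gc_noregs call in
-- genericGC.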

-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { platform <- getPlatform
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show (mBLOCK_SIZE * platformWordSizeInBytes platform)++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr platform (hpHw * (platformWordSizeInBytes platform)))
                 | otherwise = Nothing
                 where
                  constants = platformConstants platform
                  bLOCK_SIZE_W = pc_BLOCK_SIZE (platformConstants platform) `quot` platformWordSizeInBytes platform
                  mBLOCK_SIZE = pc_BLOCKS_PER_MBLOCK constants * bLOCK_SIZE_W
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller amount of stack space. This would not be the
-- case if the code generator produced direct references to Sp. By
-- referencing (old + 0) we make sure that we always check for a correct
-- amount of stack: when converting (old + 0) to Sp the stack layout
-- phase takes into account changes already made to the stack pointer.
-- The idea for this change came from observations made while debugging
-- #8275.
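--
-- Concretely (a sketch of the shape of the generated check, not verbatim
-- output), sp_oflo below produces a conditional of the form
--
--      if ((old + 0) - <stack high-water mark> < SpLim) goto gc;
--
-- and only once the stack-layout pass has made the stack manifest does
-- this become a comparison against the real Sp, e.g.
--
--      if (Sp - N < SpLim) goto gc;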

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark.
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value.
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  platform <- getPlatform
  gc_id <- newBlockId

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp   = cmmOffsetExprB platform hpExpr alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt platform)
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType platform spReg)))
                             [CmmStackSlot Old 0, sp_hwm],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt platform) [hpExpr, hpLimExpr]

    alloc_n = mkAssign hpAllocReg alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck
      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
     tickyHeapCheck
     emitAssign hpReg bump_hp
     emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
    else
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq platform)
                                  [CmmReg hpLimReg,
                                   CmmLit (zeroCLit platform)]
         emit =<< mkCmmIfGoto' yielding gc_id (Just False)

  tscope <- getTickScope
  emitOutOfLine gc_id
   (do_gc, tscope) -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
--
--  1. There is information about self-loop in the FCode environment. We don't
--     check the binder (first component of the self_loop_info) because we are
--     certain that if the self-loop info is present then we are compiling the
--     binder body. Reason: the only possible way to get here with the
--     self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but false for other heap checks (eg. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--     function and some heap checks created in hand-written Cmm. Otherwise it
--     is Nothing. In other words the only situation when both conditions are
--     true is when compiling stack and heap checks at the entry to a
--     function. This is the only situation when we want to emit a self-loop
  693 --     label.