-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

module GHC.StgToCmm.Heap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

import GHC.Prelude hiding ((<*>))

import GHC.Stg.Syntax
import GHC.Cmm.CLabel
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Prof (profDynAlloc, dynProfHdr, staticProfHdr)
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Closure

import GHC.Cmm.Graph

import GHC.Cmm.Dataflow.Label
import GHC.Runtime.Heap.Layout
import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Types.CostCentre
import GHC.Types.Id.Info( CafInfo(..), mayHaveCafRefs )
import GHC.Types.Id ( Id )
import GHC.Unit
import GHC.Driver.Session
import GHC.Platform
import GHC.Platform.Profile
import GHC.Data.FastString( mkFastString, fsLit )
import GHC.Utils.Panic( sorry )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr -- returns Hp+n

-- allocDynClosure allocates the object in the heap and modifies the
-- virtual Hp to account for this. It returns a CmmExpr holding the
-- address of the new object.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed. The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time. This can be quite
-- significant - see test T4801.

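-- As a usage sketch (illustrative only; 'bndr', 'info_tbl', 'lf_info',
-- 'use_cc' and 'args_w_offsets' are hypothetical inputs), a caller that
-- needs the address to survive later changes to Hp would write:
--
--      do { hp_rel <- allocDynClosure (Just bndr) info_tbl lf_info
--                                     use_cc use_cc args_w_offsets
--         ; tmp <- assignTemp hp_rel  -- pin the address in a LocalReg
--         ; {- ... use (CmmReg (CmmLocal tmp)) ... -} }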

allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                  -- ^ representation of the object
  -> CmmExpr                -- ^ info pointer
  -> CmmExpr                -- ^ cost centre
  -> [(CmmExpr,ByteOff)]    -- ^ payload
  -> FCode CmmExpr          -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
        -- info_offset is the VirtualHpOffset of the first
        -- word of the new object
        -- Remember, virtHp points to last allocated word,
        -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  profile <- getProfile
  setVirtHp (virt_hp + heapClosureSizeW profile rep)

  return base

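-- For illustration, a sketch (not verbatim codegen output) of the Cmm
-- emitted on a 64-bit, non-profiled target for an object with info
-- pointer 'info' and payload [(x,8),(y,16)]:
--
--      I64[base]      = info    -- header, via emitSetDynHdr
--      I64[base + 8]  = x       -- payload, via hpStore
--      I64[base + 16] = y
--
-- where 'base' is the Hp-relative address of the object's first word,
-- and is also the value returned.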

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do profile <- getProfile
       hpStore base (zip (header profile) [0, profileWordSizeInBytes profile ..])
  where
    header :: Profile -> [CmmExpr]
    header profile = [info_ptr] ++ dynProfHdr profile ccs
        -- ToDo: Parallel stuff
        -- No ticky header
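
-- For instance (illustrative): without profiling, 'header' is just
-- [info_ptr], so the only store emitted is
--
--      I64[base] = info_ptr
--
-- on a 64-bit target; with profiling, dynProfHdr adds the profiling
-- header words (the cost-centre stack and an LDV word), giving further
-- stores at offsets 8 and 16.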

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  platform <- getPlatform
  sequence_ $
    [ emitStore (cmmOffsetB platform base off) val | (val,off) <- vals ]

-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: Profile
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields profile info_tbl ccs caf_refs payload
  = mkStaticClosure profile info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    platform = profilePlatform profile
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit platform 0]
        | otherwise = []

    static_link_field
        | is_caf
        = [mkIntCLit platform 0]
        | staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf    = [mkIntCLit platform 0]
        | otherwise = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs = mkIntCLit platform 0
        | otherwise               = mkIntCLit platform 3  -- No CAF refs
            -- See Note [STATIC_LINK fields] in rts/sm/Storage.h

mkStaticClosure :: Profile -> CLabel -> CostCentreStack -> [CmmLit]
                -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr profile ccs
  ++ payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field
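
-- As a worked example (illustrative): a static CAF with an empty
-- payload on a 64-bit, non-profiled target yields the word layout
--
--      info_lbl        -- info pointer
--      0               -- padding (stands in for the indirectee slot)
--      0               -- static_link
--      0               -- saved_info
--
-- matching the CAF layout pictured in mkStaticClosureFields above.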

-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
        Note the tail call to the garbage collector;
        it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case, since
      all boxed cases return just one pointer)
         ...
         r = f( x, y )
      K:      -- K needs an info table
         Hp = Hp+8
         if Hp > HpLim goto L
         ...code for rhs...

      L: r = gc_1p( r )
         goto K }

      Here, the info table needed by the call
      to gc_1p should be the *same* as the
      one for the call to f; the C-- optimiser
      spots this sharing opportunity.

  (b) No canned sequence for results of f
      Note second info table
         ...
         (r1,r2,r3) = call f( x, y )
      K:
         Hp = Hp+8
         if Hp > HpLim goto L
         ...code for rhs...

      L: call gc()    -- Extra info table here
         goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

  K:    -- K needs an info table
     Hp = Hp+8
     if Hp > HpLim goto L
     ...

  L: call gc()
     goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code = do
  platform <- getPlatform
  let
    node = case nodeSet of
            Just r  -> CmmReg (CmmLocal r)
            Nothing -> CmmLit (CmmLabel $ staticClosureLabel platform cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

  entryHeapCheck' is_fastf node arity args code
-- | Lower-level version for "GHC.Cmm.Parser"
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do profile <- getProfile
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump profile NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump profile NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump profile Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newBlockId
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
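
-- For a fast function of arity 2 the code emitted here looks roughly
-- like the following sketch (illustrative; compare Note [Heap checks],
-- case (a)):
--
--      loop:
--        Hp = Hp + N
--        if Hp > HpLim goto gc
--        ... function body ...
--      gc:
--        HpAlloc = N
--        jump stg_gc_fun( node, arg1, arg2 )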

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (If there are multiple alts, it is
-- always a canned GC point.)

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
  profile <- getProfile
  platform <- getPlatform
  case cannedGCEntryPoint platform regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newBlockId
        let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) regs []
        lcont <- newBlockId
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do platform <- getPlatform
       case cannedGCEntryPoint platform regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do profile <- getProfile
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call profile gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call profile label sp
      | cont_on_stack
      = mkJumpReturnsTo profile label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo profile label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: Platform -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint platform regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32 -> Just (mkGcLabel "stg_gc_f1")
                                  W64 -> Just (mkGcLabel "stg_gc_d1")
                                  _   -> Nothing

          | width == wordWidth platform -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64                -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                   -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing
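
-- For example (illustrative): a case alternative returning two boxed
-- values gets its results back in two GC'd pointer registers, so it is
-- matched to the canned entry point stg_gc_pp; one returning a single
-- Double# is matched to stg_gc_d1; any pattern without a canned
-- routine falls back to genericGC.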

-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { platform <- getPlatform
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "
                       ++ show (mBLOCK_SIZE * platformWordSizeInBytes platform) ++ " bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr platform (hpHw * platformWordSizeInBytes platform))
                 | otherwise = Nothing
                 where
                   constants = platformConstants platform
                   bLOCK_SIZE_W = pc_BLOCK_SIZE constants `quot` platformWordSizeInBytes platform
                   mBLOCK_SIZE = pc_BLOCKS_PER_MBLOCK constants * bLOCK_SIZE_W
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below). *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
--
-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust. For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- respectively smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to stack pointer. The idea for
-- this change came from observations made while debugging #8275.
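
-- A sketch of the generated check (illustrative): before the stack is
-- made manifest the code generator emits
--
--      if ((old + 0) - <highSp> < SpLim) goto gc
--
-- and the stack layout phase later rewrites (old + 0) in terms of the
-- real Sp, yielding something like
--
--      if (Sp - 40 < SpLim) goto gc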

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark.
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8) or some other fixed value.
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  platform <- getPlatform
  gc_id <- newBlockId

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp = cmmOffsetExprB platform hpExpr alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt platform)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType platform spReg)))
                              [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt platform) [hpExpr, hpLimExpr]

    alloc_n = mkAssign hpAllocReg alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck
      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False))

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if isJust mb_alloc_lit
    then do
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
    else
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
        -- Yielding if HpLim == 0
        let yielding = CmmMachOp (mo_wordEq platform)
                                 [CmmReg hpLimReg,
                                  CmmLit (zeroCLit platform)]
        emit =<< mkCmmIfGoto' yielding gc_id (Just False)

  tscope <- getTickScope
  emitOutOfLine gc_id
    (do_gc, tscope) -- this is expected to jump back somewhere

-- Test for stack pointer exhaustion, then
-- bump heap pointer, and test for heap exhaustion
-- Note that we don't move the heap pointer unless the
-- stack check succeeds.  Otherwise we might end up
-- with slop at the end of the current block, which can
-- confuse the LDV profiler.

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
--
--  1. There is information about self-loop in the FCode environment. We don't
--     check the binder (first component of the self_loop_info) because we are
--     certain that if the self-loop info is present then we are compiling the
--     binder body. Reason: the only possible way to get here with the
--     self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but False for other heap checks (e.g. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is True for heap checks at the entry to a
--     function and some heap checks created in hand-written Cmm; otherwise
--     mb_stk_hwm is Nothing. In other words, the only situation when both
--     conditions are true is when compiling stack and heap checks at the
--     entry to a function. This is the only situation when we want to emit
--     a self-loop label.