never executed always true always false
1 {-# LANGUAGE BangPatterns #-}
2
3 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
4
5 -----------------------------------------------------------------------------
6 --
7 -- Stg to C-- code generation: expressions
8 --
9 -- (c) The University of Glasgow 2004-2006
10 --
11 -----------------------------------------------------------------------------
12
13 module GHC.StgToCmm.Expr ( cgExpr, cgLit ) where
14
15 import GHC.Prelude hiding ((<*>))
16
17 import {-# SOURCE #-} GHC.StgToCmm.Bind ( cgBind )
18
19 import GHC.StgToCmm.Monad
20 import GHC.StgToCmm.Heap
21 import GHC.StgToCmm.Env
22 import GHC.StgToCmm.DataCon
23 import GHC.StgToCmm.Prof (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
24 import GHC.StgToCmm.Layout
25 import GHC.StgToCmm.Lit
26 import GHC.StgToCmm.Prim
27 import GHC.StgToCmm.Hpc
28 import GHC.StgToCmm.Ticky
29 import GHC.StgToCmm.Utils
30 import GHC.StgToCmm.Closure
31
32 import GHC.Stg.Syntax
33
34 import GHC.Cmm.Graph
35 import GHC.Cmm.BlockId
36 import GHC.Cmm hiding ( succ )
37 import GHC.Cmm.Info
38 import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG )
39 import GHC.Core
40 import GHC.Core.DataCon
41 import GHC.Types.ForeignCall
42 import GHC.Types.Id
43 import GHC.Builtin.PrimOps
44 import GHC.Core.TyCon
45 import GHC.Core.Type ( isUnliftedType )
46 import GHC.Types.RepType ( isVoidTy, countConRepArgs )
47 import GHC.Types.CostCentre ( CostCentreStack, currentCCS )
48 import GHC.Types.Tickish
49 import GHC.Data.Maybe
50 import GHC.Utils.Misc
51 import GHC.Data.FastString
52 import GHC.Utils.Outputable
53 import GHC.Utils.Panic
54 import GHC.Utils.Panic.Plain
55
56 import Control.Monad ( unless, void )
57 import Control.Arrow ( first )
58 import Data.List ( partition )
59
60 ------------------------------------------------------------------------
61 -- cgExpr: the main function
62 ------------------------------------------------------------------------
63
-- | Generate Cmm code for an STG expression, returning a 'ReturnKind'
-- describing how control reaches the continuation.  Each equation handles
-- one STG constructor; the two special cases for 'SeqOp' and 'DataToTagOp'
-- must come before the generic 'StgOpApp' equation so they fire first.
cgExpr  :: CgStgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

-- seq# a s ==> a
-- See Note [seq# magic] in GHC.Core.Opt.ConstantFold
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

-- dataToTag# :: a -> Int#
-- See Note [dataToTag# magic] in primops.txt.pp
cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
  platform <- getPlatform
  emitComment (mkFastString "dataToTag#")
  info <- getCgIdInfo a
  let amode = idInfoToAmode info
  -- tag_reg caches the pointer-tag bits of 'a' so they are read only once
  tag_reg <- assignTemp $ cmmConstrTag1 platform amode
  result_reg <- newTemp (bWord platform)
  let tag = CmmReg $ CmmLocal tag_reg
      is_tagged = cmmNeWord platform tag (zeroExpr platform)
      is_too_big_tag = cmmEqWord platform tag (cmmTagMask platform)
  -- Here we will first check the tag bits of the pointer we were given;
  -- if this doesn't work then enter the closure and use the info table
  -- to determine the constructor. Note that all tag bits set means that
  -- the constructor index is too large to fit in the pointer and therefore
  -- we must look in the info table. See Note [Tagging big families].

  -- Slow path: the pointer is untagged (e.g. an unevaluated thunk), so
  -- evaluate 'a' and then read the constructor tag from its info table.
  slow_path <- getCode $ do
      tmp <- newTemp (bWord platform)
      _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
      ptr_opts <- getPtrOpts
      emitAssign (CmmLocal result_reg)
        $ getConstrTag ptr_opts (cmmUntag platform (CmmReg (CmmLocal tmp)))

  -- Fast path: the pointer is tagged; usually the tag itself gives the
  -- (one-based) constructor index, except for the "spill-over" tag.
  fast_path <- getCode $ do
      -- Return the constructor index from the pointer tag
      return_ptr_tag <- getCode $ do
          emitAssign (CmmLocal result_reg)
            $ cmmSubWord platform tag (CmmLit $ mkWordCLit platform 1)
      -- Return the constructor index recorded in the info table
      return_info_tag <- getCode $ do
          ptr_opts <- getPtrOpts
          emitAssign (CmmLocal result_reg)
            $ getConstrTag ptr_opts (cmmUntag platform amode)

      emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)

  -- (Just True): branch-likelihood hint — most scrutinees are tagged.
  emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
  emitReturn [CmmReg $ CmmLocal result_reg]


cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con mn args _) = cgConApp con mn args
cgExpr (StgTick t e)         = cgTick t >> cgExpr e
cgExpr (StgLit lit)          = do cmm_expr <- cgLit lit
                                  emitReturn [cmm_expr]

cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id     -- the join point the LNE bindings branch back to
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts
132
133 ------------------------------------------------------------------------
134 -- Let no escape
135 ------------------------------------------------------------------------
136
137 {- Generating code for a let-no-escape binding, aka join point is very
138 very similar to what we do for a case expression. The duality is
139 between
140 let-no-escape x = b
141 in e
142 and
143 case e of ... -> b
144
145 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
146 the alternative of the case; it needs to be compiled in an environment
147 in which all volatile bindings are forgotten, and the free vars are
148 bound only to stable things like stack locations.. The 'e' part will
149 execute *next*, just like the scrutinee of a case. -}
150
151 -------------------------
-- | Generate code for a let-no-escape binding group.  Each RHS becomes an
-- out-of-line block that finishes by branching to 'join_id'.
cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
          -- For a recursive group the bindings must be in scope *before*
          -- we generate any RHS code, so the RHSs can refer to each other;
          -- hence addBindsC runs before the deferred fcodes.
        ; addBindsC infos
        ; sequence_ fcodes
        }
167
168 -------------------------
-- | Compile one let-no-escape RHS.  Returns the binder's 'CgIdInfo'
-- together with a deferred 'FCode' action that actually emits the RHS
-- as an out-of-line block ending in a branch to the join point.
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
       -- The info produced for an LNE binder always carries a block id;
       -- expectJust documents that invariant rather than handling failure.
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                       -- Append the branch to the join point, then emit the
                       -- whole RHS out of line under label 'bid'.
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }
183
-- | Dispatch on the shape of a let-no-escape RHS: closures keep their
-- arguments, while constructor RHSs are compiled as a nullary closure
-- whose body is the corresponding 'StgConApp'.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con mn _ts args)
  -- The type-argument field is never inspected for an StgRhsCon, so a
  -- panic placeholder is passed; forcing it would be a compiler bug.
  = cgLetNoEscapeClosure bndr local_cc cc []
      (StgConApp con mn args (pprPanic "cgLetNoEscapeRhsBody" $
                              text "StgRhsCon doesn't have type args"))
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
199
200 -------------------------
-- | Build the 'CgIdInfo' for a let-no-escape closure and the deferred
-- code generator for its body.  The body is compiled in a forked LNE
-- context with its own ticky counter and a no-escape heap check.
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> CgStgExpr            -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do platform <- getPlatform
       return ( lneIdInfo platform bndr args, code )
  where
   code = forkLneBody $ withNewTickyCounterLNE (idName bndr) args $ do
            { restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
217
218
219 ------------------------------------------------------------------------
220 -- Case expressions
221 ------------------------------------------------------------------------
222
223 {- Note [Compiling case expressions]
224 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
225 It is quite interesting to decide whether to put a heap-check at the
226 start of each alternative. Of course we certainly have to do so if
227 the case forces an evaluation, or if there is a primitive op which can
228 trigger GC.
229
230 A more interesting situation is this (a Plan-B situation)
231
232 !P!;
233 ...P...
234 case x# of
235 0# -> !Q!; ...Q...
236 default -> !R!; ...R...
237
238 where !x! indicates a possible heap-check point. The heap checks
239 in the alternatives *can* be omitted, in which case the topmost
240 heapcheck will take their worst case into account.
241
242 In favour of omitting !Q!, !R!:
243
244 - *May* save a heap overflow test,
245 if ...P... allocates anything.
246
247 - We can use relative addressing from a single Hp to
248 get at all the closures so allocated.
249
250 - No need to save volatile vars etc across heap checks
251 in !Q!, !R!
252
253 Against omitting !Q!, !R!
254
255 - May put a heap-check into the inner loop. Suppose
256 the main loop is P -> R -> P -> R...
257 Q is the loop exit, and only it does allocation.
258 This only hurts us if P does no allocation. If P allocates,
259 then there is a heap check in the inner loop anyway.
260
261 - May do more allocation than reqd. This sometimes bites us
262 badly. For example, nfib (ha!) allocates about 30\% more space if the
263 worst-casing is done, because many many calls to nfib are leaf calls
264 which don't need to allocate anything.
265
266 We can un-allocate, but that costs an instruction
267
268 Neither problem hurts us if there is only one alternative.
269
270 Suppose the inner loop is P->R->P->R etc. Then here is
271 how many heap checks we get in the *inner loop* under various
272 conditions
273
274 Alloc Heap check in branches (!Q!, !R!)?
275 P Q R yes no (absorb to !P!)
276 --------------------------------------
277 n n n 0 0
278 n y n 0 1
279 n . y 1 1
280 y . y 2 1
281 y . n 1 1
282
283 Best choices: absorb heap checks from Q and R into !P! iff
284 a) P itself does some allocation
285 or
286 b) P does allocation, or there is exactly one alternative
287
288 We adopt (b) because that is more likely to put the heap check at the
289 entry to a function, when not many things are live. After a bunch of
290 single-branch cases, we may have lots of things live
291
292 Hence: two basic plans for
293
294 case e of r { alts }
295
296 ------ Plan A: the general case ---------
297
298 ...save current cost centre...
299
300 ...code for e,
301 with sequel (SetLocals r)
302
303 ...restore current cost centre...
304 ...code for alts...
305 ...alts do their own heap checks
306
307 ------ Plan B: special case when ---------
308 (i) e does not allocate or call GC
309 (ii) either upstream code performs allocation
310 or there is just one alternative
311
312 Then heap allocation in the (single) case branch
313 is absorbed by the upstream check.
314 Very common example: primops on unboxed values
315
316 ...code for e,
317 with sequel (SetLocals r)...
318
319 ...code for alts...
320 ...no heap check...
321 -}
322
323
324
325 -------------------------------------
-- | Where to place the heap check for the alternatives of a case.
-- See Note [Compiling case expressions].
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check
332
333 -------------------------------------
-- | Generate code for a case expression.  NB: the equations below are
-- order-sensitive — the VoidRep and unlifted-scrutinee special cases must
-- be tried before the catch-all 'StgApp v []' equation, which is the
-- "dodgy unsafeCoerce" fallback.
cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind

{-
Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (#9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.

Note [Dead-binder optimisation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A case-binder, or data-constructor argument, may be marked as dead,
because we preserve occurrence-info on binders in GHC.Core.Tidy (see
GHC.Core.Tidy.tidyIdBndr).

If the binder is dead, we can sometimes eliminate a load.  While
CmmSink will eliminate that load, it's very easy to kill it at source
(giving CmmSink less work to do), and in any case CmmSink only runs
with -O. Since the majority of case binders are dead, this
optimisation probably still has a great benefit-cost ratio and we want
to keep it for -O0. See also Phab:D5358.

This probably also was the reason for occurrence hack in Phab:D5339 to
exist, perhaps because the occurrence information preserved by
'GHC.Core.Tidy.tidyIdBndr' was insufficient.  But now that CmmSink does the
job we deleted the hacks.
-}

cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, rhs)] <- alts
  = cgExpr rhs

{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
     y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in GHC.Runtime.Heap.Inspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
  = -- assignment suffices for unlifted types
    do { platform <- getPlatform
       ; unless (reps_compatible platform) $
           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
                    (pp_bndr v $$ pp_bndr bndr)
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg platform (NonVoid bndr)))
                    (idInfoToAmode v_info)
       -- Add bndr to the environment
       ; _ <- bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible platform = primRepCompatible platform (idPrimRep v) (idPrimRep bndr)

    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { platform <- getPlatform
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel
                (AssignTo [idToReg platform (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newBlockId
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{- Note [Handle seq#]
~~~~~~~~~~~~~~~~~~~~~
See Note [seq# magic] in GHC.Core.Opt.ConstantFold.
The special case for seq# in cgCase does this:

  case seq# a s of v
    (# s', a' #) -> e
==>
  case a of v
    (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- And see Note [seq# magic] in GHC.Core.Opt.ConstantFold
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { platform <- getPlatform
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg platform) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
         -- Decide between Plan A (GC check in each alternative) and
         -- Plan B (absorb into the upstream check); the guards encode
         -- the decision table of Note [Compiling case expressions].
       ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
                    | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
  where
    is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
    is_cmp_op _                             = False
482
483 {- Note [GC for conditionals]
484 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
485 For boolean conditionals it seems that we have always done NoGcInAlts.
486 That is, we have always done the GC check before the conditional.
487 This is enshrined in the special case for
488 case tagToEnum# (a>b) of ...
489 See Note [case on bool]
490
491 It's odd, and it's flagrantly inconsistent with the rules described
492 Note [Compiling case expressions]. However, after eliminating the
493 tagToEnum# (#13397) we will have:
494 case (a>b) of ...
495 Rather than make it behave quite differently, I am testing for a
496 comparison operator here in the general case as well.
497
498 ToDo: figure out what the Right Rule should be.
499
500 Note [scrut sequel]
501 ~~~~~~~~~~~~~~~~~~~
502 The job of the scrutinee is to assign its value(s) to alt_regs.
503 Additionally, if we plan to do a heap-check in the alternatives (see
504 Note [Compiling case expressions]), then we *must* retreat Hp to
505 recover any unused heap before passing control to the sequel. If we
506 don't do this, then any unused heap will become slop because the heap
507 check will reset the heap usage. Slop in the heap breaks LDV profiling
508 (+RTS -hb) which needs to do a linear sweep through the nursery.
509
510
511 Note [Inlining out-of-line primops and heap checks]
512 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
513 If shouldInlinePrimOp returns True when called from GHC.StgToCmm.Expr for the
514 purpose of heap check placement, we *must* inline the primop later in
515 GHC.StgToCmm.Prim. If we don't things will go wrong.
516 -}
517
518 -----------------
-- | Save the current cost centre unless the scrutinee is simple, in
-- which case nothing needs saving and we return 'Nothing'.
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut =
  if simple_scrut
    then return Nothing
    else saveCurrentCostCentre
523
524
525 -----------------
-- | Is the scrutinee simple, i.e. guaranteed not to block or allocate?
-- If so, heap usage from the alternatives may be amalgamated into the
-- check that precedes the whole case expression.
-- NB: answering 'True' for an expression that actually allocates will
-- deeply mess up allocation accounting.
isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
isSimpleScrut scrut alt_ty = case (scrut, alt_ty) of
  (StgOpApp op args _, _)  -> isSimpleOp op args
  (StgLit _, _)            -> return True   -- case 1# of { 0# -> ..; ... }
  (StgApp _ [], PrimAlt _) -> return True   -- case x# of { 0# -> ..; ... }
  _                        -> return False
535
-- | True iff the op cannot block or allocate.
isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- An unsafe foreign call is simple; a safe ("playSafe") one may block.
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
-- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
isSimpleOp (StgPrimOp DataToTagOp) _ = return False
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! shouldInlinePrimOp dflags op arg_exprs
-- Prim calls are out-of-line and hence not simple.
isSimpleOp (StgPrimCallOp _) _ = return False
547
548 -----------------
-- | The binders of a case that are assigned by the evaluation of the
-- scrutinee.  For unboxed-tuple alternatives these are the alternative's
-- field binders; otherwise the case binder itself.
-- They're non-void, see Note [Post-unarisation invariants] in GHC.Stg.Unarise.
chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
chooseReturnBndrs bndr alt_type alts =
  case (alt_type, alts) of
    (PrimAlt _, _)  -> assertNonVoidIds [bndr]
    (AlgAlt _, _)   -> assertNonVoidIds [bndr]   -- Only 'bndr' is assigned
    (PolyAlt, _)    -> assertNonVoidIds [bndr]   -- Only 'bndr' is assigned
    (MultiValAlt n, [(_, ids, _)])
      -> assertPpr (ids `lengthIs` n) (ppr n $$ ppr ids $$ ppr bndr) $
         assertNonVoidIds ids                    -- 'bndr' is not assigned!
    _ -> panic "chooseReturnBndrs"
         -- MultiValAlt has only one alternative
568
569 -------------------------------------
-- | Emit the dispatch code for case alternatives.  At this point the
-- result of the scrutinee is already in the return binders, so we only
-- have to switch on it and emit each alternative's RHS.
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { platform <- getPlatform

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg platform bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { platform <- getPlatform

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

          -- branches' maps one-based pointer tags (hence 'succ') to code;
          -- via_ptr are alternatives reachable from the pointer tag alone,
          -- via_info are those that need the info-table tag (spill-over).
        ; let !fam_sz   = tyConFamilySize tycon
              !bndr_reg = CmmLocal (idToReg platform bndr)
              !ptag_expr = cmmConstrTag1 platform (CmmReg bndr_reg)
              !branches' = first succ <$> branches
              !maxpt = mAX_PTR_TAG platform
              (!via_ptr, !via_info) = partition ((< maxpt) . fst) branches'
              !small = isSmallFamily platform fam_sz

        -- Is the constructor tag in the node reg?
        -- See Note [Tagging big families]
        ; if small || null via_info
           then -- Yes, bndr_reg has constructor tag in ls bits
               emitSwitch ptag_expr branches' mb_deflt 1
                 (if small then fam_sz else maxpt)

           else -- No, get the exact tag from the info table when the pointer
                -- tag is mAX_PTR_TAG (the spill-over tag).
                -- See Note [Double switching for big families]
              do
                ptr_opts <- getPtrOpts
                let !untagged_ptr = cmmUntag platform (CmmReg bndr_reg)
                    !itag_expr = getConstrTag ptr_opts untagged_ptr
                    !info0 = first pred <$> via_info
                if null via_ptr then
                  emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1)
                else do
                  infos_lbl <- newBlockId
                  infos_scp <- getTickScope

                  let spillover = (maxpt, (mkBranch infos_lbl, infos_scp))

                  -- Share the default alternative between the two switches:
                  -- the outer switch emits the code under a fresh label, the
                  -- inner switch only branches to it.  See Trac #14373.
                  (mb_shared_deflt, mb_shared_branch) <- case mb_deflt of
                      (Just (stmts, scp)) ->
                          do lbl <- newBlockId
                             return ( Just (mkLabel lbl scp <*> stmts, scp)
                                    , Just (mkBranch lbl, scp))
                      _ -> return (Nothing, Nothing)
                  -- Switch on pointer tag
                  emitSwitch ptag_expr (spillover : via_ptr) mb_shared_deflt 1 maxpt
                  join_lbl <- newBlockId
                  emit (mkBranch join_lbl)
                  -- Switch on info table tag
                  emitLabel infos_lbl
                  emitSwitch itag_expr info0 mb_shared_branch
                    (maxpt - 1) (fam_sz - 1)
                  emitLabel join_lbl

        ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative
650
651 -- Note [Double switching for big families]
652 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
653 --
654 -- An algebraic data type can have a n >= 0 summands
655 -- (or alternatives), which are identified (labeled) by
656 -- constructors. In memory they are kept apart by tags
657 -- (see Note [Data constructor dynamic tags] in GHC.StgToCmm.Closure).
658 -- Due to the characteristics of the platform that
659 -- contribute to the alignment of memory objects, there
660 -- is a natural limit of information about constructors
661 -- that can be encoded in the pointer tag. When the mapping
662 -- of constructors to the pointer tag range 1..mAX_PTR_TAG
663 -- is not injective, then we have a "big data type", also
664 -- called a "big (constructor) family" in the literature.
665 -- Constructor tags residing in the info table are injective,
666 -- but considerably more expensive to obtain, due to additional
667 -- memory access(es).
668 --
669 -- When doing case analysis on a value of a "big data type"
670 -- we need two nested switch statements to make up for the lack
671 -- of injectivity of pointer tagging, also taking the info
672 -- table tag into account. The exact mechanism is described next.
673 --
674 -- In the general case, switching on big family alternatives
675 -- is done by two nested switch statements. According to
676 -- Note [Tagging big families], the outer switch
677 -- looks at the pointer tag and the inner dereferences the
678 -- pointer and switches on the info table tag.
679 --
680 -- We can handle a simple case first, namely when none
681 -- of the case alternatives mention a constructor having
682 -- a pointer tag of 1..mAX_PTR_TAG-1. In this case we
683 -- simply emit a switch on the info table tag.
684 -- Note that the other simple case is when all mentioned
685 -- alternatives lie in 1..mAX_PTR_TAG-1, in which case we can
686 -- switch on the ptr tag only, just like in the small family case.
687 --
688 -- There is a single intricacy with a nested switch:
689 -- Both should branch to the same default alternative, and as such
690 -- avoid duplicate codegen of potentially heavy code. The outer
691 -- switch generates the actual code with a prepended fresh label,
692 -- while the inner one only generates a jump to that label.
693 --
694 -- For example, let's assume a 64-bit architecture, so that all
695 -- heap objects are 8-byte aligned, and hence the address of a
696 -- heap object ends in `000` (three zero bits).
697 --
698 -- Then consider the following data type
699 --
700 -- > data Big = T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8
701 -- Ptr tag: 1 2 3 4 5 6 7 7 7
702 -- As bits: 001 010 011 100 101 110 111 111 111
703 -- Info pointer tag (zero based):
704 -- 0 1 2 3 4 5 6 7 8
705 --
706 -- Then \case T2 -> True; T8 -> True; _ -> False
707 -- will result in following code (slightly cleaned-up and
708 -- commented -ddump-cmm-from-stg):
709 {-
710 R1 = _sqI::P64; -- scrutinee
711 if (R1 & 7 != 0) goto cqO; else goto cqP;
712 cqP: // global -- enter
713 call (I64[R1])(R1) returns to cqO, args: 8, res: 8, upd: 8;
714 cqO: // global -- already WHNF
715 _sqJ::P64 = R1;
716 _cqX::P64 = _sqJ::P64 & 7; -- extract pointer tag
717 switch [1 .. 7] _cqX::P64 {
718 case 3 : goto cqW;
719 case 7 : goto cqR;
720 default: {goto cqS;}
721 }
722 cqR: // global
723 _cr2 = I32[I64[_sqJ::P64 & (-8)] - 4]; -- tag from info pointer
724 switch [6 .. 8] _cr2::I64 {
725 case 8 : goto cr1;
726 default: {goto cr0;}
727 }
728 cr1: // global
729 R1 = GHC.Types.True_closure+2;
730 call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
731 cr0: // global -- technically necessary label
732 goto cqS;
733 cqW: // global
734 R1 = GHC.Types.True_closure+2;
735 call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
736 cqS: // global
737 R1 = GHC.Types.False_closure+1;
738 call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
739 -}
740 --
741 -- For 32-bit systems we only have 2 tag bits in the pointers at our disposal,
742 -- so the performance win is dubious, especially in face of the increased code
743 -- size due to double switching. But we can take the viewpoint that 32-bit
744 -- architectures are not relevant for performance any more, so this can be
745 -- considered as moot.
746
747
748 -- Note [alg-alt heap check]
749 --
750 -- In an algebraic case with more than one alternative, we will have
751 -- code like
752 --
753 -- L0:
754 -- x = R1
755 -- goto L1
756 -- L1:
757 -- if (x & 7 >= 2) then goto L2 else goto L3
758 -- L2:
759 -- Hp = Hp + 16
760 -- if (Hp > HpLim) then goto L4
761 -- ...
762 -- L4:
763 -- call gc() returns to L5
764 -- L5:
765 -- x = R1
766 -- goto L1
767
768
769 -- Note [Tagging big families]
770 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
771 --
772 -- Both the big and the small constructor families are tagged,
773 -- that is, greater unions which overflow the tag space of TAG_BITS
774 -- (i.e. 3 on 32 resp. 7 constructors on 64 bit archs).
775 --
776 -- For example, let's assume a 64-bit architecture, so that all
777 -- heap objects are 8-byte aligned, and hence the address of a
778 -- heap object ends in `000` (three zero bits). Then consider
779 -- > data Maybe a = Nothing | Just a
780 -- > data Day a = Mon | Tue | Wed | Thu | Fri | Sat | Sun
781 -- > data Grade = G1 | G2 | G3 | G4 | G5 | G6 | G7 | G8 | G9 | G10
782 --
783 -- Since `Grade` has more than 7 constructors, it counts as a
784 -- "big data type" (also referred to as "big constructor family" in papers).
785 -- On the other hand, `Maybe` and `Day` have 7 constructors or fewer, so they
786 -- are "small data types".
787 --
788 -- Then
789 -- * A pointer to an unevaluated thunk of type `Maybe Int`, `Day` or `Grade` will end in `000`
790 -- * A tagged pointer to a `Nothing`, `Mon` or `G1` will end in `001`
791 -- * A tagged pointer to a `Just x`, `Tue` or `G2` will end in `010`
792 -- * A tagged pointer to `Wed` or `G3` will end in `011`
793 -- ...
794 -- * A tagged pointer to `Sat` or `G6` will end in `110`
795 -- * A tagged pointer to `Sun` or `G7` or `G8` or `G9` or `G10` will end in `111`
796 --
797 -- For big families we employ a mildly clever way of combining pointer and
798 -- info-table tagging. We use 1..MAX_PTR_TAG-1 as pointer-resident tags where
799 -- the tags in the pointer and the info table are in a one-to-one
800 -- relation, whereas tag MAX_PTR_TAG is used as "spill over", signifying
801 -- we have to fall back and get the precise constructor tag from the
802 -- info-table.
803 --
804 -- Consequently we now cascade switches, because we have to check
805 -- the pointer tag first, and when it is MAX_PTR_TAG, fetch the precise
806 -- tag from the info table, and switch on that. The only technically
807 -- tricky part is that the default case needs (logical) duplication.
808 -- To do this we emit an extra label for it and branch to that from
809 -- the second switch. This avoids duplicated codegen. See Trac #14373.
810 -- See note [Double switching for big families] for the mechanics
811 -- involved.
812 --
813 -- Also see note [Data constructor dynamic tags]
814 -- and the wiki https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/haskell-execution/pointer-tagging
815 --
816
817 -------------------
-- | Compile the RHSs of an algebraic case and split the results into the
-- optional DEFAULT code and the (zero-based constructor tag, code) branches.
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
cgAlgAltRhss gc_plan bndr alts = do
  tagged_cmms <- cgAltRhss gc_plan bndr alts
  -- DEFAULT is always first, if present
  let mb_deflt = case tagged_cmms of
        (DEFAULT, rhs) : _ -> Just rhs
        _                  -> Nothing
      branches = [ (dataConTagZ con, cmm)
                 | (DataAlt con, cmm) <- tagged_cmms ]
  return (mb_deflt, branches)
835
836
837 -------------------
-- | Compile every alternative of a case expression, wrapping each RHS in
-- a heap check when the GC plan requires one, and pair each with its
-- 'AltCon'.  The scrutinee binder is bound to the register it lives in.
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
cgAltRhss gc_plan bndr alts = do
  platform <- getPlatform
  let base_reg = idToReg platform bndr

      cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
      cg_alt (con, bndrs, rhs) =
        getCodeScoped $
        maybeAltHeapCheck gc_plan $ do
          -- alt binders are always non-void,
          -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
          _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
          _ <- cgExpr rhs
          return con

  forkAlts (map cg_alt alts)
854
-- | Wrap the code of a case alternative in a heap check, if the GC plan
-- asks for one.  Which check we emit depends on how the scrutinee's
-- result was returned (see 'ReturnKind').
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (gc_plan, ret_kind) code =
  case gc_plan of
    NoGcInAlts    -> code
    GcInAlts regs ->
      case ret_kind of
        AssignedDirectly    -> altHeapCheck regs code
        ReturnedTo lret off -> altHeapCheckReturnsTo regs lret off code
861
862 -----------------------------------------------------------------------------
863 -- Tail calls
864 -----------------------------------------------------------------------------
865
-- | Compile a saturated constructor application in tail position.
-- Unboxed tuples involve no allocation: the components are simply
-- assigned and returned.  Boxed constructors allocate a closure and
-- return a pointer to it.
cgConApp :: DataCon -> ConstructorNumber -> [StgArg] -> FCode ReturnKind
cgConApp con mn stg_args
  | isUnboxedTupleDataCon con
  = do -- Unboxed tuple: assign the components and return
       arg_exprs <- getNonVoidArgAmodes stg_args
       tickyUnboxedTupleReturn (length arg_exprs)
       emitReturn arg_exprs

  | otherwise
  = assertPpr (stg_args `lengthIs` countConRepArgs con)
              (ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args) $
    do -- Boxed constructor: allocate, initialise, and return the pointer.
       -- con args are always non-void,
       -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
       -- Naming the closure after the worker Id of "con" is a bit of a
       -- fudge, but it only affects profiling (hence the False).
       (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) mn False
                                           currentCCS con
                                           (assertNonVoidStgArgs stg_args)
       init_code <- fcode_init
       emit init_code
       tickyReturnNewCon (length stg_args)
       emitReturn [idInfoToAmode idinfo]
887
-- | Compile the application of an identifier to some (possibly zero)
-- arguments, in tail position.  'getCallMethod' decides how the callee
-- must be invoked (return, enter, slow call, direct call, or local
-- jump); we emit code for the chosen method.
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id args = do
    fun_info <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop  -- see Note [Self-recursive tail calls]
    call_opts <- getCallOpts
    profile <- getProfile
    let fun_arg = StgVarArg fun_id
        fun_name = idName fun_id
        fun = idInfoToAmode fun_info
        lf_info = cg_lf fun_info
        n_args = length args
        -- Void arguments must not count towards arity when deciding on a
        -- self-recursive jump;
        -- see Note [Void arguments in self-recursive tail calls]
        v_args = length $ filter (isVoidTy . stgArgType) args
    case getCallMethod call_opts fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
        -- A value in WHNF, so we can just return it.
        ReturnIt
            | isVoidTy (idType fun_id) -> emitReturn []
            | otherwise -> emitReturn [fun]
            -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> assert (null args) $ -- Discarding arguments
                   emitEnter fun

        SlowCall -> do -- A slow function call via the RTS apply routines
            { tickySlowCall lf_info args
            ; emitComment $ mkFastString "slowCall"
            ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
            { tickyDirectCall arity args
            -- If the closure's entry convention needs node (R1) to point
            -- at it, pass the function itself as an extra argument.
            ; if nodeMustPointToIt profile lf_info
                 then directCall NativeNodeCall lbl arity (fun_arg:args)
                 else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call:
        -- assign the arguments to the join point's registers and branch.
        JumpToIt blk_id lne_regs -> do
            { adjustHpBackwards -- always do this before a tail-call
            ; cmm_args <- getNonVoidArgAmodes args
            ; emitMultiAssign lne_regs cmm_args
            ; emit (mkBranch blk_id)
            ; return AssignedDirectly }
929
930 -- Note [Self-recursive tail calls]
931 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
932 --
933 -- Self-recursive tail calls can be optimized into a local jump in the same
934 -- way as let-no-escape bindings (see Note [What is a non-escaping let] in
935 -- "GHC.CoreToStg"). Consider this:
936 --
937 -- foo.info:
938 -- a = R1 // calling convention
939 -- b = R2
940 -- goto L1
941 -- L1: ...
942 -- ...
943 -- ...
944 -- L2: R1 = x
945 -- R2 = y
946 -- call foo(R1,R2)
947 --
948 -- Instead of putting x and y into registers (or other locations required by the
949 -- calling convention) and performing a call we can put them into local
950 -- variables a and b and perform jump to L1:
951 --
952 -- foo.info:
953 -- a = R1
954 -- b = R2
955 -- goto L1
956 -- L1: ...
957 -- ...
958 -- ...
959 -- L2: a = x
960 -- b = y
961 -- goto L1
962 --
963 -- This can be done only when function is calling itself in a tail position
964 -- and only if the call passes number of parameters equal to function's arity.
965 -- Note that this cannot be performed if a function calls itself with a
966 -- continuation.
967 --
968 -- This in fact implements optimization known as "loopification". It was
969 -- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
971 -- optimization at the Cmm level, whereas we perform ours during code generation
972 -- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
973 -- generated in the first place.
974 --
975 -- Implementation is spread across a couple of places in the code:
976 --
977 -- * FCode monad stores additional information in its reader environment
978 -- (cgd_self_loop field). This information tells us which function can
979 -- tail call itself in an optimized way (it is the function currently
980 -- being compiled), what is the label of a loop header (L1 in example above)
--   and information about the local registers in which we should place
--   arguments before making a call (these would be a and b in the example above).
983 --
984 -- * Whenever we are compiling a function, we set that information to reflect
985 -- the fact that function currently being compiled can be jumped to, instead
--   of called. This is done in closureCodeBody in GHC.StgToCmm.Bind.
987 --
988 -- * We also have to emit a label to which we will be jumping. We make sure
989 -- that the label is placed after a stack check but before the heap
990 -- check. The reason is that making a recursive tail-call does not increase
991 -- the stack so we only need to check once. But it may grow the heap, so we
992 -- have to repeat the heap check in every self-call. This is done in
993 -- do_checks in GHC.StgToCmm.Heap.
994 --
995 -- * When we begin compilation of another closure we remove the additional
996 -- information from the environment. This is done by forkClosureBody
997 -- in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
998 -- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
999 -- words, we only need to clean the environment of the self-loop information
1000 -- when compiling right hand side of a closure (binding).
1001 --
1002 -- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
1003 -- of call will be generated. getCallMethod decides to generate a self
1004 -- recursive tail call when (a) environment stores information about
1005 -- possible self tail-call; (b) that tail call is to a function currently
1006 -- being compiled; (c) number of passed non-void arguments is equal to
1007 -- function's arity. (d) loopification is turned on via -floopification
1008 -- command-line option.
1009 --
1010 -- * Command line option to turn loopification on and off is implemented in
1011 -- DynFlags.
1012 --
1013 --
1014 -- Note [Void arguments in self-recursive tail calls]
1015 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1016 --
1017 -- State# tokens can get in the way of the loopification optimization as seen in
1018 -- #11372. Consider this:
1019 --
1020 -- foo :: [a]
--        -> (a -> State# s -> (# State# s, Bool #))
1022 -- -> State# s
1023 -- -> (# State# s, Maybe a #)
1024 -- foo [] f s = (# s, Nothing #)
1025 -- foo (x:xs) f s = case f x s of
1026 -- (# s', b #) -> case b of
1027 -- True -> (# s', Just x #)
1028 -- False -> foo xs f s'
1029 --
1030 -- We would like to compile the call to foo as a local jump instead of a call
1031 -- (see Note [Self-recursive tail calls]). However, the generated function has
1032 -- an arity of 2 while we apply it to 3 arguments, one of them being of void
1033 -- type. Thus, we mustn't count arguments of void type when checking whether
1034 -- we can turn a call into a self-recursive jump.
1035 --
1036
-- | Emit code to enter (evaluate) the closure @fun@.  The shape of the
-- generated code depends on the current sequel: for 'Return' we
-- tail-jump straight to the closure's entry code; for 'AssignTo' we
-- first emit a pointer-tag test so that already-evaluated (tagged)
-- values skip the call entirely.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { ptr_opts <- getPtrOpts
  ; platform <- getPlatform
  ; profile <- getProfile
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return -> do
        { let entry = entryCode platform $ closureInfoPtr ptr_opts $ CmmReg nodeReg
        ; emit $ mkJump profile NativeNodeCall entry
                        [cmmUntag platform fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lret else goto Lcall
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newBlockId
       ; let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) res_regs []
       ; lcall <- newBlockId
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow profile NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode platform (closureInfoPtr ptr_opts (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; tscope <- getTickScope
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged platform (CmmReg nodeReg))
                     lret lcall Nothing <*>
           outOfLine lcall (the_call,tscope) <*>
           mkLabel lret tscope <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }
1110
1111 ------------------------------------------------------------------------
1112 -- Ticks
1113 ------------------------------------------------------------------------
1114
1115 -- | Generate Cmm code for a tick. Depending on the type of Tickish,
1116 -- this will either generate actual Cmm instrumentation code, or
1117 -- simply pass on the annotation as a @CmmTickish@.
cgTick :: StgTickish -> FCode ()
cgTick tick = case tick of
  ProfNote cc t p -> emitSetCCC cc t p
  HpcTick m n     -> do { platform <- getPlatform
                        ; emit (mkTickBox platform m n) }
  SourceNote s n  -> emitTick (SourceNote s n)
  _other          -> return () -- remaining tick kinds produce no code here