    1 {-# LANGUAGE BangPatterns #-}
    2 
    3 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
    4 
    5 -----------------------------------------------------------------------------
    6 --
    7 -- Stg to C-- code generation: expressions
    8 --
    9 -- (c) The University of Glasgow 2004-2006
   10 --
   11 -----------------------------------------------------------------------------
   12 
   13 module GHC.StgToCmm.Expr ( cgExpr, cgLit ) where
   14 
   15 import GHC.Prelude hiding ((<*>))
   16 
   17 import {-# SOURCE #-} GHC.StgToCmm.Bind ( cgBind )
   18 
   19 import GHC.StgToCmm.Monad
   20 import GHC.StgToCmm.Heap
   21 import GHC.StgToCmm.Env
   22 import GHC.StgToCmm.DataCon
   23 import GHC.StgToCmm.Prof (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
   24 import GHC.StgToCmm.Layout
   25 import GHC.StgToCmm.Lit
   26 import GHC.StgToCmm.Prim
   27 import GHC.StgToCmm.Hpc
   28 import GHC.StgToCmm.Ticky
   29 import GHC.StgToCmm.Utils
   30 import GHC.StgToCmm.Closure
   31 
   32 import GHC.Stg.Syntax
   33 
   34 import GHC.Cmm.Graph
   35 import GHC.Cmm.BlockId
   36 import GHC.Cmm hiding ( succ )
   37 import GHC.Cmm.Info
   38 import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG )
   39 import GHC.Core
   40 import GHC.Core.DataCon
   41 import GHC.Types.ForeignCall
   42 import GHC.Types.Id
   43 import GHC.Builtin.PrimOps
   44 import GHC.Core.TyCon
   45 import GHC.Core.Type        ( isUnliftedType )
   46 import GHC.Types.RepType    ( isVoidTy, countConRepArgs )
   47 import GHC.Types.CostCentre ( CostCentreStack, currentCCS )
   48 import GHC.Types.Tickish
   49 import GHC.Data.Maybe
   50 import GHC.Utils.Misc
   51 import GHC.Data.FastString
   52 import GHC.Utils.Outputable
   53 import GHC.Utils.Panic
   54 import GHC.Utils.Panic.Plain
   55 
   56 import Control.Monad ( unless, void )
   57 import Control.Arrow ( first )
   58 import Data.List     ( partition )
   59 
   60 ------------------------------------------------------------------------
   61 --              cgExpr: the main function
   62 ------------------------------------------------------------------------
   63 
-- | Generate Cmm code for a single STG expression, returning how the
-- result is delivered to the continuation (see 'ReturnKind').
cgExpr  :: CgStgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

-- seq# a s ==> a
-- See Note [seq# magic] in GHC.Core.Opt.ConstantFold
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

-- dataToTag# :: a -> Int#
-- See Note [dataToTag# magic] in primops.txt.pp
cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
  platform <- getPlatform
  emitComment (mkFastString "dataToTag#")
  info <- getCgIdInfo a
  let amode = idInfoToAmode info
  tag_reg <- assignTemp $ cmmConstrTag1 platform amode
  result_reg <- newTemp (bWord platform)
  let tag = CmmReg $ CmmLocal tag_reg
      is_tagged = cmmNeWord platform tag (zeroExpr platform)
      is_too_big_tag = cmmEqWord platform tag (cmmTagMask platform)
  -- Here we will first check the tag bits of the pointer we were given;
  -- if this doesn't work then enter the closure and use the info table
  -- to determine the constructor. Note that all tag bits set means that
  -- the constructor index is too large to fit in the pointer and therefore
  -- we must look in the info table. See Note [Tagging big families].

  -- Slow path: evaluate/enter the closure, then read the constructor
  -- index from its info table.
  slow_path <- getCode $ do
      tmp <- newTemp (bWord platform)
      _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
      ptr_opts <- getPtrOpts
      emitAssign (CmmLocal result_reg)
        $ getConstrTag ptr_opts (cmmUntag platform (CmmReg (CmmLocal tmp)))

  -- Fast path: the pointer is tagged, so usually the constructor index is
  -- (tag - 1); with all tag bits set (big family) we must still consult
  -- the info table.
  fast_path <- getCode $ do
      -- Return the constructor index from the pointer tag
      return_ptr_tag <- getCode $ do
          emitAssign (CmmLocal result_reg)
            $ cmmSubWord platform tag (CmmLit $ mkWordCLit platform 1)
      -- Return the constructor index recorded in the info table
      return_info_tag <- getCode $ do
          ptr_opts <- getPtrOpts
          emitAssign (CmmLocal result_reg)
            $ getConstrTag ptr_opts (cmmUntag platform amode)

      -- NOTE(review): the final (Maybe Bool) argument appears to be a
      -- branch-likelihood hint -- confirm against mkCmmIfThenElse'.
      emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)

  emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
  emitReturn [CmmReg $ CmmLocal result_reg]


cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con mn args _) = cgConApp con mn args
cgExpr (StgTick t e)         = cgTick t >> cgExpr e
cgExpr (StgLit lit)          = do cmm_expr <- cgLit lit
                                  emitReturn [cmm_expr]

cgExpr (StgLet _ binds expr) = do { cgBind binds;     cgExpr expr }
cgExpr (StgLetNoEscape _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     -- Compile the join-point bindings and the body, then place the join
     -- label after the body: every let-no-escape RHS branches back here.
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts
  132 
  133 ------------------------------------------------------------------------
  134 --              Let no escape
  135 ------------------------------------------------------------------------
  136 
  137 {- Generating code for a let-no-escape binding, aka join point is very
  138 very similar to what we do for a case expression.  The duality is
  139 between
  140         let-no-escape x = b
  141         in e
  142 and
  143         case e of ... -> b
  144 
  145 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
  146 the alternative of the case; it needs to be compiled in an environment
  147 in which all volatile bindings are forgotten, and the free vars are
  148 bound only to stable things like stack locations..  The 'e' part will
  149 execute *next*, just like the scrutinee of a case. -}
  150 
  151 -------------------------
  152 cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
  153 cgLneBinds join_id (StgNonRec bndr rhs)
  154   = do  { local_cc <- saveCurrentCostCentre
  155                 -- See Note [Saving the current cost centre]
  156         ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
  157         ; fcode
  158         ; addBindC info }
  159 
  160 cgLneBinds join_id (StgRec pairs)
  161   = do  { local_cc <- saveCurrentCostCentre
  162         ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
  163         ; let (infos, fcodes) = unzip r
  164         ; addBindsC infos
  165         ; sequence_ fcodes
  166         }
  167 
  168 -------------------------
  169 cgLetNoEscapeRhs
  170     :: BlockId          -- join point for successor of let-no-escape
  171     -> Maybe LocalReg   -- Saved cost centre
  172     -> Id
  173     -> CgStgRhs
  174     -> FCode (CgIdInfo, FCode ())
  175 
  176 cgLetNoEscapeRhs join_id local_cc bndr rhs =
  177   do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
  178      ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
  179      ; let code = do { (_, body) <- getCodeScoped rhs_code
  180                      ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
  181      ; return (info, code)
  182      }
  183 
  184 cgLetNoEscapeRhsBody
  185     :: Maybe LocalReg   -- Saved cost centre
  186     -> Id
  187     -> CgStgRhs
  188     -> FCode (CgIdInfo, FCode ())
  189 cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
  190   = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
  191 cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con mn _ts args)
  192   = cgLetNoEscapeClosure bndr local_cc cc []
  193       (StgConApp con mn args (pprPanic "cgLetNoEscapeRhsBody" $
  194                            text "StgRhsCon doesn't have type args"))
  195         -- For a constructor RHS we want to generate a single chunk of
  196         -- code which can be jumped to from many places, which will
  197         -- return the constructor. It's easy; just behave as if it
  198         -- was an StgRhsClosure with a ConApp inside!
  199 
  200 -------------------------
  201 cgLetNoEscapeClosure
  202         :: Id                   -- binder
  203         -> Maybe LocalReg       -- Slot for saved current cost centre
  204         -> CostCentreStack      -- XXX: *** NOT USED *** why not?
  205         -> [NonVoid Id]         -- Args (as in \ args -> body)
  206         -> CgStgExpr            -- Body (as in above)
  207         -> FCode (CgIdInfo, FCode ())
  208 
  209 cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  210   = do platform <- getPlatform
  211        return ( lneIdInfo platform bndr args, code )
  212   where
  213    code = forkLneBody $ withNewTickyCounterLNE (idName bndr) args $ do
  214             { restoreCurrentCostCentre cc_slot
  215             ; arg_regs <- bindArgsToRegs args
  216             ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
  217 
  218 
  219 ------------------------------------------------------------------------
  220 --              Case expressions
  221 ------------------------------------------------------------------------
  222 
  223 {- Note [Compiling case expressions]
  224 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  225 It is quite interesting to decide whether to put a heap-check at the
  226 start of each alternative.  Of course we certainly have to do so if
  227 the case forces an evaluation, or if there is a primitive op which can
  228 trigger GC.
  229 
  230 A more interesting situation is this (a Plan-B situation)
  231 
  232         !P!;
  233         ...P...
  234         case x# of
  235           0#      -> !Q!; ...Q...
  236           default -> !R!; ...R...
  237 
  238 where !x! indicates a possible heap-check point. The heap checks
  239 in the alternatives *can* be omitted, in which case the topmost
  240 heapcheck will take their worst case into account.
  241 
  242 In favour of omitting !Q!, !R!:
  243 
  244  - *May* save a heap overflow test,
  245    if ...P... allocates anything.
  246 
  247  - We can use relative addressing from a single Hp to
  248    get at all the closures so allocated.
  249 
  250  - No need to save volatile vars etc across heap checks
  251    in !Q!, !R!
  252 
  253 Against omitting !Q!, !R!
  254 
  255   - May put a heap-check into the inner loop.  Suppose
  256         the main loop is P -> R -> P -> R...
  257         Q is the loop exit, and only it does allocation.
  258     This only hurts us if P does no allocation.  If P allocates,
  259     then there is a heap check in the inner loop anyway.
  260 
  261   - May do more allocation than reqd.  This sometimes bites us
  262     badly.  For example, nfib (ha!) allocates about 30\% more space if the
  263     worst-casing is done, because many many calls to nfib are leaf calls
  264     which don't need to allocate anything.
  265 
  266     We can un-allocate, but that costs an instruction
  267 
  268 Neither problem hurts us if there is only one alternative.
  269 
  270 Suppose the inner loop is P->R->P->R etc.  Then here is
  271 how many heap checks we get in the *inner loop* under various
  272 conditions
  273 
  274   Alloc   Heap check in branches (!Q!, !R!)?
  275   P Q R      yes     no (absorb to !P!)
  276 --------------------------------------
  277   n n n      0          0
  278   n y n      0          1
  279   n . y      1          1
  280   y . y      2          1
  281   y . n      1          1
  282 
  283 Best choices: absorb heap checks from Q and R into !P! iff
  284   a) P itself does some allocation
  285 or
  286   b) P does allocation, or there is exactly one alternative
  287 
  288 We adopt (b) because that is more likely to put the heap check at the
  289 entry to a function, when not many things are live.  After a bunch of
  290 single-branch cases, we may have lots of things live
  291 
  292 Hence: two basic plans for
  293 
  294         case e of r { alts }
  295 
  296 ------ Plan A: the general case ---------
  297 
  298         ...save current cost centre...
  299 
  300         ...code for e,
  301            with sequel (SetLocals r)
  302 
  303         ...restore current cost centre...
  304         ...code for alts...
  305         ...alts do their own heap checks
  306 
  307 ------ Plan B: special case when ---------
  308   (i)  e does not allocate or call GC
  309   (ii) either upstream code performs allocation
  310        or there is just one alternative
  311 
  312   Then heap allocation in the (single) case branch
  313   is absorbed by the upstream check.
  314   Very common example: primops on unboxed values
  315 
  316         ...code for e,
  317            with sequel (SetLocals r)...
  318 
  319         ...code for alts...
  320         ...no heap check...
  321 -}
  322 
  323 
  324 
  325 -------------------------------------
-- | Where the GC check for a case's alternatives is placed.
-- See Note [Compiling case expressions].
data GcPlan
  = GcInAlts            -- Put a GC check at the start the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check
  332 
  333 -------------------------------------
-- | Generate code for @case scrut of bndr { alts }@.  Several special
-- cases are handled before the general one; see the Notes interspersed
-- with the equations below.
cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind
  335 
  336 {-
  337 Note [Scrutinising VoidRep]
  338 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  339 Suppose we have this STG code:
  340    f = \[s : State# RealWorld] ->
  341        case s of _ -> blah
  342 This is very odd.  Why are we scrutinising a state token?  But it
  343 can arise with bizarre NOINLINE pragmas (#9964)
  344     crash :: IO ()
  345     crash = IO (\s -> let {-# NOINLINE s' #-}
  346                           s' = s
  347                       in (# s', () #))
  348 
  349 Now the trouble is that 's' has VoidRep, and we do not bind void
  350 arguments in the environment; they don't live anywhere.  See the
  351 calls to nonVoidIds in various places.  So we must not look up
  352 's' in the environment.  Instead, just evaluate the RHS!  Simple.
  353 
  354 Note [Dead-binder optimisation]
  355 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  356 A case-binder, or data-constructor argument, may be marked as dead,
  357 because we preserve occurrence-info on binders in GHC.Core.Tidy (see
  358 GHC.Core.Tidy.tidyIdBndr).
  359 
  360 If the binder is dead, we can sometimes eliminate a load.  While
  361 CmmSink will eliminate that load, it's very easy to kill it at source
  362 (giving CmmSink less work to do), and in any case CmmSink only runs
  363 with -O. Since the majority of case binders are dead, this
  364 optimisation probably still has a great benefit-cost ratio and we want
  365 to keep it for -O0. See also Phab:D5358.
  366 
  367 This probably also was the reason for occurrence hack in Phab:D5339 to
  368 exist, perhaps because the occurrence information preserved by
  369 'GHC.Core.Tidy.tidyIdBndr' was insufficient.  But now that CmmSink does the
  370 job we deleted the hacks.
  371 -}
  372 
-- Scrutinising a void-rep variable: there is nothing to evaluate or bind,
-- so just compile the (single, DEFAULT) alternative.
cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, rhs)] <- alts
  = cgExpr rhs
  377 
  378 {- Note [Dodgy unsafeCoerce 1]
  379 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  380 Consider
  381     case (x :: HValue) |> co of (y :: MutVar# Int)
  382         DEFAULT -> ...
  383 We want to generate an assignment
  384      y := x
  385 We want to allow this assignment to be generated in the case when the
  386 types are compatible, because this allows some slightly-dodgy but
  387 occasionally-useful casts to be used, such as in GHC.Runtime.Heap.Inspect
  388 where we cast an HValue to a MutVar# so we can print out the contents
  389 of the MutVar#.  If instead we generate code that enters the HValue,
  390 then we'll get a runtime panic, because the HValue really is a
  391 MutVar#.  The types are compatible though, so we can just generate an
  392 assignment.
  393 -}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
  = -- assignment suffices for unlifted types
    do { platform <- getPlatform
       ; unless (reps_compatible platform) $
           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
                    (pp_bndr v $$ pp_bndr bndr)
       ; v_info <- getCgIdInfo v
       -- Copy the scrutinee's amode straight into the case binder's register
       ; emitAssign (CmmLocal (idToReg platform (NonVoid bndr)))
                    (idInfoToAmode v_info)
       -- Add bndr to the environment
       ; _ <- bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    -- The prim-reps need only be compatible, not identical
    reps_compatible platform = primRepCompatible platform (idPrimRep v) (idPrimRep bndr)

    -- Render an Id with its type and prim-rep, for the panic message
    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))
  411 
  412 {- Note [Dodgy unsafeCoerce 2, #3132]
  413 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  414 In all other cases of a lifted Id being cast to an unlifted type, the
  415 Id should be bound to bottom, otherwise this is an unsafe use of
  416 unsafeCoerce.  We can generate code to enter the Id and assume that
  417 it will never return.  Hence, we emit the usual enter/return code, and
  418 because bottom must be untagged, it will be entered.  The Sequel is a
  419 type-correct assignment, albeit bogus.  The (dead) continuation loops;
  420 it would be better to invoke some kind of panic function here.
  421 -}
-- A lifted Id scrutinised at PrimAlt: only defensible if the Id is bottom.
-- See Note [Dodgy unsafeCoerce 2, #3132] above.
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { platform <- getPlatform
       ; mb_cc <- maybeSaveCostCentre True
       -- Evaluate the scrutinee with a (type-correct but bogus) sequel;
       -- since 'v' must be bottom this never returns.
       ; _ <- withSequel
                  (AssignTo [idToReg platform (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newBlockId
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }
  434 
  435 {- Note [Handle seq#]
  436 ~~~~~~~~~~~~~~~~~~~~~
  437 See Note [seq# magic] in GHC.Core.Opt.ConstantFold.
  438 The special case for seq# in cgCase does this:
  439 
  440   case seq# a s of v
  441     (# s', a' #) -> e
  442 ==>
  443   case a of v
  444     (# s', a' #) -> e
  445 
  446 (taking advantage of the fact that the return convention for (# State#, a #)
  447 is the same as the return convention for just 'a')
  448 -}
  449 
-- A seq# scrutinee compiles exactly as scrutinising 'a' itself.
cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- And see Note [seq# magic] in GHC.Core.Opt.ConstantFold
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts
  455 
cgCase scrut bndr alt_type alts
  = -- the general case
    do { platform <- getPlatform
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg platform) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       -- Choose between Plan A (GC check per alternative) and Plan B
       -- (absorb the alternatives' allocation into the upstream check).
       ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
                    | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
  where
    -- Comparison primops never get GC checks in the alternatives;
    -- see Note [GC for conditionals]
    is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
    is_cmp_op _                             = False
  482 
  483 {- Note [GC for conditionals]
  484 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  485 For boolean conditionals it seems that we have always done NoGcInAlts.
  486 That is, we have always done the GC check before the conditional.
  487 This is enshrined in the special case for
  488    case tagToEnum# (a>b) of ...
  489 See Note [case on bool]
  490 
  491 It's odd, and it's flagrantly inconsistent with the rules described
  492 Note [Compiling case expressions].  However, after eliminating the
  493 tagToEnum# (#13397) we will have:
  494    case (a>b) of ...
  495 Rather than make it behave quite differently, I am testing for a
  496 comparison operator here in the general case as well.
  497 
  498 ToDo: figure out what the Right Rule should be.
  499 
  500 Note [scrut sequel]
  501 ~~~~~~~~~~~~~~~~~~~
  502 The job of the scrutinee is to assign its value(s) to alt_regs.
  503 Additionally, if we plan to do a heap-check in the alternatives (see
  504 Note [Compiling case expressions]), then we *must* retreat Hp to
  505 recover any unused heap before passing control to the sequel.  If we
  506 don't do this, then any unused heap will become slop because the heap
  507 check will reset the heap usage. Slop in the heap breaks LDV profiling
  508 (+RTS -hb) which needs to do a linear sweep through the nursery.
  509 
  510 
  511 Note [Inlining out-of-line primops and heap checks]
  512 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  513 If shouldInlinePrimOp returns True when called from GHC.StgToCmm.Expr for the
  514 purpose of heap check placement, we *must* inline the primop later in
  515 GHC.StgToCmm.Prim. If we don't things will go wrong.
  516 -}
  517 
  518 -----------------
  519 maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
  520 maybeSaveCostCentre simple_scrut
  521   | simple_scrut = return Nothing
  522   | otherwise    = saveCurrentCostCentre
  523 
  524 
  525 -----------------
  526 isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
  527 -- Simple scrutinee, does not block or allocate; hence safe to amalgamate
  528 -- heap usage from alternatives into the stuff before the case
  529 -- NB: if you get this wrong, and claim that the expression doesn't allocate
  530 --     when it does, you'll deeply mess up allocation
  531 isSimpleScrut (StgOpApp op args _) _       = isSimpleOp op args
  532 isSimpleScrut (StgLit _)       _           = return True       -- case 1# of { 0# -> ..; ... }
  533 isSimpleScrut (StgApp _ [])    (PrimAlt _) = return True       -- case x# of { 0# -> ..; ... }
  534 isSimpleScrut _                _           = return False
  535 
  536 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
  537 -- True iff the op cannot block or allocate
  538 isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
  539 -- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
  540 isSimpleOp (StgPrimOp DataToTagOp) _ = return False
  541 isSimpleOp (StgPrimOp op) stg_args                  = do
  542     arg_exprs <- getNonVoidArgAmodes stg_args
  543     dflags <- getDynFlags
  544     -- See Note [Inlining out-of-line primops and heap checks]
  545     return $! shouldInlinePrimOp dflags op arg_exprs
  546 isSimpleOp (StgPrimCallOp _) _                           = return False
  547 
  548 -----------------
  549 chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
  550 -- These are the binders of a case that are assigned by the evaluation of the
  551 -- scrutinee.
  552 -- They're non-void, see Note [Post-unarisation invariants] in GHC.Stg.Unarise.
  553 chooseReturnBndrs bndr (PrimAlt _) _alts
  554   = assertNonVoidIds [bndr]
  555 
  556 chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
  557   = assertPpr (ids `lengthIs` n) (ppr n $$ ppr ids $$ ppr _bndr) $
  558     assertNonVoidIds ids     -- 'bndr' is not assigned!
  559 
  560 chooseReturnBndrs bndr (AlgAlt _) _alts
  561   = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned
  562 
  563 chooseReturnBndrs bndr PolyAlt _alts
  564   = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned
  565 
  566 chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
  567                              -- MultiValAlt has only one alternative
  568 
  569 -------------------------------------
-- | Generate code for the alternatives of a case; by the time we get
-- here the scrutinee's result is already in the return binders.
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them
  579 
  580 cgAlts gc_plan bndr (PrimAlt _) alts
  581   = do  { platform <- getPlatform
  582 
  583         ; tagged_cmms <- cgAltRhss gc_plan bndr alts
  584 
  585         ; let bndr_reg = CmmLocal (idToReg platform bndr)
  586               (DEFAULT,deflt) = head tagged_cmms
  587                 -- PrimAlts always have a DEFAULT case
  588                 -- and it always comes first
  589 
  590               tagged_cmms' = [(lit,code)
  591                              | (LitAlt lit, code) <- tagged_cmms]
  592         ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
  593         ; return AssignedDirectly }
  594 
cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { platform <- getPlatform

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let !fam_sz   = tyConFamilySize tycon
              !bndr_reg = CmmLocal (idToReg platform bndr)
              !ptag_expr = cmmConstrTag1 platform (CmmReg bndr_reg)
              -- branches' carries 1-based pointer tags (hence 'succ')
              !branches' = first succ <$> branches
              !maxpt = mAX_PTR_TAG platform
              (!via_ptr, !via_info) = partition ((< maxpt) . fst) branches'
              !small = isSmallFamily platform fam_sz

                -- Is the constructor tag in the node reg?
                -- See Note [Tagging big families]
        ; if small || null via_info
           then -- Yes, bndr_reg has constructor tag in ls bits
               emitSwitch ptag_expr branches' mb_deflt 1
                 (if small then fam_sz else maxpt)

           else -- No, get the exact tag from the info table when the
                -- pointer tag is mAX_PTR_TAG
                -- See Note [Double switching for big families]
              do
                ptr_opts <- getPtrOpts
                let !untagged_ptr = cmmUntag platform (CmmReg bndr_reg)
                    !itag_expr = getConstrTag ptr_opts untagged_ptr
                    -- info-table tags are 0-based (hence 'pred')
                    !info0 = first pred <$> via_info
                if null via_ptr then
                  emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1)
                else do
                  infos_lbl <- newBlockId
                  infos_scp <- getTickScope

                  -- The outer switch branches here for the overflow tag
                  let spillover = (maxpt, (mkBranch infos_lbl, infos_scp))

                  -- Share one copy of the default RHS between both
                  -- switches: the outer switch owns the code (behind a
                  -- fresh label), the inner one just jumps to it.
                  (mb_shared_deflt, mb_shared_branch) <- case mb_deflt of
                      (Just (stmts, scp)) ->
                          do lbl <- newBlockId
                             return ( Just (mkLabel lbl scp <*> stmts, scp)
                                    , Just (mkBranch lbl, scp))
                      _ -> return (Nothing, Nothing)
                  -- Switch on pointer tag
                  emitSwitch ptag_expr (spillover : via_ptr) mb_shared_deflt 1 maxpt
                  join_lbl <- newBlockId
                  emit (mkBranch join_lbl)
                  -- Switch on info table tag
                  emitLabel infos_lbl
                  emitSwitch itag_expr info0 mb_shared_branch
                    (maxpt - 1) (fam_sz - 1)
                  emitLabel join_lbl

        ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative
  650 
  651 -- Note [Double switching for big families]
  652 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  653 --
  654 -- An algebraic data type can have a n >= 0 summands
  655 -- (or alternatives), which are identified (labeled) by
  656 -- constructors. In memory they are kept apart by tags
  657 -- (see Note [Data constructor dynamic tags] in GHC.StgToCmm.Closure).
  658 -- Due to the characteristics of the platform that
  659 -- contribute to the alignment of memory objects, there
  660 -- is a natural limit of information about constructors
  661 -- that can be encoded in the pointer tag. When the mapping
  662 -- of constructors to the pointer tag range 1..mAX_PTR_TAG
  663 -- is not injective, then we have a "big data type", also
  664 -- called a "big (constructor) family" in the literature.
  665 -- Constructor tags residing in the info table are injective,
  666 -- but considerably more expensive to obtain, due to additional
  667 -- memory access(es).
  668 --
  669 -- When doing case analysis on a value of a "big data type"
  670 -- we need two nested switch statements to make up for the lack
  671 -- of injectivity of pointer tagging, also taking the info
  672 -- table tag into account. The exact mechanism is described next.
  673 --
  674 -- In the general case, switching on big family alternatives
  675 -- is done by two nested switch statements. According to
  676 -- Note [Tagging big families], the outer switch
  677 -- looks at the pointer tag and the inner dereferences the
  678 -- pointer and switches on the info table tag.
  679 --
  680 -- We can handle a simple case first, namely when none
  681 -- of the case alternatives mention a constructor having
  682 -- a pointer tag of 1..mAX_PTR_TAG-1. In this case we
  683 -- simply emit a switch on the info table tag.
  684 -- Note that the other simple case is when all mentioned
  685 -- alternatives lie in 1..mAX_PTR_TAG-1, in which case we can
  686 -- switch on the ptr tag only, just like in the small family case.
  687 --
  688 -- There is a single intricacy with a nested switch:
  689 -- Both should branch to the same default alternative, and as such
  690 -- avoid duplicate codegen of potentially heavy code. The outer
  691 -- switch generates the actual code with a prepended fresh label,
  692 -- while the inner one only generates a jump to that label.
  693 --
  694 -- For example, let's assume a 64-bit architecture, so that all
  695 -- heap objects are 8-byte aligned, and hence the address of a
  696 -- heap object ends in `000` (three zero bits).
  697 --
  698 -- Then consider the following data type
  699 --
  700 --   > data Big = T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8
  701 --   Ptr tag:      1    2    3    4    5    6    7    7    7
  702 --   As bits:    001  010  011  100  101  110  111  111  111
  703 --   Info pointer tag (zero based):
  704 --                 0    1    2    3    4    5    6    7    8
  705 --
  706 -- Then     \case T2 -> True; T8 -> True; _ -> False
  707 -- will result in following code (slightly cleaned-up and
  708 -- commented -ddump-cmm-from-stg):
  709 {-
  710            R1 = _sqI::P64;  -- scrutinee
  711            if (R1 & 7 != 0) goto cqO; else goto cqP;
  712        cqP: // global       -- enter
  713            call (I64[R1])(R1) returns to cqO, args: 8, res: 8, upd: 8;
  714        cqO: // global       -- already WHNF
  715            _sqJ::P64 = R1;
  716            _cqX::P64 = _sqJ::P64 & 7;  -- extract pointer tag
  717            switch [1 .. 7] _cqX::P64 {
  718                case 3 : goto cqW;
  719                case 7 : goto cqR;
  720                default: {goto cqS;}
  721            }
  722        cqR: // global
  723            _cr2 = I32[I64[_sqJ::P64 & (-8)] - 4]; -- tag from info pointer
  724            switch [6 .. 8] _cr2::I64 {
  725                case 8 : goto cr1;
  726                default: {goto cr0;}
  727            }
  728        cr1: // global
  729            R1 = GHC.Types.True_closure+2;
  730            call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
  731        cr0: // global     -- technically necessary label
  732            goto cqS;
  733        cqW: // global
  734            R1 = GHC.Types.True_closure+2;
  735            call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
  736        cqS: // global
  737            R1 = GHC.Types.False_closure+1;
  738            call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
  739 -}
  740 --
  741 -- For 32-bit systems we only have 2 tag bits in the pointers at our disposal,
  742 -- so the performance win is dubious, especially in face of the increased code
  743 -- size due to double switching. But we can take the viewpoint that 32-bit
  744 -- architectures are not relevant for performance any more, so this can be
  745 -- considered as moot.
  746 
  747 
  748 -- Note [alg-alt heap check]
  749 --
  750 -- In an algebraic case with more than one alternative, we will have
  751 -- code like
  752 --
  753 -- L0:
  754 --   x = R1
  755 --   goto L1
  756 -- L1:
  757 --   if (x & 7 >= 2) then goto L2 else goto L3
  758 -- L2:
  759 --   Hp = Hp + 16
  760 --   if (Hp > HpLim) then goto L4
  761 --   ...
  762 -- L4:
  763 --   call gc() returns to L5
  764 -- L5:
  765 --   x = R1
  766 --   goto L1
  767 
  768 
  769 -- Note [Tagging big families]
  770 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  771 --
  772 -- Both the big and the small constructor families are tagged,
  773 -- that is, greater unions which overflow the tag space of TAG_BITS
  774 -- (i.e. 3 on 32 resp. 7 constructors on 64 bit archs).
  775 --
  776 -- For example, let's assume a 64-bit architecture, so that all
  777 -- heap objects are 8-byte aligned, and hence the address of a
  778 -- heap object ends in `000` (three zero bits).  Then consider
  779 -- > data Maybe a = Nothing | Just a
-- > data Day = Mon | Tue | Wed | Thu | Fri | Sat | Sun
  781 -- > data Grade = G1 | G2 | G3 | G4 | G5 | G6 | G7 | G8 | G9 | G10
  782 --
  783 -- Since `Grade` has more than 7 constructors, it counts as a
  784 -- "big data type" (also referred to as "big constructor family" in papers).
  785 -- On the other hand, `Maybe` and `Day` have 7 constructors or fewer, so they
  786 -- are "small data types".
  787 --
  788 -- Then
  789 --   * A pointer to an unevaluated thunk of type `Maybe Int`, `Day` or `Grade` will end in `000`
  790 --   * A tagged pointer to a `Nothing`, `Mon` or `G1` will end in `001`
  791 --   * A tagged pointer to a `Just x`, `Tue` or `G2`  will end in `010`
  792 --   * A tagged pointer to `Wed` or `G3` will end in `011`
  793 --       ...
  794 --   * A tagged pointer to `Sat` or `G6` will end in `110`
  795 --   * A tagged pointer to `Sun` or `G7` or `G8` or `G9` or `G10` will end in `111`
  796 --
  797 -- For big families we employ a mildly clever way of combining pointer and
-- info-table tagging. We use 1..mAX_PTR_TAG-1 as pointer-resident tags where
-- the tags in the pointer and the info table are in a one-to-one
-- relation, whereas tag mAX_PTR_TAG is used as "spill over", signifying
  801 -- we have to fall back and get the precise constructor tag from the
  802 -- info-table.
  803 --
  804 -- Consequently we now cascade switches, because we have to check
  805 -- the pointer tag first, and when it is MAX_PTR_TAG, fetch the precise
  806 -- tag from the info table, and switch on that. The only technically
  807 -- tricky part is that the default case needs (logical) duplication.
  808 -- To do this we emit an extra label for it and branch to that from
  809 -- the second switch. This avoids duplicated codegen. See Trac #14373.
  810 -- See note [Double switching for big families] for the mechanics
  811 -- involved.
  812 --
  813 -- Also see note [Data constructor dynamic tags]
  814 -- and the wiki https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/haskell-execution/pointer-tagging
  815 --
  816 
  817 -------------------
  818 cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
  819              -> FCode ( Maybe CmmAGraphScoped
  820                       , [(ConTagZ, CmmAGraphScoped)] )
  821 cgAlgAltRhss gc_plan bndr alts
  822   = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
  823 
  824        ; let { mb_deflt = case tagged_cmms of
  825                            ((DEFAULT,rhs) : _) -> Just rhs
  826                            _other              -> Nothing
  827                             -- DEFAULT is always first, if present
  828 
  829               ; branches = [ (dataConTagZ con, cmm)
  830                            | (DataAlt con, cmm) <- tagged_cmms ]
  831               }
  832 
  833        ; return (mb_deflt, branches)
  834        }
  835 
  836 
  837 -------------------
  838 cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
  839           -> FCode [(AltCon, CmmAGraphScoped)]
  840 cgAltRhss gc_plan bndr alts = do
  841   platform <- getPlatform
  842   let
  843     base_reg = idToReg platform bndr
  844     cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
  845     cg_alt (con, bndrs, rhs)
  846       = getCodeScoped             $
  847         maybeAltHeapCheck gc_plan $
  848         do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
  849                     -- alt binders are always non-void,
  850                     -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
  851            ; _ <- cgExpr rhs
  852            ; return con }
  853   forkAlts (map cg_alt alts)
  854 
  855 maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
  856 maybeAltHeapCheck (NoGcInAlts,_)  code = code
  857 maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  858   altHeapCheck regs code
  859 maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  860   altHeapCheckReturnsTo regs lret off code
  861 
  862 -----------------------------------------------------------------------------
  863 --      Tail calls
  864 -----------------------------------------------------------------------------
  865 
  866 cgConApp :: DataCon -> ConstructorNumber -> [StgArg] -> FCode ReturnKind
  867 cgConApp con mn stg_args
  868   | isUnboxedTupleDataCon con       -- Unboxed tuple: assign and return
  869   = do { arg_exprs <- getNonVoidArgAmodes stg_args
  870        ; tickyUnboxedTupleReturn (length arg_exprs)
  871        ; emitReturn arg_exprs }
  872 
  873   | otherwise   --  Boxed constructors; allocate and return
  874   = assertPpr (stg_args `lengthIs` countConRepArgs con)
  875               (ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args) $
  876     do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) mn False
  877                                      currentCCS con (assertNonVoidStgArgs stg_args)
  878                                      -- con args are always non-void,
  879                                      -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
  880                 -- The first "con" says that the name bound to this
  881                 -- closure is "con", which is a bit of a fudge, but
  882                 -- it only affects profiling (hence the False)
  883 
  884         ; emit =<< fcode_init
  885         ; tickyReturnNewCon (length stg_args)
  886         ; emitReturn [idInfoToAmode idinfo] }
  887 
  888 cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
  889 cgIdApp fun_id args = do
  890     fun_info       <- getCgIdInfo fun_id
  891     self_loop_info <- getSelfLoop
  892     call_opts      <- getCallOpts
  893     profile        <- getProfile
  894     let fun_arg     = StgVarArg fun_id
  895         fun_name    = idName    fun_id
  896         fun         = idInfoToAmode fun_info
  897         lf_info     = cg_lf         fun_info
  898         n_args      = length args
  899         v_args      = length $ filter (isVoidTy . stgArgType) args
  900     case getCallMethod call_opts fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
  901             -- A value in WHNF, so we can just return it.
  902         ReturnIt
  903           | isVoidTy (idType fun_id) -> emitReturn []
  904           | otherwise                -> emitReturn [fun]
  905           -- ToDo: does ReturnIt guarantee tagged?
  906 
  907         EnterIt -> assert (null args) $  -- Discarding arguments
  908                    emitEnter fun
  909 
  910         SlowCall -> do      -- A slow function call via the RTS apply routines
  911                 { tickySlowCall lf_info args
  912                 ; emitComment $ mkFastString "slowCall"
  913                 ; slowCall fun args }
  914 
  915         -- A direct function call (possibly with some left-over arguments)
  916         DirectEntry lbl arity -> do
  917                 { tickyDirectCall arity args
  918                 ; if nodeMustPointToIt profile lf_info
  919                      then directCall NativeNodeCall   lbl arity (fun_arg:args)
  920                      else directCall NativeDirectCall lbl arity args }
  921 
  922         -- Let-no-escape call or self-recursive tail-call
  923         JumpToIt blk_id lne_regs -> do
  924           { adjustHpBackwards -- always do this before a tail-call
  925           ; cmm_args <- getNonVoidArgAmodes args
  926           ; emitMultiAssign lne_regs cmm_args
  927           ; emit (mkBranch blk_id)
  928           ; return AssignedDirectly }
  929 
  930 -- Note [Self-recursive tail calls]
  931 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  932 --
  933 -- Self-recursive tail calls can be optimized into a local jump in the same
  934 -- way as let-no-escape bindings (see Note [What is a non-escaping let] in
  935 -- "GHC.CoreToStg"). Consider this:
  936 --
  937 -- foo.info:
  938 --     a = R1  // calling convention
  939 --     b = R2
  940 --     goto L1
  941 -- L1: ...
  942 --     ...
  943 -- ...
  944 -- L2: R1 = x
  945 --     R2 = y
  946 --     call foo(R1,R2)
  947 --
  948 -- Instead of putting x and y into registers (or other locations required by the
  949 -- calling convention) and performing a call we can put them into local
  950 -- variables a and b and perform jump to L1:
  951 --
  952 -- foo.info:
  953 --     a = R1
  954 --     b = R2
  955 --     goto L1
  956 -- L1: ...
  957 --     ...
  958 -- ...
  959 -- L2: a = x
  960 --     b = y
  961 --     goto L1
  962 --
  963 -- This can be done only when function is calling itself in a tail position
  964 -- and only if the call passes number of parameters equal to function's arity.
  965 -- Note that this cannot be performed if a function calls itself with a
  966 -- continuation.
  967 --
  968 -- This in fact implements optimization known as "loopification". It was
  969 -- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
  971 -- optimization at the Cmm level, whereas we perform ours during code generation
  972 -- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
  973 -- generated in the first place.
  974 --
  975 -- Implementation is spread across a couple of places in the code:
  976 --
  977 --   * FCode monad stores additional information in its reader environment
  978 --     (cgd_self_loop field). This information tells us which function can
  979 --     tail call itself in an optimized way (it is the function currently
  980 --     being compiled), what is the label of a loop header (L1 in example above)
--     and information about local registers in which we should place arguments
--     before making a call (this would be a and b in the example above).
  983 --
  984 --   * Whenever we are compiling a function, we set that information to reflect
  985 --     the fact that function currently being compiled can be jumped to, instead
--     of called. This is done in closureCodeBody in GHC.StgToCmm.Bind.
  987 --
  988 --   * We also have to emit a label to which we will be jumping. We make sure
  989 --     that the label is placed after a stack check but before the heap
  990 --     check. The reason is that making a recursive tail-call does not increase
  991 --     the stack so we only need to check once. But it may grow the heap, so we
  992 --     have to repeat the heap check in every self-call. This is done in
  993 --     do_checks in GHC.StgToCmm.Heap.
  994 --
  995 --   * When we begin compilation of another closure we remove the additional
  996 --     information from the environment. This is done by forkClosureBody
  997 --     in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
  998 --     forkLneBody, forkAlts, codeOnly - duplicate that information. In other
  999 --     words, we only need to clean the environment of the self-loop information
 1000 --     when compiling right hand side of a closure (binding).
 1001 --
 1002 --   * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
 1003 --     of call will be generated. getCallMethod decides to generate a self
 1004 --     recursive tail call when (a) environment stores information about
 1005 --     possible self tail-call; (b) that tail call is to a function currently
 1006 --     being compiled; (c) number of passed non-void arguments is equal to
 1007 --     function's arity. (d) loopification is turned on via -floopification
 1008 --     command-line option.
 1009 --
 1010 --   * Command line option to turn loopification on and off is implemented in
 1011 --     DynFlags.
 1012 --
 1013 --
 1014 -- Note [Void arguments in self-recursive tail calls]
 1015 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1016 --
 1017 -- State# tokens can get in the way of the loopification optimization as seen in
 1018 -- #11372. Consider this:
 1019 --
 1020 -- foo :: [a]
--     -> (a -> State# s -> (# State# s, Bool #))
 1022 --     -> State# s
 1023 --     -> (# State# s, Maybe a #)
 1024 -- foo [] f s = (# s, Nothing #)
 1025 -- foo (x:xs) f s = case f x s of
 1026 --      (# s', b #) -> case b of
 1027 --          True -> (# s', Just x #)
 1028 --          False -> foo xs f s'
 1029 --
 1030 -- We would like to compile the call to foo as a local jump instead of a call
 1031 -- (see Note [Self-recursive tail calls]). However, the generated function has
 1032 -- an arity of 2 while we apply it to 3 arguments, one of them being of void
 1033 -- type. Thus, we mustn't count arguments of void type when checking whether
 1034 -- we can turn a call into a self-recursive jump.
 1035 --
 1036 
-- | Enter the closure 'fun'.  How the result is delivered depends on
-- the current sequel: for 'Return' we tail-jump through the closure's
-- entry code; for 'AssignTo' we emit a pointer-tag test so that
-- already-evaluated closures skip the call entirely (details in the
-- inline commentary below).
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { ptr_opts <- getPtrOpts
  ; platform <- getPlatform
  ; profile <- getProfile
  ; adjustHpBackwards  -- give back any unused heap before transferring control
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return -> do
        { let entry = entryCode platform $ closureInfoPtr ptr_opts $ CmmReg nodeReg
        ; emit $ mkJump profile NativeNodeCall entry
                        [cmmUntag platform fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lret else goto Lcall
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newBlockId
       -- Incoming values arrive in the Young area of lret
       ; let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) res_regs []
       ; lcall <- newBlockId
       -- NOTE(review): shadows the updfr_off read above; appears to
       -- re-fetch the same value — confirm the re-read is intentional
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow profile NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode platform (closureInfoPtr ptr_opts (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; tscope <- getTickScope
       -- Tagged pointers branch straight to lret; untagged ones make
       -- the out-of-line call at lcall, which also returns to lret.
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged platform (CmmReg nodeReg))
                     lret lcall Nothing <*>
           outOfLine lcall (the_call,tscope) <*>
           mkLabel lret tscope <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }
 1110 
 1111 ------------------------------------------------------------------------
 1112 --              Ticks
 1113 ------------------------------------------------------------------------
 1114 
 1115 -- | Generate Cmm code for a tick. Depending on the type of Tickish,
 1116 -- this will either generate actual Cmm instrumentation code, or
 1117 -- simply pass on the annotation as a @CmmTickish@.
 1118 cgTick :: StgTickish -> FCode ()
 1119 cgTick tick
 1120   = do { platform <- getPlatform
 1121        ; case tick of
 1122            ProfNote   cc t p -> emitSetCCC cc t p
 1123            HpcTick    m n    -> emit (mkTickBox platform m n)
 1124            SourceNote s n    -> emitTick $ SourceNote s n
 1125            _other            -> return () -- ignore
 1126        }