never executed always true always false
    1 -----------------------------------------------------------------------------
    2 --
    3 -- Code generation for foreign calls.
    4 --
    5 -- (c) The University of Glasgow 2004-2006
    6 --
    7 -----------------------------------------------------------------------------
    8 
    9 module GHC.StgToCmm.Foreign (
   10   cgForeignCall,
   11   emitPrimCall, emitCCall,
   12   emitForeignCall,
   13   emitSaveThreadState,
   14   saveThreadState,
   15   emitLoadThreadState,
   16   emitSaveRegs,
   17   emitRestoreRegs,
   18   emitPushTupleRegs,
   19   emitPopTupleRegs,
   20   loadThreadState,
   21   emitOpenNursery,
   22   emitCloseNursery,
   23  ) where
   24 
   25 import GHC.Prelude hiding( succ, (<*>) )
   26 
   27 import GHC.Platform
   28 import GHC.Platform.Profile
   29 
   30 import GHC.Stg.Syntax
   31 import GHC.StgToCmm.Prof (storeCurCCS, ccsType)
   32 import GHC.StgToCmm.Monad
   33 import GHC.StgToCmm.Utils
   34 import GHC.StgToCmm.Closure
   35 import GHC.StgToCmm.Layout
   36 
   37 import GHC.Cmm.BlockId (newBlockId)
   38 import GHC.Cmm
   39 import GHC.Cmm.Utils
   40 import GHC.Cmm.Graph
   41 import GHC.Cmm.CallConv
   42 import GHC.Core.Type
   43 import GHC.Types.RepType
   44 import GHC.Cmm.CLabel
   45 import GHC.Runtime.Heap.Layout
   46 import GHC.Types.ForeignCall
   47 import GHC.Data.Maybe
   48 import GHC.Utils.Panic
   49 import GHC.Types.Unique.Supply
   50 import GHC.Types.Basic
   51 import GHC.Unit.Types
   52 
   53 import GHC.Core.TyCo.Rep
   54 import GHC.Builtin.Types.Prim
   55 import GHC.Utils.Misc (zipEqual)
   56 
   57 import Control.Monad
   58 
   59 -----------------------------------------------------------------------------
   60 -- Code generation for Foreign Calls
   61 -----------------------------------------------------------------------------
   62 
   63 -- | Emit code for a foreign call, and return the results to the sequel.
   64 -- Precondition: the length of the arguments list is the same as the
   65 -- arity of the foreign function.
   66 cgForeignCall :: ForeignCall            -- the op
   67               -> Type                   -- type of foreign function
   68               -> [StgArg]               -- x,y    arguments
   69               -> Type                   -- result type
   70               -> FCode ReturnKind
   71 
   72 cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
   73   = do  { platform <- getPlatform
   74         ; let -- in the stdcall calling convention, the symbol needs @size appended
   75               -- to it, where size is the total number of bytes of arguments.  We
   76               -- attach this info to the CLabel here, and the CLabel pretty printer
   77               -- will generate the suffix when the label is printed.
   78             call_size args
   79               | StdCallConv <- cconv = Just (sum (map arg_size args))
   80               | otherwise            = Nothing
   81 
   82               -- ToDo: this might not be correct for 64-bit API
   83               -- This is correct for the PowerPC ELF ABI version 1 and 2.
   84             arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType platform arg)
   85                                      (platformWordSizeInBytes platform)
   86         ; cmm_args <- getFCallArgs stg_args typ
   87         -- ; traceM $ show cmm_args
   88         ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
   89         ; let ((call_args, arg_hints), cmm_target)
   90                 = case target of
   91                    StaticTarget _ _   _      False ->
   92                        panic "cgForeignCall: unexpected FFI value import"
   93                    StaticTarget _ lbl mPkgId True
   94                      -> let labelSource
   95                                 = case mPkgId of
   96                                         Nothing         -> ForeignLabelInThisPackage
   97                                         Just pkgId      -> ForeignLabelInPackage (toUnitId pkgId)
   98                             size = call_size cmm_args
   99                         in  ( unzip cmm_args
  100                             , CmmLit (CmmLabel
  101                                         (mkForeignLabel lbl size labelSource IsFunction)))
  102 
  103                    DynamicTarget    ->  case cmm_args of
  104                                            (fn,_):rest -> (unzip rest, fn)
  105                                            [] -> panic "cgForeignCall []"
  106               fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
  107               call_target = ForeignTarget cmm_target fc
  108 
  109         -- we want to emit code for the call, and then emitReturn.
  110         -- However, if the sequel is AssignTo, we shortcut a little
  111         -- and generate a foreign call that assigns the results
  112         -- directly.  Otherwise we end up generating a bunch of
  113         -- useless "r = r" assignments, which are not merely annoying:
  114         -- they prevent the common block elimination from working correctly
  115         -- in the case of a safe foreign call.
  116         -- See Note [safe foreign call convention]
  117         --
  118         ; sequel <- getSequel
  119         ; case sequel of
  120             AssignTo assign_to_these _ ->
  121                 emitForeignCall safety assign_to_these call_target call_args
  122 
  123             _something_else ->
  124                 do { _ <- emitForeignCall safety res_regs call_target call_args
  125                    ; emitReturn (map (CmmReg . CmmLocal) res_regs)
  126                    }
  127          }
  128 
  129 {- Note [safe foreign call convention]
  130 
  131 The simple thing to do for a safe foreign call would be the same as an
  132 unsafe one: just
  133 
  134     emitForeignCall ...
  135     emitReturn ...
  136 
  137 but consider what happens in this case
  138 
  139    case foo x y z of
  140      (# s, r #) -> ...
  141 
  142 The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r]
  143 as the result reg, and we generate
  144 
  145   r = foo(x,y,z) returns to L1  -- emitForeignCall
  146  L1:
  147   r = r  -- emitReturn
  148   goto L2
  149 L2:
  150   ...
  151 
  152 Now L1 is a proc point (by definition, it is the continuation of the
  153 safe foreign call).  If L2 does a heap check, then L2 will also be a
  154 proc point.
  155 
  156 Furthermore, the stack layout algorithm has to arrange to save r
  157 somewhere between the call and the jump to L1, which is annoying: we
  158 would have to treat r differently from the other live variables, which
  159 have to be saved *before* the call.
  160 
  161 So we adopt a special convention for safe foreign calls: the results
  162 are copied out according to the NativeReturn convention by the call,
  163 and the continuation of the call should copyIn the results.  (The
  164 copyOut code is actually inserted when the safe foreign call is
  165 lowered later).  The result regs attached to the safe foreign call are
  166 only used temporarily to hold the results before they are copied out.
  167 
  168 We will now generate this:
  169 
  170   r = foo(x,y,z) returns to L1
  171  L1:
  172   r = R1  -- copyIn, inserted by mkSafeCall
  173   goto L2
  174  L2:
  175   ... r ...
  176 
  177 And when the safe foreign call is lowered later (see Note [lower safe
  178 foreign calls]) we get this:
  179 
  180   suspendThread()
  181   r = foo(x,y,z)
  182   resumeThread()
  183   R1 = r  -- copyOut, inserted by lowerSafeForeignCall
  184   jump L1
  185  L1:
  186   r = R1  -- copyIn, inserted by mkSafeCall
  187   goto L2
  188  L2:
  189   ... r ...
  190 
  191 Now consider what happens if L2 does a heap check: the Adams
  192 optimisation kicks in and commons up L1 with the heap-check
  193 continuation, resulting in just one proc point instead of two. Yay!
  194 -}
  195 
  196 
  197 emitCCall :: [(CmmFormal,ForeignHint)]
  198           -> CmmExpr
  199           -> [(CmmActual,ForeignHint)]
  200           -> FCode ()
  201 emitCCall hinted_results fn hinted_args
  202   = void $ emitForeignCall PlayRisky results target args
  203   where
  204     (args, arg_hints) = unzip hinted_args
  205     (results, result_hints) = unzip hinted_results
  206     target = ForeignTarget fn fc
  207     fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
  208 
  209 
  210 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
  211 emitPrimCall res op args
  212   = void $ emitForeignCall PlayRisky res (PrimTarget op) args
  213 
  214 -- alternative entry point, used by GHC.Cmm.Parser
  215 emitForeignCall
  216         :: Safety
  217         -> [CmmFormal]          -- where to put the results
  218         -> ForeignTarget        -- the op
  219         -> [CmmActual]          -- arguments
  220         -> FCode ReturnKind
  221 emitForeignCall safety results target args
  222   | not (playSafe safety) = do
  223     platform <- getPlatform
  224     let (caller_save, caller_load) = callerSaveVolatileRegs platform
  225     emit caller_save
  226     target' <- load_target_into_temp target
  227     args' <- mapM maybe_assign_temp args
  228     emit $ mkUnsafeCall target' results args'
  229     emit caller_load
  230     return AssignedDirectly
  231 
  232   | otherwise = do
  233     profile <- getProfile
  234     platform <- getPlatform
  235     updfr_off <- getUpdFrameOff
  236     target' <- load_target_into_temp target
  237     args' <- mapM maybe_assign_temp args
  238     k <- newBlockId
  239     let (off, _, copyout) = copyInOflow profile NativeReturn (Young k) results []
  240        -- see Note [safe foreign call convention]
  241     tscope <- getTickScope
  242     emit $
  243            (    mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth platform)))
  244                         (CmmLit (CmmBlock k))
  245             <*> mkLast (CmmForeignCall { tgt  = target'
  246                                        , res  = results
  247                                        , args = args'
  248                                        , succ = k
  249                                        , ret_args = off
  250                                        , ret_off = updfr_off
  251                                        , intrbl = playInterruptible safety })
  252             <*> mkLabel k tscope
  253             <*> copyout
  254            )
  255     return (ReturnedTo k off)
  256 
  257 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
  258 load_target_into_temp (ForeignTarget expr conv) = do
  259   tmp <- maybe_assign_temp expr
  260   return (ForeignTarget tmp conv)
  261 load_target_into_temp other_target@(PrimTarget _) =
  262   return other_target
  263 
  264 -- What we want to do here is create a new temporary for the foreign
  265 -- call argument if it is not safe to use the expression directly,
  266 -- because the expression mentions caller-saves GlobalRegs (see
  267 -- Note [Register parameter passing]).
  268 --
  269 -- However, we can't pattern-match on the expression here, because
  270 -- this is used in a loop by GHC.Cmm.Parser, and testing the expression
  271 -- results in a black hole.  So we always create a temporary, and rely
  272 -- on GHC.Cmm.Sink to clean it up later.  (Yuck, ToDo).  The generated code
  273 -- ends up being the same, at least for the RTS .cmm code.
  274 --
  275 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
  276 maybe_assign_temp e = do
  277   platform <- getPlatform
  278   reg <- newTemp (cmmExprType platform e)
  279   emitAssign (CmmLocal reg) e
  280   return (CmmReg (CmmLocal reg))
  281 
  282 -- -----------------------------------------------------------------------------
  283 -- Save/restore the thread state in the TSO
  284 
  285 -- This stuff can't be done in suspendThread/resumeThread, because it
  286 -- refers to global registers which aren't available in the C world.
  287 
  288 emitSaveThreadState :: FCode ()
  289 emitSaveThreadState = do
  290   profile <- getProfile
  291   code <- saveThreadState profile
  292   emit code
  293 
  294 -- | Produce code to save the current thread state to @CurrentTSO@
  295 saveThreadState :: MonadUnique m => Profile -> m CmmAGraph
  296 saveThreadState profile = do
  297   let platform = profilePlatform profile
  298   tso <- newTemp (gcWord platform)
  299   close_nursery <- closeNursery profile tso
  300   pure $ catAGraphs
  301    [ -- tso = CurrentTSO;
  302      mkAssign (CmmLocal tso) currentTSOExpr
  303 
  304    , -- tso->stackobj->sp = Sp;
  305      mkStore (cmmOffset platform
  306                         (CmmLoad (cmmOffset platform
  307                                             (CmmReg (CmmLocal tso))
  308                                             (tso_stackobj profile))
  309                                  (bWord platform))
  310                         (stack_SP profile))
  311              spExpr
  312 
  313     , close_nursery
  314 
  315     , -- and save the current cost centre stack in the TSO when profiling:
  316       if profileIsProfiling profile
  317          then mkStore (cmmOffset platform (CmmReg (CmmLocal tso)) (tso_CCCS profile)) cccsExpr
  318          else mkNop
  319     ]
  320 
  321 
  322 
  323 -- | Save STG registers
  324 --
  325 -- STG registers must be saved around a C call, just in case the STG
  326 -- register is mapped to a caller-saves machine register.  Normally we
  327 -- don't need to worry about this the code generator has already
  328 -- loaded any live STG registers into variables for us, but in
  329 -- hand-written low-level Cmm code where we don't know which registers
  330 -- are live, we might have to save them all.
  331 emitSaveRegs :: FCode ()
  332 emitSaveRegs = do
  333    platform <- getPlatform
  334    let regs = realArgRegsCover platform
  335        save = catAGraphs (map (callerSaveGlobalReg platform) regs)
  336    emit save
  337 
  338 -- | Restore STG registers (see 'emitSaveRegs')
  339 emitRestoreRegs :: FCode ()
  340 emitRestoreRegs = do
  341    platform <- getPlatform
  342    let regs    = realArgRegsCover platform
  343        restore = catAGraphs (map (callerRestoreGlobalReg platform) regs)
  344    emit restore
  345 
  346 -- | Push a subset of STG registers onto the stack, specified by the bitmap
  347 --
  348 -- Sometimes, a "live" subset of the STG registers needs to be saved on the
  349 -- stack, for example when storing an unboxed tuple to be used in the GHCi
  350 -- bytecode interpreter.
  351 --
  352 -- The "live registers" bitmap corresponds to the list of registers given by
  353 -- 'tupleRegsCover', with the least significant bit indicating liveness of
  354 -- the first register in the list.
  355 --
  356 -- Each register is saved to a stack slot of one or more machine words, even
  357 -- if the register size itself is smaller.
  358 --
  359 -- The resulting Cmm code looks like this, with a line for each real or
  360 -- virtual register used for returning tuples:
  361 --
  362 --    ...
  363 --    if((mask & 2) != 0) { Sp_adj(-1); Sp(0) = R2; }
  364 --    if((mask & 1) != 0) { Sp_adj(-1); Sp(0) = R1; }
  365 --
  366 -- See Note [GHCi tuple layout]
  367 
  368 emitPushTupleRegs :: CmmExpr -> FCode ()
  369 emitPushTupleRegs regs_live = do
  370   platform <- getPlatform
  371   let regs = zip (tupleRegsCover platform) [0..]
  372       save_arg (reg, n) =
  373         let mask     = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
  374             live     = cmmAndWord platform regs_live mask
  375             cond     = cmmNeWord platform live (zeroExpr platform)
  376             reg_ty   = cmmRegType platform (CmmGlobal reg)
  377             width    = roundUpToWords platform
  378                                       (widthInBytes $ typeWidth reg_ty)
  379             adj_sp   = mkAssign spReg
  380                                 (cmmOffset platform spExpr (negate width))
  381             save_reg = mkStore spExpr (CmmReg $ CmmGlobal reg)
  382         in mkCmmIfThen cond $ catAGraphs [adj_sp, save_reg]
  383   emit . catAGraphs =<< mapM save_arg (reverse regs)
  384 
  385 -- | Pop a subset of STG registers from the stack (see 'emitPushTupleRegs')
  386 emitPopTupleRegs :: CmmExpr -> FCode ()
  387 emitPopTupleRegs regs_live = do
  388   platform <- getPlatform
  389   let regs = zip (tupleRegsCover platform) [0..]
  390       save_arg (reg, n) =
  391         let mask     = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
  392             live     = cmmAndWord platform regs_live mask
  393             cond     = cmmNeWord platform live (zeroExpr platform)
  394             reg_ty   = cmmRegType platform (CmmGlobal reg)
  395             width    = roundUpToWords platform
  396                                       (widthInBytes $ typeWidth reg_ty)
  397             adj_sp   = mkAssign spReg
  398                                 (cmmOffset platform spExpr width)
  399             restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty)
  400         in mkCmmIfThen cond $ catAGraphs [restore_reg, adj_sp]
  401   emit . catAGraphs =<< mapM save_arg regs
  402 
  403 
  404 emitCloseNursery :: FCode ()
  405 emitCloseNursery = do
  406   profile <- getProfile
  407   let platform = profilePlatform profile
  408   tso <- newTemp (bWord platform)
  409   code <- closeNursery profile tso
  410   emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
  411 
  412 {- |
  413 @closeNursery dflags tso@ produces code to close the nursery.
  414 A local register holding the value of @CurrentTSO@ is expected for
  415 efficiency.
  416 
  417 Closing the nursery corresponds to the following code:
  418 
  419 @
  420   tso = CurrentTSO;
  421   cn = CurrentNuresry;
  422 
  423   // Update the allocation limit for the current thread.  We don't
  424   // check to see whether it has overflowed at this point, that check is
  425   // made when we run out of space in the current heap block (stg_gc_noregs)
  426   // and in the scheduler when context switching (schedulePostRunThread).
  427   tso->alloc_limit -= Hp + WDS(1) - cn->start;
  428 
  429   // Set cn->free to the next unoccupied word in the block
  430   cn->free = Hp + WDS(1);
  431 @
  432 -}
  433 closeNursery :: MonadUnique m => Profile -> LocalReg -> m CmmAGraph
  434 closeNursery profile tso = do
  435   let tsoreg   = CmmLocal tso
  436       platform = profilePlatform profile
  437   cnreg      <- CmmLocal <$> newTemp (bWord platform)
  438   pure $ catAGraphs [
  439     mkAssign cnreg currentNurseryExpr,
  440 
  441     -- CurrentNursery->free = Hp+1;
  442     mkStore (nursery_bdescr_free platform cnreg) (cmmOffsetW platform hpExpr 1),
  443 
  444     let alloc =
  445            CmmMachOp (mo_wordSub platform)
  446               [ cmmOffsetW platform hpExpr 1
  447               , CmmLoad (nursery_bdescr_start platform cnreg) (bWord platform)
  448               ]
  449 
  450         alloc_limit = cmmOffset platform (CmmReg tsoreg) (tso_alloc_limit profile)
  451     in
  452 
  453     -- tso->alloc_limit += alloc
  454     mkStore alloc_limit (CmmMachOp (MO_Sub W64)
  455                                [ CmmLoad alloc_limit b64
  456                                , CmmMachOp (mo_WordTo64 platform) [alloc] ])
  457    ]
  458 
  459 emitLoadThreadState :: FCode ()
  460 emitLoadThreadState = do
  461   profile <- getProfile
  462   code <- loadThreadState profile
  463   emit code
  464 
  465 -- | Produce code to load the current thread state from @CurrentTSO@
  466 loadThreadState :: MonadUnique m => Profile -> m CmmAGraph
  467 loadThreadState profile = do
  468   let platform = profilePlatform profile
  469   tso <- newTemp (gcWord platform)
  470   stack <- newTemp (gcWord platform)
  471   open_nursery <- openNursery profile tso
  472   pure $ catAGraphs [
  473     -- tso = CurrentTSO;
  474     mkAssign (CmmLocal tso) currentTSOExpr,
  475     -- stack = tso->stackobj;
  476     mkAssign (CmmLocal stack) (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso)) (tso_stackobj profile)) (bWord platform)),
  477     -- Sp = stack->sp;
  478     mkAssign spReg (CmmLoad (cmmOffset platform (CmmReg (CmmLocal stack)) (stack_SP profile)) (bWord platform)),
  479     -- SpLim = stack->stack + RESERVED_STACK_WORDS;
  480     mkAssign spLimReg (cmmOffsetW platform (cmmOffset platform (CmmReg (CmmLocal stack)) (stack_STACK profile))
  481                                 (pc_RESERVED_STACK_WORDS (platformConstants platform))),
  482     -- HpAlloc = 0;
  483     --   HpAlloc is assumed to be set to non-zero only by a failed
  484     --   a heap check, see HeapStackCheck.cmm:GC_GENERIC
  485     mkAssign hpAllocReg (zeroExpr platform),
  486     open_nursery,
  487     -- and load the current cost centre stack from the TSO when profiling:
  488     if profileIsProfiling profile
  489        then storeCurCCS
  490               (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso))
  491                  (tso_CCCS profile)) (ccsType platform))
  492        else mkNop
  493    ]
  494 
  495 
  496 emitOpenNursery :: FCode ()
  497 emitOpenNursery = do
  498   profile <- getProfile
  499   let platform = profilePlatform profile
  500   tso <- newTemp (bWord platform)
  501   code <- openNursery profile tso
  502   emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
  503 
  504 {- |
  505 @openNursery profile tso@ produces code to open the nursery. A local register
  506 holding the value of @CurrentTSO@ is expected for efficiency.
  507 
  508 Opening the nursery corresponds to the following code:
  509 
  510 @
  511    tso = CurrentTSO;
  512    cn = CurrentNursery;
  513    bdfree = CurrentNursery->free;
  514    bdstart = CurrentNursery->start;
  515 
  516    // We *add* the currently occupied portion of the nursery block to
  517    // the allocation limit, because we will subtract it again in
  518    // closeNursery.
  519    tso->alloc_limit += bdfree - bdstart;
  520 
  521    // Set Hp to the last occupied word of the heap block.  Why not the
  522    // next unoccupied word?  Doing it this way means that we get to use
  523    // an offset of zero more often, which might lead to slightly smaller
  524    // code on some architectures.
  525    Hp = bdfree - WDS(1);
  526 
  527    // Set HpLim to the end of the current nursery block (note that this block
  528    // might be a block group, consisting of several adjacent blocks.
  529    HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
  530 @
  531 -}
  532 openNursery :: MonadUnique m => Profile -> LocalReg -> m CmmAGraph
  533 openNursery profile tso = do
  534   let tsoreg   = CmmLocal tso
  535       platform = profilePlatform profile
  536   cnreg      <- CmmLocal <$> newTemp (bWord platform)
  537   bdfreereg  <- CmmLocal <$> newTemp (bWord platform)
  538   bdstartreg <- CmmLocal <$> newTemp (bWord platform)
  539 
  540   -- These assignments are carefully ordered to reduce register
  541   -- pressure and generate not completely awful code on x86.  To see
  542   -- what code we generate, look at the assembly for
  543   -- stg_returnToStackTop in rts/StgStartup.cmm.
  544   pure $ catAGraphs [
  545      mkAssign cnreg currentNurseryExpr,
  546      mkAssign bdfreereg  (CmmLoad (nursery_bdescr_free platform cnreg)  (bWord platform)),
  547 
  548      -- Hp = CurrentNursery->free - 1;
  549      mkAssign hpReg (cmmOffsetW platform (CmmReg bdfreereg) (-1)),
  550 
  551      mkAssign bdstartreg (CmmLoad (nursery_bdescr_start platform cnreg) (bWord platform)),
  552 
  553      -- HpLim = CurrentNursery->start +
  554      --              CurrentNursery->blocks*BLOCK_SIZE_W - 1;
  555      mkAssign hpLimReg
  556          (cmmOffsetExpr platform
  557              (CmmReg bdstartreg)
  558              (cmmOffset platform
  559                (CmmMachOp (mo_wordMul platform) [
  560                  CmmMachOp (MO_SS_Conv W32 (wordWidth platform))
  561                    [CmmLoad (nursery_bdescr_blocks platform cnreg) b32],
  562                  mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform))
  563                 ])
  564                (-1)
  565              )
  566          ),
  567 
  568      -- alloc = bd->free - bd->start
  569      let alloc =
  570            CmmMachOp (mo_wordSub platform) [CmmReg bdfreereg, CmmReg bdstartreg]
  571 
  572          alloc_limit = cmmOffset platform (CmmReg tsoreg) (tso_alloc_limit profile)
  573      in
  574 
  575      -- tso->alloc_limit += alloc
  576      mkStore alloc_limit (CmmMachOp (MO_Add W64)
  577                                [ CmmLoad alloc_limit b64
  578                                , CmmMachOp (mo_WordTo64 platform) [alloc] ])
  579 
  580    ]
  581 
  582 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
  583   :: Platform -> CmmReg -> CmmExpr
  584 nursery_bdescr_free   platform cn =
  585   cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_free (platformConstants platform))
  586 nursery_bdescr_start  platform cn =
  587   cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_start (platformConstants platform))
  588 nursery_bdescr_blocks platform cn =
  589   cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_blocks (platformConstants platform))
  590 
  591 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: Profile -> ByteOff
  592 tso_stackobj    profile = closureField profile (pc_OFFSET_StgTSO_stackobj    (profileConstants profile))
  593 tso_alloc_limit profile = closureField profile (pc_OFFSET_StgTSO_alloc_limit (profileConstants profile))
  594 tso_CCCS        profile = closureField profile (pc_OFFSET_StgTSO_cccs        (profileConstants profile))
  595 stack_STACK     profile = closureField profile (pc_OFFSET_StgStack_stack     (profileConstants profile))
  596 stack_SP        profile = closureField profile (pc_OFFSET_StgStack_sp        (profileConstants profile))
  597 
  598 
  599 closureField :: Profile -> ByteOff -> ByteOff
  600 closureField profile off = off + fixedHdrSize profile
  601 
  602 -- Note [Unlifted boxed arguments to foreign calls]
  603 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  604 --
  605 -- For certain types passed to foreign calls, we adjust the actual
  606 -- value passed to the call.  For ByteArray#, Array#, SmallArray#,
  607 -- and ArrayArray#, we pass the address of the array's payload, not
  608 -- the address of the heap object. For example, consider
  609 --   foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
  610 -- At a Haskell call like `foo x y`, we'll generate a C call that
  611 -- is more like
  612 --   c_foo( x+8, y )
  613 -- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
  614 -- it past the header words of the ByteArray object to point directly
  615 -- to the data inside the ByteArray#. (The exact offset depends
  616 -- on the target architecture and on profiling) By contrast, (y :: Int#)
  617 -- requires no such adjustment.
  618 --
  619 -- This adjustment is performed by 'add_shim'. The size of the
  620 -- adjustment depends on the type of heap object. But
  621 -- how can we determine that type? There are two available options.
  622 -- We could use the types of the actual values that the foreign call
  623 -- has been applied to, or we could use the types present in the
  624 -- foreign function's type. Prior to GHC 8.10, we used the former
  625 -- strategy since it's a little more simple. However, in issue #16650
  626 -- and more compellingly in the comments of
  627 -- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
  628 -- demonstrated that this leads to bad behavior in the presence
  629 -- of unsafeCoerce#. Returning to the above example, suppose the
  630 -- Haskell call looked like
  631 --   foo (unsafeCoerce# p)
  632 -- where the types of expressions comprising the arguments are
  633 --   p :: (Any :: TYPE 'UnliftedRep)
  634 --   i :: Int#
  635 -- so that the unsafe-coerce is between Any and ByteArray#.
  636 -- These two types have the same kind (they are both represented by
  637 -- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
  638 -- By the time this gets to the code generator the cast has been
  639 -- discarded so we have
  640 --   foo p y
  641 -- But we *must* adjust the pointer to p by a ByteArray# shim,
  642 -- *not* by an Any shim (the Any shim involves no offset at all).
  643 --
  644 -- To avoid this bad behavior, we adopt the second strategy: use
  645 -- the types present in the foreign function's type.
  646 -- In collectStgFArgTypes, we convert the foreign function's
  647 -- type to a list of StgFArgType. Then, in add_shim, we interpret
  648 -- these as numeric offsets.
  649 
  650 getFCallArgs ::
  651      [StgArg]
  652   -> Type -- the type of the foreign function
  653   -> FCode [(CmmExpr, ForeignHint)]
  654 -- (a) Drop void args
  655 -- (b) Add foreign-call shim code
  656 -- It's (b) that makes this differ from getNonVoidArgAmodes
  657 -- Precondition: args and typs have the same length
  658 -- See Note [Unlifted boxed arguments to foreign calls]
  659 getFCallArgs args typ
  660   = do  { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
  661         ; return (catMaybes mb_cmms) }
  662   where
  663     get (arg,typ)
  664       | null arg_reps
  665       = return Nothing
  666       | otherwise
  667       = do { cmm <- getArgAmode (NonVoid arg)
  668            ; profile <- getProfile
  669            ; return (Just (add_shim profile typ cmm, hint)) }
  670       where
  671         arg_ty   = stgArgType arg
  672         arg_reps = typePrimRep arg_ty
  673         hint     = typeForeignHint arg_ty
  674 
  675 -- The minimum amount of information needed to determine
  676 -- the offset to apply to an argument to a foreign call.
  677 -- See Note [Unlifted boxed arguments to foreign calls]
  678 data StgFArgType
  679   = StgPlainType
  680   | StgArrayType
  681   | StgSmallArrayType
  682   | StgByteArrayType
  683 
  684 -- See Note [Unlifted boxed arguments to foreign calls]
  685 add_shim :: Profile -> StgFArgType -> CmmExpr -> CmmExpr
  686 add_shim profile ty expr = case ty of
  687   StgPlainType      -> expr
  688   StgArrayType      -> cmmOffsetB platform expr (arrPtrsHdrSize profile)
  689   StgSmallArrayType -> cmmOffsetB platform expr (smallArrPtrsHdrSize profile)
  690   StgByteArrayType  -> cmmOffsetB platform expr (arrWordsHdrSize profile)
  691   where
  692     platform = profilePlatform profile
  693 
  694 -- From a function, extract information needed to determine
  695 -- the offset of each argument when used as a C FFI argument.
  696 -- See Note [Unlifted boxed arguments to foreign calls]
  697 collectStgFArgTypes :: Type -> [StgFArgType]
  698 collectStgFArgTypes = go []
  699   where
  700     -- Skip foralls
  701     go bs (ForAllTy _ res) = go bs res
  702     go bs (AppTy{}) = reverse bs
  703     go bs (TyConApp{}) = reverse bs
  704     go bs (LitTy{}) = reverse bs
  705     go bs (TyVarTy{}) = reverse bs
  706     go  _ (CastTy{}) = panic "myCollectTypeArgs: CastTy"
  707     go  _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy"
  708     go bs (FunTy {ft_arg = arg, ft_res=res}) =
  709       go (typeToStgFArgType arg:bs) res
  710 
  711 -- Choose the offset based on the type. For anything other
  712 -- than an unlifted boxed type, there is no offset.
  713 -- See Note [Unlifted boxed arguments to foreign calls]
  714 typeToStgFArgType :: Type -> StgFArgType
  715 typeToStgFArgType typ
  716   | tycon == arrayPrimTyCon = StgArrayType
  717   | tycon == mutableArrayPrimTyCon = StgArrayType
  718   | tycon == arrayArrayPrimTyCon = StgArrayType
  719   | tycon == mutableArrayArrayPrimTyCon = StgArrayType
  720   | tycon == smallArrayPrimTyCon = StgSmallArrayType
  721   | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
  722   | tycon == byteArrayPrimTyCon = StgByteArrayType
  723   | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
  724   | otherwise = StgPlainType
  725   where
  726   -- Should be a tycon app, since this is a foreign call. We look
  727   -- through newtypes so the offset does not change if a user replaces
  728   -- a type in a foreign function signature with a representationally
  729   -- equivalent newtype.
  730   tycon = tyConAppTyCon (unwrapType typ)