never executed always true always false
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for foreign calls.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module GHC.StgToCmm.Foreign (
10 cgForeignCall,
11 emitPrimCall, emitCCall,
12 emitForeignCall,
13 emitSaveThreadState,
14 saveThreadState,
15 emitLoadThreadState,
16 emitSaveRegs,
17 emitRestoreRegs,
18 emitPushTupleRegs,
19 emitPopTupleRegs,
20 loadThreadState,
21 emitOpenNursery,
22 emitCloseNursery,
23 ) where
24
25 import GHC.Prelude hiding( succ, (<*>) )
26
27 import GHC.Platform
28 import GHC.Platform.Profile
29
30 import GHC.Stg.Syntax
31 import GHC.StgToCmm.Prof (storeCurCCS, ccsType)
32 import GHC.StgToCmm.Monad
33 import GHC.StgToCmm.Utils
34 import GHC.StgToCmm.Closure
35 import GHC.StgToCmm.Layout
36
37 import GHC.Cmm.BlockId (newBlockId)
38 import GHC.Cmm
39 import GHC.Cmm.Utils
40 import GHC.Cmm.Graph
41 import GHC.Cmm.CallConv
42 import GHC.Core.Type
43 import GHC.Types.RepType
44 import GHC.Cmm.CLabel
45 import GHC.Runtime.Heap.Layout
46 import GHC.Types.ForeignCall
47 import GHC.Data.Maybe
48 import GHC.Utils.Panic
49 import GHC.Types.Unique.Supply
50 import GHC.Types.Basic
51 import GHC.Unit.Types
52
53 import GHC.Core.TyCo.Rep
54 import GHC.Builtin.Types.Prim
55 import GHC.Utils.Misc (zipEqual)
56
57 import Control.Monad
58
59 -----------------------------------------------------------------------------
60 -- Code generation for Foreign Calls
61 -----------------------------------------------------------------------------
62
63 -- | Emit code for a foreign call, and return the results to the sequel.
64 -- Precondition: the length of the arguments list is the same as the
65 -- arity of the foreign function.
66 cgForeignCall :: ForeignCall -- the op
67 -> Type -- type of foreign function
68 -> [StgArg] -- x,y arguments
69 -> Type -- result type
70 -> FCode ReturnKind
71
72 cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
73 = do { platform <- getPlatform
74 ; let -- in the stdcall calling convention, the symbol needs @size appended
75 -- to it, where size is the total number of bytes of arguments. We
76 -- attach this info to the CLabel here, and the CLabel pretty printer
77 -- will generate the suffix when the label is printed.
78 call_size args
79 | StdCallConv <- cconv = Just (sum (map arg_size args))
80 | otherwise = Nothing
81
82 -- ToDo: this might not be correct for 64-bit API
83 -- This is correct for the PowerPC ELF ABI version 1 and 2.
84 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType platform arg)
85 (platformWordSizeInBytes platform)
86 ; cmm_args <- getFCallArgs stg_args typ
87 -- ; traceM $ show cmm_args
88 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
89 ; let ((call_args, arg_hints), cmm_target)
90 = case target of
91 StaticTarget _ _ _ False ->
92 panic "cgForeignCall: unexpected FFI value import"
93 StaticTarget _ lbl mPkgId True
94 -> let labelSource
95 = case mPkgId of
96 Nothing -> ForeignLabelInThisPackage
97 Just pkgId -> ForeignLabelInPackage (toUnitId pkgId)
98 size = call_size cmm_args
99 in ( unzip cmm_args
100 , CmmLit (CmmLabel
101 (mkForeignLabel lbl size labelSource IsFunction)))
102
103 DynamicTarget -> case cmm_args of
104 (fn,_):rest -> (unzip rest, fn)
105 [] -> panic "cgForeignCall []"
106 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
107 call_target = ForeignTarget cmm_target fc
108
109 -- we want to emit code for the call, and then emitReturn.
110 -- However, if the sequel is AssignTo, we shortcut a little
111 -- and generate a foreign call that assigns the results
112 -- directly. Otherwise we end up generating a bunch of
113 -- useless "r = r" assignments, which are not merely annoying:
114 -- they prevent the common block elimination from working correctly
115 -- in the case of a safe foreign call.
116 -- See Note [safe foreign call convention]
117 --
118 ; sequel <- getSequel
119 ; case sequel of
120 AssignTo assign_to_these _ ->
121 emitForeignCall safety assign_to_these call_target call_args
122
123 _something_else ->
124 do { _ <- emitForeignCall safety res_regs call_target call_args
125 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
126 }
127 }
128
129 {- Note [safe foreign call convention]
130
131 The simple thing to do for a safe foreign call would be the same as an
132 unsafe one: just
133
134 emitForeignCall ...
135 emitReturn ...
136
137 but consider what happens in this case
138
139 case foo x y z of
140 (# s, r #) -> ...
141
142 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
143 as the result reg, and we generate
144
145 r = foo(x,y,z) returns to L1 -- emitForeignCall
146 L1:
147 r = r -- emitReturn
148 goto L2
149 L2:
150 ...
151
152 Now L1 is a proc point (by definition, it is the continuation of the
153 safe foreign call). If L2 does a heap check, then L2 will also be a
154 proc point.
155
156 Furthermore, the stack layout algorithm has to arrange to save r
157 somewhere between the call and the jump to L1, which is annoying: we
158 would have to treat r differently from the other live variables, which
159 have to be saved *before* the call.
160
161 So we adopt a special convention for safe foreign calls: the results
162 are copied out according to the NativeReturn convention by the call,
163 and the continuation of the call should copyIn the results. (The
164 copyOut code is actually inserted when the safe foreign call is
165 lowered later). The result regs attached to the safe foreign call are
166 only used temporarily to hold the results before they are copied out.
167
168 We will now generate this:
169
170 r = foo(x,y,z) returns to L1
171 L1:
172 r = R1 -- copyIn, inserted by mkSafeCall
173 goto L2
174 L2:
175 ... r ...
176
177 And when the safe foreign call is lowered later (see Note [lower safe
178 foreign calls]) we get this:
179
180 suspendThread()
181 r = foo(x,y,z)
182 resumeThread()
183 R1 = r -- copyOut, inserted by lowerSafeForeignCall
184 jump L1
185 L1:
186 r = R1 -- copyIn, inserted by mkSafeCall
187 goto L2
188 L2:
189 ... r ...
190
191 Now consider what happens if L2 does a heap check: the Adams
192 optimisation kicks in and commons up L1 with the heap-check
193 continuation, resulting in just one proc point instead of two. Yay!
194 -}
195
196
197 emitCCall :: [(CmmFormal,ForeignHint)]
198 -> CmmExpr
199 -> [(CmmActual,ForeignHint)]
200 -> FCode ()
201 emitCCall hinted_results fn hinted_args
202 = void $ emitForeignCall PlayRisky results target args
203 where
204 (args, arg_hints) = unzip hinted_args
205 (results, result_hints) = unzip hinted_results
206 target = ForeignTarget fn fc
207 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
208
209
210 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
211 emitPrimCall res op args
212 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
213
214 -- alternative entry point, used by GHC.Cmm.Parser
215 emitForeignCall
216 :: Safety
217 -> [CmmFormal] -- where to put the results
218 -> ForeignTarget -- the op
219 -> [CmmActual] -- arguments
220 -> FCode ReturnKind
221 emitForeignCall safety results target args
222 | not (playSafe safety) = do
223 platform <- getPlatform
224 let (caller_save, caller_load) = callerSaveVolatileRegs platform
225 emit caller_save
226 target' <- load_target_into_temp target
227 args' <- mapM maybe_assign_temp args
228 emit $ mkUnsafeCall target' results args'
229 emit caller_load
230 return AssignedDirectly
231
232 | otherwise = do
233 profile <- getProfile
234 platform <- getPlatform
235 updfr_off <- getUpdFrameOff
236 target' <- load_target_into_temp target
237 args' <- mapM maybe_assign_temp args
238 k <- newBlockId
239 let (off, _, copyout) = copyInOflow profile NativeReturn (Young k) results []
240 -- see Note [safe foreign call convention]
241 tscope <- getTickScope
242 emit $
243 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth platform)))
244 (CmmLit (CmmBlock k))
245 <*> mkLast (CmmForeignCall { tgt = target'
246 , res = results
247 , args = args'
248 , succ = k
249 , ret_args = off
250 , ret_off = updfr_off
251 , intrbl = playInterruptible safety })
252 <*> mkLabel k tscope
253 <*> copyout
254 )
255 return (ReturnedTo k off)
256
257 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
258 load_target_into_temp (ForeignTarget expr conv) = do
259 tmp <- maybe_assign_temp expr
260 return (ForeignTarget tmp conv)
261 load_target_into_temp other_target@(PrimTarget _) =
262 return other_target
263
264 -- What we want to do here is create a new temporary for the foreign
265 -- call argument if it is not safe to use the expression directly,
266 -- because the expression mentions caller-saves GlobalRegs (see
267 -- Note [Register parameter passing]).
268 --
269 -- However, we can't pattern-match on the expression here, because
270 -- this is used in a loop by GHC.Cmm.Parser, and testing the expression
271 -- results in a black hole. So we always create a temporary, and rely
272 -- on GHC.Cmm.Sink to clean it up later. (Yuck, ToDo). The generated code
273 -- ends up being the same, at least for the RTS .cmm code.
274 --
275 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
276 maybe_assign_temp e = do
277 platform <- getPlatform
278 reg <- newTemp (cmmExprType platform e)
279 emitAssign (CmmLocal reg) e
280 return (CmmReg (CmmLocal reg))
281
282 -- -----------------------------------------------------------------------------
283 -- Save/restore the thread state in the TSO
284
285 -- This stuff can't be done in suspendThread/resumeThread, because it
286 -- refers to global registers which aren't available in the C world.
287
288 emitSaveThreadState :: FCode ()
289 emitSaveThreadState = do
290 profile <- getProfile
291 code <- saveThreadState profile
292 emit code
293
294 -- | Produce code to save the current thread state to @CurrentTSO@
295 saveThreadState :: MonadUnique m => Profile -> m CmmAGraph
296 saveThreadState profile = do
297 let platform = profilePlatform profile
298 tso <- newTemp (gcWord platform)
299 close_nursery <- closeNursery profile tso
300 pure $ catAGraphs
301 [ -- tso = CurrentTSO;
302 mkAssign (CmmLocal tso) currentTSOExpr
303
304 , -- tso->stackobj->sp = Sp;
305 mkStore (cmmOffset platform
306 (CmmLoad (cmmOffset platform
307 (CmmReg (CmmLocal tso))
308 (tso_stackobj profile))
309 (bWord platform))
310 (stack_SP profile))
311 spExpr
312
313 , close_nursery
314
315 , -- and save the current cost centre stack in the TSO when profiling:
316 if profileIsProfiling profile
317 then mkStore (cmmOffset platform (CmmReg (CmmLocal tso)) (tso_CCCS profile)) cccsExpr
318 else mkNop
319 ]
320
321
322
323 -- | Save STG registers
324 --
325 -- STG registers must be saved around a C call, just in case the STG
326 -- register is mapped to a caller-saves machine register. Normally we
327 -- don't need to worry about this the code generator has already
328 -- loaded any live STG registers into variables for us, but in
329 -- hand-written low-level Cmm code where we don't know which registers
330 -- are live, we might have to save them all.
331 emitSaveRegs :: FCode ()
332 emitSaveRegs = do
333 platform <- getPlatform
334 let regs = realArgRegsCover platform
335 save = catAGraphs (map (callerSaveGlobalReg platform) regs)
336 emit save
337
338 -- | Restore STG registers (see 'emitSaveRegs')
339 emitRestoreRegs :: FCode ()
340 emitRestoreRegs = do
341 platform <- getPlatform
342 let regs = realArgRegsCover platform
343 restore = catAGraphs (map (callerRestoreGlobalReg platform) regs)
344 emit restore
345
346 -- | Push a subset of STG registers onto the stack, specified by the bitmap
347 --
348 -- Sometimes, a "live" subset of the STG registers needs to be saved on the
349 -- stack, for example when storing an unboxed tuple to be used in the GHCi
350 -- bytecode interpreter.
351 --
352 -- The "live registers" bitmap corresponds to the list of registers given by
353 -- 'tupleRegsCover', with the least significant bit indicating liveness of
354 -- the first register in the list.
355 --
356 -- Each register is saved to a stack slot of one or more machine words, even
357 -- if the register size itself is smaller.
358 --
359 -- The resulting Cmm code looks like this, with a line for each real or
360 -- virtual register used for returning tuples:
361 --
362 -- ...
363 -- if((mask & 2) != 0) { Sp_adj(-1); Sp(0) = R2; }
364 -- if((mask & 1) != 0) { Sp_adj(-1); Sp(0) = R1; }
365 --
366 -- See Note [GHCi tuple layout]
367
368 emitPushTupleRegs :: CmmExpr -> FCode ()
369 emitPushTupleRegs regs_live = do
370 platform <- getPlatform
371 let regs = zip (tupleRegsCover platform) [0..]
372 save_arg (reg, n) =
373 let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
374 live = cmmAndWord platform regs_live mask
375 cond = cmmNeWord platform live (zeroExpr platform)
376 reg_ty = cmmRegType platform (CmmGlobal reg)
377 width = roundUpToWords platform
378 (widthInBytes $ typeWidth reg_ty)
379 adj_sp = mkAssign spReg
380 (cmmOffset platform spExpr (negate width))
381 save_reg = mkStore spExpr (CmmReg $ CmmGlobal reg)
382 in mkCmmIfThen cond $ catAGraphs [adj_sp, save_reg]
383 emit . catAGraphs =<< mapM save_arg (reverse regs)
384
385 -- | Pop a subset of STG registers from the stack (see 'emitPushTupleRegs')
386 emitPopTupleRegs :: CmmExpr -> FCode ()
387 emitPopTupleRegs regs_live = do
388 platform <- getPlatform
389 let regs = zip (tupleRegsCover platform) [0..]
390 save_arg (reg, n) =
391 let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
392 live = cmmAndWord platform regs_live mask
393 cond = cmmNeWord platform live (zeroExpr platform)
394 reg_ty = cmmRegType platform (CmmGlobal reg)
395 width = roundUpToWords platform
396 (widthInBytes $ typeWidth reg_ty)
397 adj_sp = mkAssign spReg
398 (cmmOffset platform spExpr width)
399 restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty)
400 in mkCmmIfThen cond $ catAGraphs [restore_reg, adj_sp]
401 emit . catAGraphs =<< mapM save_arg regs
402
403
404 emitCloseNursery :: FCode ()
405 emitCloseNursery = do
406 profile <- getProfile
407 let platform = profilePlatform profile
408 tso <- newTemp (bWord platform)
409 code <- closeNursery profile tso
410 emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
411
412 {- |
413 @closeNursery dflags tso@ produces code to close the nursery.
414 A local register holding the value of @CurrentTSO@ is expected for
415 efficiency.
416
417 Closing the nursery corresponds to the following code:
418
419 @
420 tso = CurrentTSO;
421 cn = CurrentNuresry;
422
423 // Update the allocation limit for the current thread. We don't
424 // check to see whether it has overflowed at this point, that check is
425 // made when we run out of space in the current heap block (stg_gc_noregs)
426 // and in the scheduler when context switching (schedulePostRunThread).
427 tso->alloc_limit -= Hp + WDS(1) - cn->start;
428
429 // Set cn->free to the next unoccupied word in the block
430 cn->free = Hp + WDS(1);
431 @
432 -}
433 closeNursery :: MonadUnique m => Profile -> LocalReg -> m CmmAGraph
434 closeNursery profile tso = do
435 let tsoreg = CmmLocal tso
436 platform = profilePlatform profile
437 cnreg <- CmmLocal <$> newTemp (bWord platform)
438 pure $ catAGraphs [
439 mkAssign cnreg currentNurseryExpr,
440
441 -- CurrentNursery->free = Hp+1;
442 mkStore (nursery_bdescr_free platform cnreg) (cmmOffsetW platform hpExpr 1),
443
444 let alloc =
445 CmmMachOp (mo_wordSub platform)
446 [ cmmOffsetW platform hpExpr 1
447 , CmmLoad (nursery_bdescr_start platform cnreg) (bWord platform)
448 ]
449
450 alloc_limit = cmmOffset platform (CmmReg tsoreg) (tso_alloc_limit profile)
451 in
452
453 -- tso->alloc_limit += alloc
454 mkStore alloc_limit (CmmMachOp (MO_Sub W64)
455 [ CmmLoad alloc_limit b64
456 , CmmMachOp (mo_WordTo64 platform) [alloc] ])
457 ]
458
459 emitLoadThreadState :: FCode ()
460 emitLoadThreadState = do
461 profile <- getProfile
462 code <- loadThreadState profile
463 emit code
464
465 -- | Produce code to load the current thread state from @CurrentTSO@
466 loadThreadState :: MonadUnique m => Profile -> m CmmAGraph
467 loadThreadState profile = do
468 let platform = profilePlatform profile
469 tso <- newTemp (gcWord platform)
470 stack <- newTemp (gcWord platform)
471 open_nursery <- openNursery profile tso
472 pure $ catAGraphs [
473 -- tso = CurrentTSO;
474 mkAssign (CmmLocal tso) currentTSOExpr,
475 -- stack = tso->stackobj;
476 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso)) (tso_stackobj profile)) (bWord platform)),
477 -- Sp = stack->sp;
478 mkAssign spReg (CmmLoad (cmmOffset platform (CmmReg (CmmLocal stack)) (stack_SP profile)) (bWord platform)),
479 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
480 mkAssign spLimReg (cmmOffsetW platform (cmmOffset platform (CmmReg (CmmLocal stack)) (stack_STACK profile))
481 (pc_RESERVED_STACK_WORDS (platformConstants platform))),
482 -- HpAlloc = 0;
483 -- HpAlloc is assumed to be set to non-zero only by a failed
484 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
485 mkAssign hpAllocReg (zeroExpr platform),
486 open_nursery,
487 -- and load the current cost centre stack from the TSO when profiling:
488 if profileIsProfiling profile
489 then storeCurCCS
490 (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso))
491 (tso_CCCS profile)) (ccsType platform))
492 else mkNop
493 ]
494
495
496 emitOpenNursery :: FCode ()
497 emitOpenNursery = do
498 profile <- getProfile
499 let platform = profilePlatform profile
500 tso <- newTemp (bWord platform)
501 code <- openNursery profile tso
502 emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
503
504 {- |
505 @openNursery profile tso@ produces code to open the nursery. A local register
506 holding the value of @CurrentTSO@ is expected for efficiency.
507
508 Opening the nursery corresponds to the following code:
509
510 @
511 tso = CurrentTSO;
512 cn = CurrentNursery;
513 bdfree = CurrentNursery->free;
514 bdstart = CurrentNursery->start;
515
516 // We *add* the currently occupied portion of the nursery block to
517 // the allocation limit, because we will subtract it again in
518 // closeNursery.
519 tso->alloc_limit += bdfree - bdstart;
520
521 // Set Hp to the last occupied word of the heap block. Why not the
522 // next unoccupied word? Doing it this way means that we get to use
523 // an offset of zero more often, which might lead to slightly smaller
524 // code on some architectures.
525 Hp = bdfree - WDS(1);
526
527 // Set HpLim to the end of the current nursery block (note that this block
528 // might be a block group, consisting of several adjacent blocks.
529 HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
530 @
531 -}
532 openNursery :: MonadUnique m => Profile -> LocalReg -> m CmmAGraph
533 openNursery profile tso = do
534 let tsoreg = CmmLocal tso
535 platform = profilePlatform profile
536 cnreg <- CmmLocal <$> newTemp (bWord platform)
537 bdfreereg <- CmmLocal <$> newTemp (bWord platform)
538 bdstartreg <- CmmLocal <$> newTemp (bWord platform)
539
540 -- These assignments are carefully ordered to reduce register
541 -- pressure and generate not completely awful code on x86. To see
542 -- what code we generate, look at the assembly for
543 -- stg_returnToStackTop in rts/StgStartup.cmm.
544 pure $ catAGraphs [
545 mkAssign cnreg currentNurseryExpr,
546 mkAssign bdfreereg (CmmLoad (nursery_bdescr_free platform cnreg) (bWord platform)),
547
548 -- Hp = CurrentNursery->free - 1;
549 mkAssign hpReg (cmmOffsetW platform (CmmReg bdfreereg) (-1)),
550
551 mkAssign bdstartreg (CmmLoad (nursery_bdescr_start platform cnreg) (bWord platform)),
552
553 -- HpLim = CurrentNursery->start +
554 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
555 mkAssign hpLimReg
556 (cmmOffsetExpr platform
557 (CmmReg bdstartreg)
558 (cmmOffset platform
559 (CmmMachOp (mo_wordMul platform) [
560 CmmMachOp (MO_SS_Conv W32 (wordWidth platform))
561 [CmmLoad (nursery_bdescr_blocks platform cnreg) b32],
562 mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform))
563 ])
564 (-1)
565 )
566 ),
567
568 -- alloc = bd->free - bd->start
569 let alloc =
570 CmmMachOp (mo_wordSub platform) [CmmReg bdfreereg, CmmReg bdstartreg]
571
572 alloc_limit = cmmOffset platform (CmmReg tsoreg) (tso_alloc_limit profile)
573 in
574
575 -- tso->alloc_limit += alloc
576 mkStore alloc_limit (CmmMachOp (MO_Add W64)
577 [ CmmLoad alloc_limit b64
578 , CmmMachOp (mo_WordTo64 platform) [alloc] ])
579
580 ]
581
582 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
583 :: Platform -> CmmReg -> CmmExpr
584 nursery_bdescr_free platform cn =
585 cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_free (platformConstants platform))
586 nursery_bdescr_start platform cn =
587 cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_start (platformConstants platform))
588 nursery_bdescr_blocks platform cn =
589 cmmOffset platform (CmmReg cn) (pc_OFFSET_bdescr_blocks (platformConstants platform))
590
591 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: Profile -> ByteOff
592 tso_stackobj profile = closureField profile (pc_OFFSET_StgTSO_stackobj (profileConstants profile))
593 tso_alloc_limit profile = closureField profile (pc_OFFSET_StgTSO_alloc_limit (profileConstants profile))
594 tso_CCCS profile = closureField profile (pc_OFFSET_StgTSO_cccs (profileConstants profile))
595 stack_STACK profile = closureField profile (pc_OFFSET_StgStack_stack (profileConstants profile))
596 stack_SP profile = closureField profile (pc_OFFSET_StgStack_sp (profileConstants profile))
597
598
599 closureField :: Profile -> ByteOff -> ByteOff
600 closureField profile off = off + fixedHdrSize profile
601
602 -- Note [Unlifted boxed arguments to foreign calls]
603 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604 --
605 -- For certain types passed to foreign calls, we adjust the actual
606 -- value passed to the call. For ByteArray#, Array#, SmallArray#,
607 -- and ArrayArray#, we pass the address of the array's payload, not
608 -- the address of the heap object. For example, consider
609 -- foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
610 -- At a Haskell call like `foo x y`, we'll generate a C call that
611 -- is more like
612 -- c_foo( x+8, y )
613 -- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
614 -- it past the header words of the ByteArray object to point directly
615 -- to the data inside the ByteArray#. (The exact offset depends
616 -- on the target architecture and on profiling) By contrast, (y :: Int#)
617 -- requires no such adjustment.
618 --
619 -- This adjustment is performed by 'add_shim'. The size of the
620 -- adjustment depends on the type of heap object. But
621 -- how can we determine that type? There are two available options.
622 -- We could use the types of the actual values that the foreign call
623 -- has been applied to, or we could use the types present in the
624 -- foreign function's type. Prior to GHC 8.10, we used the former
625 -- strategy since it's a little more simple. However, in issue #16650
626 -- and more compellingly in the comments of
627 -- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
628 -- demonstrated that this leads to bad behavior in the presence
629 -- of unsafeCoerce#. Returning to the above example, suppose the
630 -- Haskell call looked like
631 -- foo (unsafeCoerce# p)
632 -- where the types of expressions comprising the arguments are
633 -- p :: (Any :: TYPE 'UnliftedRep)
634 -- i :: Int#
635 -- so that the unsafe-coerce is between Any and ByteArray#.
636 -- These two types have the same kind (they are both represented by
637 -- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
638 -- By the time this gets to the code generator the cast has been
639 -- discarded so we have
640 -- foo p y
641 -- But we *must* adjust the pointer to p by a ByteArray# shim,
642 -- *not* by an Any shim (the Any shim involves no offset at all).
643 --
644 -- To avoid this bad behavior, we adopt the second strategy: use
645 -- the types present in the foreign function's type.
646 -- In collectStgFArgTypes, we convert the foreign function's
647 -- type to a list of StgFArgType. Then, in add_shim, we interpret
648 -- these as numeric offsets.
649
650 getFCallArgs ::
651 [StgArg]
652 -> Type -- the type of the foreign function
653 -> FCode [(CmmExpr, ForeignHint)]
654 -- (a) Drop void args
655 -- (b) Add foreign-call shim code
656 -- It's (b) that makes this differ from getNonVoidArgAmodes
657 -- Precondition: args and typs have the same length
658 -- See Note [Unlifted boxed arguments to foreign calls]
659 getFCallArgs args typ
660 = do { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
661 ; return (catMaybes mb_cmms) }
662 where
663 get (arg,typ)
664 | null arg_reps
665 = return Nothing
666 | otherwise
667 = do { cmm <- getArgAmode (NonVoid arg)
668 ; profile <- getProfile
669 ; return (Just (add_shim profile typ cmm, hint)) }
670 where
671 arg_ty = stgArgType arg
672 arg_reps = typePrimRep arg_ty
673 hint = typeForeignHint arg_ty
674
675 -- The minimum amount of information needed to determine
676 -- the offset to apply to an argument to a foreign call.
677 -- See Note [Unlifted boxed arguments to foreign calls]
678 data StgFArgType
679 = StgPlainType
680 | StgArrayType
681 | StgSmallArrayType
682 | StgByteArrayType
683
684 -- See Note [Unlifted boxed arguments to foreign calls]
685 add_shim :: Profile -> StgFArgType -> CmmExpr -> CmmExpr
686 add_shim profile ty expr = case ty of
687 StgPlainType -> expr
688 StgArrayType -> cmmOffsetB platform expr (arrPtrsHdrSize profile)
689 StgSmallArrayType -> cmmOffsetB platform expr (smallArrPtrsHdrSize profile)
690 StgByteArrayType -> cmmOffsetB platform expr (arrWordsHdrSize profile)
691 where
692 platform = profilePlatform profile
693
694 -- From a function, extract information needed to determine
695 -- the offset of each argument when used as a C FFI argument.
696 -- See Note [Unlifted boxed arguments to foreign calls]
697 collectStgFArgTypes :: Type -> [StgFArgType]
698 collectStgFArgTypes = go []
699 where
700 -- Skip foralls
701 go bs (ForAllTy _ res) = go bs res
702 go bs (AppTy{}) = reverse bs
703 go bs (TyConApp{}) = reverse bs
704 go bs (LitTy{}) = reverse bs
705 go bs (TyVarTy{}) = reverse bs
706 go _ (CastTy{}) = panic "myCollectTypeArgs: CastTy"
707 go _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy"
708 go bs (FunTy {ft_arg = arg, ft_res=res}) =
709 go (typeToStgFArgType arg:bs) res
710
711 -- Choose the offset based on the type. For anything other
712 -- than an unlifted boxed type, there is no offset.
713 -- See Note [Unlifted boxed arguments to foreign calls]
714 typeToStgFArgType :: Type -> StgFArgType
715 typeToStgFArgType typ
716 | tycon == arrayPrimTyCon = StgArrayType
717 | tycon == mutableArrayPrimTyCon = StgArrayType
718 | tycon == arrayArrayPrimTyCon = StgArrayType
719 | tycon == mutableArrayArrayPrimTyCon = StgArrayType
720 | tycon == smallArrayPrimTyCon = StgSmallArrayType
721 | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
722 | tycon == byteArrayPrimTyCon = StgByteArrayType
723 | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
724 | otherwise = StgPlainType
725 where
726 -- Should be a tycon app, since this is a foreign call. We look
727 -- through newtypes so the offset does not change if a user replaces
728 -- a type in a foreign function signature with a representationally
729 -- equivalent newtype.
730 tycon = tyConAppTyCon (unwrapType typ)