never executed always true always false
1 {-# LANGUAGE GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures,
13 -- the structure should not be too overwhelming.
14
15 module GHC.CmmToAsm.PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 -- NCG stuff:
24 import GHC.Prelude
25
26 import GHC.Platform.Regs
27 import GHC.CmmToAsm.PPC.Instr
28 import GHC.CmmToAsm.PPC.Cond
29 import GHC.CmmToAsm.PPC.Regs
30 import GHC.CmmToAsm.CPrim
31 import GHC.CmmToAsm.Types
32 import GHC.Cmm.DebugBlock
33 ( DebugBlock(..) )
34 import GHC.CmmToAsm.Monad
35 ( NatM, getNewRegNat, getNewLabelNat
36 , getBlockIdNat, getPicBaseNat, getNewRegPairNat
37 , getPicBaseMaybeNat, getPlatform, getConfig
38 , getDebugBlock, getFileId
39 )
40 import GHC.CmmToAsm.PIC
41 import GHC.CmmToAsm.Format
42 import GHC.CmmToAsm.Config
43 import GHC.Platform.Reg.Class
44 import GHC.Platform.Reg
45 import GHC.CmmToAsm.Reg.Target
46 import GHC.Platform
47
48 -- Our intermediate code:
49 import GHC.Cmm.BlockId
50 import GHC.Cmm.Ppr ( pprExpr )
51 import GHC.Cmm
52 import GHC.Cmm.Utils
53 import GHC.Cmm.Switch
54 import GHC.Cmm.CLabel
55 import GHC.Cmm.Dataflow.Block
56 import GHC.Cmm.Dataflow.Graph
57 import GHC.Types.Tickish ( GenTickish(..) )
58 import GHC.Types.SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
59
60 -- The rest:
61 import GHC.Data.OrdList
62 import GHC.Utils.Outputable
63 import GHC.Utils.Panic
64 import GHC.Utils.Panic.Plain
65
66 import Control.Monad ( mapAndUnzipM, when )
67 import Data.Word
68
69 import GHC.Types.Basic
70 import GHC.Data.FastString
71
72 -- -----------------------------------------------------------------------------
73 -- Top-level of the instruction selector
74
75 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
76 -- They are really trees of insns to facilitate fast appending, where a
77 -- left-to-right traversal (pre-order?) yields the insns in the correct
78 -- order.
79
80 cmmTopCodeGen
81 :: RawCmmDecl
82 -> NatM [NatCmmDecl RawCmmStatics Instr]
83
84 cmmTopCodeGen (CmmProc info lab live graph) = do
85 let blocks = toBlockListEntryFirst graph
86 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
87 platform <- getPlatform
88 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
89 tops = proc : concat statics
90 os = platformOS platform
91 arch = platformArch platform
92 case arch of
93 ArchPPC | os == OSAIX -> return tops
94 | otherwise -> do
95 picBaseMb <- getPicBaseMaybeNat
96 case picBaseMb of
97 Just picBase -> initializePicBase_ppc arch os picBase tops
98 Nothing -> return tops
99 ArchPPC_64 ELF_V1 -> fixup_entry tops
100 -- generating function descriptor is handled in
101 -- pretty printer
102 ArchPPC_64 ELF_V2 -> fixup_entry tops
103 -- generating function prologue is handled in
104 -- pretty printer
105 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
106 where
107 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
108 = do
109 let BasicBlock bID insns = entry
110 bID' <- if lab == (blockLbl bID)
111 then newBlockId
112 else return bID
113 let b' = BasicBlock bID' insns
114 return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
115 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
116
117 cmmTopCodeGen (CmmData sec dat) =
118 return [CmmData sec dat] -- no translation, we just use CmmStatic
119
120 basicBlockCodeGen
121 :: Block CmmNode C C
122 -> NatM ( [NatBasicBlock Instr]
123 , [NatCmmDecl RawCmmStatics Instr])
124
125 basicBlockCodeGen block = do
126 let (_, nodes, tail) = blockSplit block
127 id = entryLabel block
128 stmts = blockToList nodes
129 -- Generate location directive
130 dbg <- getDebugBlock (entryLabel block)
131 loc_instrs <- case dblSourceTick =<< dbg of
132 Just (SourceNote span name)
133 -> do fileid <- getFileId (srcSpanFile span)
134 let line = srcSpanStartLine span; col =srcSpanStartCol span
135 return $ unitOL $ LOCATION fileid line col name
136 _ -> return nilOL
137 mid_instrs <- stmtsToInstrs stmts
138 tail_instrs <- stmtToInstrs tail
139 let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
140 -- code generation may introduce new basic block boundaries, which
141 -- are indicated by the NEWBLOCK instruction. We must split up the
142 -- instruction stream into basic blocks again. Also, we extract
143 -- LDATAs here too.
144 let
145 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
146
147 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
148 = ([], BasicBlock id instrs : blocks, statics)
149 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
150 = (instrs, blocks, CmmData sec dat:statics)
151 mkBlocks instr (instrs,blocks,statics)
152 = (instr:instrs, blocks, statics)
153 return (BasicBlock id top : other_blocks, statics)
154
155 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
156 stmtsToInstrs stmts
157 = do instrss <- mapM stmtToInstrs stmts
158 return (concatOL instrss)
159
160 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
161 stmtToInstrs stmt = do
162 config <- getConfig
163 platform <- getPlatform
164 case stmt of
165 CmmComment s -> return (unitOL (COMMENT $ ftext s))
166 CmmTick {} -> return nilOL
167 CmmUnwind {} -> return nilOL
168
169 CmmAssign reg src
170 | isFloatType ty -> assignReg_FltCode format reg src
171 | target32Bit platform &&
172 isWord64 ty -> assignReg_I64Code reg src
173 | otherwise -> assignReg_IntCode format reg src
174 where ty = cmmRegType platform reg
175 format = cmmTypeFormat ty
176
177 CmmStore addr src
178 | isFloatType ty -> assignMem_FltCode format addr src
179 | target32Bit platform &&
180 isWord64 ty -> assignMem_I64Code addr src
181 | otherwise -> assignMem_IntCode format addr src
182 where ty = cmmExprType platform src
183 format = cmmTypeFormat ty
184
185 CmmUnsafeForeignCall target result_regs args
186 -> genCCall target result_regs args
187
188 CmmBranch id -> genBranch id
189 CmmCondBranch arg true false prediction -> do
190 b1 <- genCondJump true arg prediction
191 b2 <- genBranch false
192 return (b1 `appOL` b2)
193 CmmSwitch arg ids -> genSwitch config arg ids
194 CmmCall { cml_target = arg
195 , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
196 _ ->
197 panic "stmtToInstrs: statement should have been cps'd away"
198
199 jumpRegs :: Platform -> [GlobalReg] -> [Reg]
200 jumpRegs platform gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
201
202 --------------------------------------------------------------------------------
203 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
204 -- They are really trees of insns to facilitate fast appending, where a
205 -- left-to-right traversal yields the insns in the correct order.
206 --
207 type InstrBlock
208 = OrdList Instr
209
210
211 -- | Register's passed up the tree. If the stix code forces the register
212 -- to live in a pre-decided machine register, it comes out as @Fixed@;
213 -- otherwise, it comes out as @Any@, and the parent can decide which
214 -- register to put it in.
215 --
216 data Register
217 = Fixed Format Reg InstrBlock
218 | Any Format (Reg -> InstrBlock)
219
220
221 swizzleRegisterRep :: Register -> Format -> Register
222 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
223 swizzleRegisterRep (Any _ codefn) format = Any format codefn
224
225
226 -- | Grab the Reg for a CmmReg
227 getRegisterReg :: Platform -> CmmReg -> Reg
228
229 getRegisterReg _ (CmmLocal (LocalReg u pk))
230 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
231
232 getRegisterReg platform (CmmGlobal mid)
233 = case globalRegMaybe platform mid of
234 Just reg -> RegReal reg
235 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
236 -- By this stage, the only MagicIds remaining should be the
237 -- ones which map to a real machine register on this
238 -- platform. Hence ...
239
240 -- | Convert a BlockId to some CmmStatic data
241 jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
242 jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
243 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
244 where blockLabel = blockLbl blockid
245
246
247
248 -- -----------------------------------------------------------------------------
249 -- General things for putting together code sequences
250
251 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
252 -- CmmExprs into CmmRegOff?
253 mangleIndexTree :: Platform -> CmmExpr -> CmmExpr
254 mangleIndexTree platform (CmmRegOff reg off)
255 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
256 where width = typeWidth (cmmRegType platform reg)
257
258 mangleIndexTree _ _
259 = panic "PPC.CodeGen.mangleIndexTree: no match"
260
261 -- -----------------------------------------------------------------------------
262 -- Code gen for 64-bit arithmetic on 32-bit platforms
263
264 {-
265 Simple support for generating 64-bit code (ie, 64 bit values and 64
266 bit assignments) on 32-bit platforms. Unlike the main code generator
267 we merely shoot for generating working code as simply as possible, and
268 pay little attention to code quality. Specifically, there is no
269 attempt to deal cleverly with the fixed-vs-floating register
270 distinction; all values are generated into (pairs of) floating
271 registers, even if this would mean some redundant reg-reg moves as a
272 result. Only one of the VRegUniques is returned, since it will be
273 of the VRegUniqueLo form, and the upper-half VReg can be determined
274 by applying getHiVRegFromLo to it.
275 -}
276
277 data ChildCode64 -- a.k.a "Register64"
278 = ChildCode64
279 InstrBlock -- code
280 Reg -- the lower 32-bit temporary which contains the
281 -- result; use getHiVRegFromLo to find the other
282 -- VRegUnique. Rules of this simplified insn
283 -- selection game are therefore that the returned
284 -- Reg may be modified
285
286
287 -- | Compute an expression into a register, but
288 -- we don't mind which one it is.
289 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
290 getSomeReg expr = do
291 r <- getRegister expr
292 case r of
293 Any rep code -> do
294 tmp <- getNewRegNat rep
295 return (tmp, code tmp)
296 Fixed _ reg code ->
297 return (reg, code)
298
299 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
300 getI64Amodes addrTree = do
301 Amode hi_addr addr_code <- getAmode D addrTree
302 case addrOffset hi_addr 4 of
303 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
304 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
305 return (AddrRegImm hi_ptr (ImmInt 0),
306 AddrRegImm hi_ptr (ImmInt 4),
307 code)
308
309
310 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
311 assignMem_I64Code addrTree valueTree = do
312 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
313 ChildCode64 vcode rlo <- iselExpr64 valueTree
314 let
315 rhi = getHiVRegFromLo rlo
316
317 -- Big-endian store
318 mov_hi = ST II32 rhi hi_addr
319 mov_lo = ST II32 rlo lo_addr
320 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
321
322
323 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
324 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
325 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
326 let
327 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
328 r_dst_hi = getHiVRegFromLo r_dst_lo
329 r_src_hi = getHiVRegFromLo r_src_lo
330 mov_lo = MR r_dst_lo r_src_lo
331 mov_hi = MR r_dst_hi r_src_hi
332 return (
333 vcode `snocOL` mov_lo `snocOL` mov_hi
334 )
335
336 assignReg_I64Code _ _
337 = panic "assignReg_I64Code(powerpc): invalid lvalue"
338
339
340 iselExpr64 :: CmmExpr -> NatM ChildCode64
341 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
342 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
343 (rlo, rhi) <- getNewRegPairNat II32
344 let mov_hi = LD II32 rhi hi_addr
345 mov_lo = LD II32 rlo lo_addr
346 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
347 rlo
348
349 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
350 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
351
352 iselExpr64 (CmmLit (CmmInt i _)) = do
353 (rlo,rhi) <- getNewRegPairNat II32
354 let
355 half0 = fromIntegral (fromIntegral i :: Word16)
356 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
357 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
358 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
359
360 code = toOL [
361 LIS rlo (ImmInt half1),
362 OR rlo rlo (RIImm $ ImmInt half0),
363 LIS rhi (ImmInt half3),
364 OR rhi rhi (RIImm $ ImmInt half2)
365 ]
366 return (ChildCode64 code rlo)
367
368 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
369 ChildCode64 code1 r1lo <- iselExpr64 e1
370 ChildCode64 code2 r2lo <- iselExpr64 e2
371 (rlo,rhi) <- getNewRegPairNat II32
372 let
373 r1hi = getHiVRegFromLo r1lo
374 r2hi = getHiVRegFromLo r2lo
375 code = code1 `appOL`
376 code2 `appOL`
377 toOL [ ADDC rlo r1lo r2lo,
378 ADDE rhi r1hi r2hi ]
379 return (ChildCode64 code rlo)
380
381 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
382 ChildCode64 code1 r1lo <- iselExpr64 e1
383 ChildCode64 code2 r2lo <- iselExpr64 e2
384 (rlo,rhi) <- getNewRegPairNat II32
385 let
386 r1hi = getHiVRegFromLo r1lo
387 r2hi = getHiVRegFromLo r2lo
388 code = code1 `appOL`
389 code2 `appOL`
390 toOL [ SUBFC rlo r2lo (RIReg r1lo),
391 SUBFE rhi r2hi r1hi ]
392 return (ChildCode64 code rlo)
393
394 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
395 (expr_reg,expr_code) <- getSomeReg expr
396 (rlo, rhi) <- getNewRegPairNat II32
397 let mov_hi = LI rhi (ImmInt 0)
398 mov_lo = MR rlo expr_reg
399 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
400 rlo
401
402 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
403 (expr_reg,expr_code) <- getSomeReg expr
404 (rlo, rhi) <- getNewRegPairNat II32
405 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
406 mov_lo = MR rlo expr_reg
407 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
408 rlo
409 iselExpr64 expr
410 = do
411 platform <- getPlatform
412 pprPanic "iselExpr64(powerpc)" (pprExpr platform expr)
413
414
415
416 getRegister :: CmmExpr -> NatM Register
417 getRegister e = do config <- getConfig
418 getRegister' config (ncgPlatform config) e
419
420 getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
421
422 getRegister' _ platform (CmmReg (CmmGlobal PicBaseReg))
423 | OSAIX <- platformOS platform = do
424 let code dst = toOL [ LD II32 dst tocAddr ]
425 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
426 return (Any II32 code)
427 | target32Bit platform = do
428 reg <- getPicBaseNat $ archWordFormat (target32Bit platform)
429 return (Fixed (archWordFormat (target32Bit platform))
430 reg nilOL)
431 | otherwise = return (Fixed II64 toc nilOL)
432
433 getRegister' _ platform (CmmReg reg)
434 = return (Fixed (cmmTypeFormat (cmmRegType platform reg))
435 (getRegisterReg platform reg) nilOL)
436
437 getRegister' config platform tree@(CmmRegOff _ _)
438 = getRegister' config platform (mangleIndexTree platform tree)
439
440 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
441 -- TO_W_(x), TO_W_(x >> 32)
442
443 getRegister' _ platform (CmmMachOp (MO_UU_Conv W64 W32)
444 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
445 | target32Bit platform = do
446 ChildCode64 code rlo <- iselExpr64 x
447 return $ Fixed II32 (getHiVRegFromLo rlo) code
448
449 getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32)
450 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
451 | target32Bit platform = do
452 ChildCode64 code rlo <- iselExpr64 x
453 return $ Fixed II32 (getHiVRegFromLo rlo) code
454
455 getRegister' _ platform (CmmMachOp (MO_UU_Conv W64 W32) [x])
456 | target32Bit platform = do
457 ChildCode64 code rlo <- iselExpr64 x
458 return $ Fixed II32 rlo code
459
460 getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32) [x])
461 | target32Bit platform = do
462 ChildCode64 code rlo <- iselExpr64 x
463 return $ Fixed II32 rlo code
464
465 getRegister' _ platform (CmmLoad mem pk)
466 | not (isWord64 pk) = do
467 Amode addr addr_code <- getAmode D mem
468 let code dst = assert ((targetClassOfReg platform dst == RcDouble) == isFloatType pk) $
469 addr_code `snocOL` LD format dst addr
470 return (Any format code)
471 | not (target32Bit platform) = do
472 Amode addr addr_code <- getAmode DS mem
473 let code dst = addr_code `snocOL` LD II64 dst addr
474 return (Any II64 code)
475
476 where format = cmmTypeFormat pk
477
478 -- catch simple cases of zero- or sign-extended load
479 getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
480 Amode addr addr_code <- getAmode D mem
481 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
482
483 getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
484 Amode addr addr_code <- getAmode D mem
485 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
486
487 getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
488 Amode addr addr_code <- getAmode D mem
489 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
490
491 getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
492 Amode addr addr_code <- getAmode D mem
493 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
494
495 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
496
497 getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
498 Amode addr addr_code <- getAmode D mem
499 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
500
501 getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
502 Amode addr addr_code <- getAmode D mem
503 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
504
505 getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
506 Amode addr addr_code <- getAmode D mem
507 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
508
509 getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
510 Amode addr addr_code <- getAmode D mem
511 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
512
513 getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
514 Amode addr addr_code <- getAmode D mem
515 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
516
517 getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
518 -- lwa is DS-form. See Note [Power instruction format]
519 Amode addr addr_code <- getAmode DS mem
520 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
521
522 getRegister' config platform (CmmMachOp mop [x]) -- unary MachOps
523 = case mop of
524 MO_Not rep -> triv_ucode_int rep NOT
525
526 MO_F_Neg w -> triv_ucode_float w FNEG
527 MO_S_Neg w -> triv_ucode_int w NEG
528
529 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
530 MO_FF_Conv W32 W64 -> conversionNop FF64 x
531
532 MO_FS_Conv from to -> coerceFP2Int from to x
533 MO_SF_Conv from to -> coerceInt2FP from to x
534
535 MO_SS_Conv from to
536 | from >= to -> conversionNop (intFormat to) x
537 | otherwise -> triv_ucode_int to (EXTS (intFormat from))
538
539 MO_UU_Conv from to
540 | from >= to -> conversionNop (intFormat to) x
541 | otherwise -> clearLeft from to
542
543 MO_XX_Conv _ to -> conversionNop (intFormat to) x
544
545 _ -> panic "PPC.CodeGen.getRegister: no match"
546
547 where
548 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
549 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
550
551 conversionNop new_format expr
552 = do e_code <- getRegister' config platform expr
553 return (swizzleRegisterRep e_code new_format)
554
555 clearLeft from to
556 = do (src1, code1) <- getSomeReg x
557 let arch_fmt = intFormat (wordWidth platform)
558 arch_bits = widthInBits (wordWidth platform)
559 size = widthInBits from
560 code dst = code1 `snocOL`
561 CLRLI arch_fmt dst src1 (arch_bits - size)
562 return (Any (intFormat to) code)
563
564 getRegister' _ _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
565 = case mop of
566 MO_F_Eq _ -> condFltReg EQQ x y
567 MO_F_Ne _ -> condFltReg NE x y
568 MO_F_Gt _ -> condFltReg GTT x y
569 MO_F_Ge _ -> condFltReg GE x y
570 MO_F_Lt _ -> condFltReg LTT x y
571 MO_F_Le _ -> condFltReg LE x y
572
573 MO_Eq rep -> condIntReg EQQ rep x y
574 MO_Ne rep -> condIntReg NE rep x y
575
576 MO_S_Gt rep -> condIntReg GTT rep x y
577 MO_S_Ge rep -> condIntReg GE rep x y
578 MO_S_Lt rep -> condIntReg LTT rep x y
579 MO_S_Le rep -> condIntReg LE rep x y
580
581 MO_U_Gt rep -> condIntReg GU rep x y
582 MO_U_Ge rep -> condIntReg GEU rep x y
583 MO_U_Lt rep -> condIntReg LU rep x y
584 MO_U_Le rep -> condIntReg LEU rep x y
585
586 MO_F_Add w -> triv_float w FADD
587 MO_F_Sub w -> triv_float w FSUB
588 MO_F_Mul w -> triv_float w FMUL
589 MO_F_Quot w -> triv_float w FDIV
590
591 -- optimize addition with 32-bit immediate
592 -- (needed for PIC)
593 MO_Add W32 ->
594 case y of
595 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
596 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
597 CmmLit lit
598 -> do
599 (src, srcCode) <- getSomeReg x
600 let imm = litToImm lit
601 code dst = srcCode `appOL` toOL [
602 ADDIS dst src (HA imm),
603 ADD dst dst (RIImm (LO imm))
604 ]
605 return (Any II32 code)
606 _ -> trivialCode W32 True ADD x y
607
608 MO_Add rep -> trivialCode rep True ADD x y
609 MO_Sub rep ->
610 case y of
611 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
612 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
613 _ -> case x of
614 CmmLit (CmmInt imm _)
615 | Just _ <- makeImmediate rep True imm
616 -- subfi ('subtract from' with immediate) doesn't exist
617 -> trivialCode rep True SUBFC y x
618 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
619
620 MO_Mul rep -> shiftMulCode rep True MULL x y
621 MO_S_MulMayOflo rep -> do
622 (src1, code1) <- getSomeReg x
623 (src2, code2) <- getSomeReg y
624 let
625 format = intFormat rep
626 code dst = code1 `appOL` code2
627 `appOL` toOL [ MULLO format dst src1 src2
628 , MFOV format dst
629 ]
630 return (Any format code)
631
632 MO_S_Quot rep -> divCode rep True x y
633 MO_U_Quot rep -> divCode rep False x y
634
635 MO_S_Rem rep -> remainder rep True x y
636 MO_U_Rem rep -> remainder rep False x y
637
638 MO_And rep -> case y of
639 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
640 -> do
641 (src, srcCode) <- getSomeReg x
642 let clear_mask = if imm == -4 then 2 else 3
643 fmt = intFormat rep
644 code dst = srcCode
645 `appOL` unitOL (CLRRI fmt dst src clear_mask)
646 return (Any fmt code)
647 _ -> trivialCode rep False AND x y
648 MO_Or rep -> trivialCode rep False OR x y
649 MO_Xor rep -> trivialCode rep False XOR x y
650
651 MO_Shl rep -> shiftMulCode rep False SL x y
652 MO_S_Shr rep -> srCode rep True SRA x y
653 MO_U_Shr rep -> srCode rep False SR x y
654 _ -> panic "PPC.CodeGen.getRegister: no match"
655
656 where
657 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
658 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
659
660 remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
661 remainder rep sgn x y = do
662 let fmt = intFormat rep
663 tmp <- getNewRegNat fmt
664 code <- remainderCode rep sgn tmp x y
665 return (Any fmt code)
666
667
668 getRegister' _ _ (CmmLit (CmmInt i rep))
669 | Just imm <- makeImmediate rep True i
670 = let
671 code dst = unitOL (LI dst imm)
672 in
673 return (Any (intFormat rep) code)
674
675 getRegister' config _ (CmmLit (CmmFloat f frep)) = do
676 lbl <- getNewLabelNat
677 dynRef <- cmmMakeDynamicReference config DataReference lbl
678 Amode addr addr_code <- getAmode D dynRef
679 let format = floatFormat frep
680 code dst =
681 LDATA (Section ReadOnlyData lbl)
682 (CmmStaticsRaw lbl [CmmStaticLit (CmmFloat f frep)])
683 `consOL` (addr_code `snocOL` LD format dst addr)
684 return (Any format code)
685
686 getRegister' config platform (CmmLit lit)
687 | target32Bit platform
688 = let rep = cmmLitType platform lit
689 imm = litToImm lit
690 code dst = toOL [
691 LIS dst (HA imm),
692 ADD dst dst (RIImm (LO imm))
693 ]
694 in return (Any (cmmTypeFormat rep) code)
695 | otherwise
696 = do lbl <- getNewLabelNat
697 dynRef <- cmmMakeDynamicReference config DataReference lbl
698 Amode addr addr_code <- getAmode D dynRef
699 let rep = cmmLitType platform lit
700 format = cmmTypeFormat rep
701 code dst =
702 LDATA (Section ReadOnlyData lbl) (CmmStaticsRaw lbl [CmmStaticLit lit])
703 `consOL` (addr_code `snocOL` LD format dst addr)
704 return (Any format code)
705
706 getRegister' _ platform other = pprPanic "getRegister(ppc)" (pprExpr platform other)
707
708 -- extend?Rep: wrap integer expression of type `from`
709 -- in a conversion to `to`
710 extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
711 extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
712
713 extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
714 extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
715
716 -- -----------------------------------------------------------------------------
717 -- The 'Amode' type: Memory addressing modes passed up the tree.
718
719 data Amode
720 = Amode AddrMode InstrBlock
721
722 {-
723 Now, given a tree (the argument to a CmmLoad) that references memory,
724 produce a suitable addressing mode.
725
726 A Rule of the Game (tm) for Amodes: use of the addr bit must
727 immediately follow use of the code part, since the code part puts
728 values in registers which the addr then refers to. So you can't put
729 anything in between, lest it overwrite some of those registers. If
730 you need to do some other computation between the code part and use of
731 the addr bit, first store the effective address from the amode in a
732 temporary, then do the other computation, and then use the temporary:
733
734 code
735 LEA amode, tmp
736 ... other computation ...
737 ... (tmp) ...
738 -}
739
740 {- Note [Power instruction format]
741 In some instructions the 16 bit offset must be a multiple of 4, i.e.
742 the two least significant bits must be zero. The "Power ISA" specification
743 calls these instruction formats "DS-FORM" and the instructions with
744 arbitrary 16 bit offsets are "D-FORM".
745
746 The Power ISA specification document can be obtained from www.power.org.
747 -}
748 data InstrForm = D | DS
749
750 getAmode :: InstrForm -> CmmExpr -> NatM Amode
751 getAmode inf tree@(CmmRegOff _ _)
752 = do platform <- getPlatform
753 getAmode inf (mangleIndexTree platform tree)
754
755 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
756 | Just off <- makeImmediate W32 True (-i)
757 = do
758 (reg, code) <- getSomeReg x
759 return (Amode (AddrRegImm reg off) code)
760
761
762 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
763 | Just off <- makeImmediate W32 True i
764 = do
765 (reg, code) <- getSomeReg x
766 return (Amode (AddrRegImm reg off) code)
767
768 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
769 | Just off <- makeImmediate W64 True (-i)
770 = do
771 (reg, code) <- getSomeReg x
772 return (Amode (AddrRegImm reg off) code)
773
774
775 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
776 | Just off <- makeImmediate W64 True i
777 = do
778 (reg, code) <- getSomeReg x
779 return (Amode (AddrRegImm reg off) code)
780
781 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
782 | Just off <- makeImmediate W64 True (-i)
783 = do
784 (reg, code) <- getSomeReg x
785 (reg', off', code') <-
786 if i `mod` 4 == 0
787 then return (reg, off, code)
788 else do
789 tmp <- getNewRegNat II64
790 return (tmp, ImmInt 0,
791 code `snocOL` ADD tmp reg (RIImm off))
792 return (Amode (AddrRegImm reg' off') code')
793
794 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
795 | Just off <- makeImmediate W64 True i
796 = do
797 (reg, code) <- getSomeReg x
798 (reg', off', code') <-
799 if i `mod` 4 == 0
800 then return (reg, off, code)
801 else do
802 tmp <- getNewRegNat II64
803 return (tmp, ImmInt 0,
804 code `snocOL` ADD tmp reg (RIImm off))
805 return (Amode (AddrRegImm reg' off') code')
806
807 -- optimize addition with 32-bit immediate
808 -- (needed for PIC)
809 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
810 = do
811 platform <- getPlatform
812 (src, srcCode) <- getSomeReg x
813 let imm = litToImm lit
814 case () of
815 _ | OSAIX <- platformOS platform
816 , isCmmLabelType lit ->
817 -- HA16/LO16 relocations on labels not supported on AIX
818 return (Amode (AddrRegImm src imm) srcCode)
819 | otherwise -> do
820 tmp <- getNewRegNat II32
821 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
822 return (Amode (AddrRegImm tmp (LO imm)) code)
823 where
824 isCmmLabelType (CmmLabel {}) = True
825 isCmmLabelType (CmmLabelOff {}) = True
826 isCmmLabelType (CmmLabelDiffOff {}) = True
827 isCmmLabelType _ = False
828
829 getAmode _ (CmmLit lit)
830 = do
831 platform <- getPlatform
832 case platformArch platform of
833 ArchPPC -> do
834 tmp <- getNewRegNat II32
835 let imm = litToImm lit
836 code = unitOL (LIS tmp (HA imm))
837 return (Amode (AddrRegImm tmp (LO imm)) code)
838 _ -> do -- TODO: Load from TOC,
839 -- see getRegister' _ (CmmLit lit)
840 tmp <- getNewRegNat II64
841 let imm = litToImm lit
842 code = toOL [
843 LIS tmp (HIGHESTA imm),
844 OR tmp tmp (RIImm (HIGHERA imm)),
845 SL II64 tmp tmp (RIImm (ImmInt 32)),
846 ORIS tmp tmp (HA imm)
847 ]
848 return (Amode (AddrRegImm tmp (LO imm)) code)
849
850 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
851 = do
852 (regX, codeX) <- getSomeReg x
853 (regY, codeY) <- getSomeReg y
854 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
855
856 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
857 = do
858 (regX, codeX) <- getSomeReg x
859 (regY, codeY) <- getSomeReg y
860 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
861
862 getAmode _ other
863 = do
864 (reg, code) <- getSomeReg other
865 let
866 off = ImmInt 0
867 return (Amode (AddrRegImm reg off) code)
868
869
870 -- The 'CondCode' type: Condition codes passed up the tree.
871 data CondCode
872 = CondCode Bool Cond InstrBlock
873
874 -- Set up a condition code for a conditional branch.
875
876 getCondCode :: CmmExpr -> NatM CondCode
877
878 -- almost the same as everywhere else - but we need to
879 -- extend small integers to 32 bit or 64 bit first
880
881 getCondCode (CmmMachOp mop [x, y])
882 = case mop of
883 MO_F_Eq W32 -> condFltCode EQQ x y
884 MO_F_Ne W32 -> condFltCode NE x y
885 MO_F_Gt W32 -> condFltCode GTT x y
886 MO_F_Ge W32 -> condFltCode GE x y
887 MO_F_Lt W32 -> condFltCode LTT x y
888 MO_F_Le W32 -> condFltCode LE x y
889
890 MO_F_Eq W64 -> condFltCode EQQ x y
891 MO_F_Ne W64 -> condFltCode NE x y
892 MO_F_Gt W64 -> condFltCode GTT x y
893 MO_F_Ge W64 -> condFltCode GE x y
894 MO_F_Lt W64 -> condFltCode LTT x y
895 MO_F_Le W64 -> condFltCode LE x y
896
897 MO_Eq rep -> condIntCode EQQ rep x y
898 MO_Ne rep -> condIntCode NE rep x y
899
900 MO_S_Gt rep -> condIntCode GTT rep x y
901 MO_S_Ge rep -> condIntCode GE rep x y
902 MO_S_Lt rep -> condIntCode LTT rep x y
903 MO_S_Le rep -> condIntCode LE rep x y
904
905 MO_U_Gt rep -> condIntCode GU rep x y
906 MO_U_Ge rep -> condIntCode GEU rep x y
907 MO_U_Lt rep -> condIntCode LU rep x y
908 MO_U_Le rep -> condIntCode LEU rep x y
909
910 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
911
912 getCondCode _ = panic "getCondCode(2)(powerpc)"
913
914
915 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
916 -- passed back up the tree.
917
918 condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
919 condIntCode cond width x y = do
920 platform <- getPlatform
921 condIntCode' (target32Bit platform) cond width x y
922
923 condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
924
925 -- simple code for 64-bit on 32-bit platforms
926 condIntCode' True cond W64 x y
927 | condUnsigned cond
928 = do
929 ChildCode64 code_x x_lo <- iselExpr64 x
930 ChildCode64 code_y y_lo <- iselExpr64 y
931 let x_hi = getHiVRegFromLo x_lo
932 y_hi = getHiVRegFromLo y_lo
933 end_lbl <- getBlockIdNat
934 let code = code_x `appOL` code_y `appOL` toOL
935 [ CMPL II32 x_hi (RIReg y_hi)
936 , BCC NE end_lbl Nothing
937 , CMPL II32 x_lo (RIReg y_lo)
938 , BCC ALWAYS end_lbl Nothing
939
940 , NEWBLOCK end_lbl
941 ]
942 return (CondCode False cond code)
943 | otherwise
944 = do
945 ChildCode64 code_x x_lo <- iselExpr64 x
946 ChildCode64 code_y y_lo <- iselExpr64 y
947 let x_hi = getHiVRegFromLo x_lo
948 y_hi = getHiVRegFromLo y_lo
949 end_lbl <- getBlockIdNat
950 cmp_lo <- getBlockIdNat
951 let code = code_x `appOL` code_y `appOL` toOL
952 [ CMP II32 x_hi (RIReg y_hi)
953 , BCC NE end_lbl Nothing
954 , CMP II32 x_hi (RIImm (ImmInt 0))
955 , BCC LE cmp_lo Nothing
956 , CMPL II32 x_lo (RIReg y_lo)
957 , BCC ALWAYS end_lbl Nothing
958 , NEWBLOCK cmp_lo
959 , CMPL II32 y_lo (RIReg x_lo)
960 , BCC ALWAYS end_lbl Nothing
961
962 , NEWBLOCK end_lbl
963 ]
964 return (CondCode False cond code)
965
966 -- optimize pointer tag checks. Operation andi. sets condition register
967 -- so cmpi ..., 0 is redundant.
968 condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
969 (CmmLit (CmmInt 0 _))
970 | not $ condUnsigned cond,
971 Just src2 <- makeImmediate rep False imm
972 = do
973 (src1, code) <- getSomeReg x
974 let code' = code `snocOL` AND r0 src1 (RIImm src2)
975 return (CondCode False cond code')
976
977 condIntCode' _ cond width x (CmmLit (CmmInt y rep))
978 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
979 = do
980 let op_len = max W32 width
981 let extend = if condUnsigned cond then extendUExpr width op_len
982 else extendSExpr width op_len
983 (src1, code) <- getSomeReg (extend x)
984 let format = intFormat op_len
985 code' = code `snocOL`
986 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
987 return (CondCode False cond code')
988
989 condIntCode' _ cond width x y = do
990 let op_len = max W32 width
991 let extend = if condUnsigned cond then extendUExpr width op_len
992 else extendSExpr width op_len
993 (src1, code1) <- getSomeReg (extend x)
994 (src2, code2) <- getSomeReg (extend y)
995 let format = intFormat op_len
996 code' = code1 `appOL` code2 `snocOL`
997 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
998 return (CondCode False cond code')
999
1000 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1001 condFltCode cond x y = do
1002 (src1, code1) <- getSomeReg x
1003 (src2, code2) <- getSomeReg y
1004 let
1005 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
1006 code'' = case cond of -- twiddle CR to handle unordered case
1007 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
1008 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
1009 _ -> code'
1010 where
1011 ltbit = 0 ; eqbit = 2 ; gtbit = 1
1012 return (CondCode True cond code'')
1013
1014
1015
1016 -- -----------------------------------------------------------------------------
1017 -- Generating assignments
1018
1019 -- Assignments are really at the heart of the whole code generation
1020 -- business. Almost all top-level nodes of any real importance are
1021 -- assignments, which correspond to loads, stores, or register
1022 -- transfers. If we're really lucky, some of the register transfers
1023 -- will go away, because we can use the destination register to
1024 -- complete the code generation for the right hand side. This only
1025 -- fails when the right hand side is forced into a fixed register
1026 -- (e.g. the result of a call).
1027
1028 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1029 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1030
1031 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1032 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1033
1034 assignMem_IntCode pk addr src = do
1035 (srcReg, code) <- getSomeReg src
1036 Amode dstAddr addr_code <- case pk of
1037 II64 -> getAmode DS addr
1038 _ -> getAmode D addr
1039 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
1040
1041 -- dst is a reg, but src could be anything
1042 assignReg_IntCode _ reg src
1043 = do
1044 platform <- getPlatform
1045 let dst = getRegisterReg platform reg
1046 r <- getRegister src
1047 return $ case r of
1048 Any _ code -> code dst
1049 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1050
1051
1052
1053 -- Easy, isn't it?
1054 assignMem_FltCode = assignMem_IntCode
1055 assignReg_FltCode = assignReg_IntCode
1056
1057
1058
1059 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
1060
1061 genJump (CmmLit (CmmLabel lbl)) regs
1062 = return (unitOL $ JMP lbl regs)
1063
1064 genJump tree gregs
1065 = do
1066 platform <- getPlatform
1067 genJump' tree (platformToGCP platform) gregs
1068
1069 genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
1070
1071 genJump' tree (GCP64ELF 1) regs
1072 = do
1073 (target,code) <- getSomeReg tree
1074 return (code
1075 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1076 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1077 `snocOL` MTCTR r11
1078 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1079 `snocOL` BCTR [] Nothing regs)
1080
1081 genJump' tree (GCP64ELF 2) regs
1082 = do
1083 (target,code) <- getSomeReg tree
1084 return (code
1085 `snocOL` MR r12 target
1086 `snocOL` MTCTR r12
1087 `snocOL` BCTR [] Nothing regs)
1088
1089 genJump' tree _ regs
1090 = do
1091 (target,code) <- getSomeReg tree
1092 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
1093
1094 -- -----------------------------------------------------------------------------
1095 -- Unconditional branches
1096 genBranch :: BlockId -> NatM InstrBlock
1097 genBranch = return . toOL . mkJumpInstr
1098
1099
1100 -- -----------------------------------------------------------------------------
1101 -- Conditional jumps
1102
1103 {-
1104 Conditional jumps are always to local labels, so we can use branch
1105 instructions. We peek at the arguments to decide what kind of
1106 comparison to do.
1107 -}
1108
1109
1110 genCondJump
1111 :: BlockId -- the branch target
1112 -> CmmExpr -- the condition on which to branch
1113 -> Maybe Bool
1114 -> NatM InstrBlock
1115
1116 genCondJump id bool prediction = do
1117 CondCode _ cond code <- getCondCode bool
1118 return (code `snocOL` BCC cond id prediction)
1119
1120
1121
1122 -- -----------------------------------------------------------------------------
1123 -- Generating C calls
1124
1125 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1126 -- @get_arg@, which moves the arguments to the correct registers/stack
1127 -- locations. Apart from that, the code is easy.
1128
1129 genCCall :: ForeignTarget -- function to call
1130 -> [CmmFormal] -- where to put the result
1131 -> [CmmActual] -- arguments (of mixed type)
1132 -> NatM InstrBlock
1133 genCCall (PrimTarget MO_ReadBarrier) _ _
1134 = return $ unitOL LWSYNC
1135 genCCall (PrimTarget MO_WriteBarrier) _ _
1136 = return $ unitOL LWSYNC
1137
1138 genCCall (PrimTarget MO_Touch) _ _
1139 = return $ nilOL
1140
1141 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1142 = return $ nilOL
1143
1144 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1145 = do platform <- getPlatform
1146 let fmt = intFormat width
1147 reg_dst = getRegisterReg platform (CmmLocal dst)
1148 (instr, n_code) <- case amop of
1149 AMO_Add -> getSomeRegOrImm ADD True reg_dst
1150 AMO_Sub -> case n of
1151 CmmLit (CmmInt i _)
1152 | Just imm <- makeImmediate width True (-i)
1153 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1154 _
1155 -> do
1156 (n_reg, n_code) <- getSomeReg n
1157 return (SUBF reg_dst n_reg reg_dst, n_code)
1158 AMO_And -> getSomeRegOrImm AND False reg_dst
1159 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1160 return (NAND reg_dst reg_dst n_reg, n_code)
1161 AMO_Or -> getSomeRegOrImm OR False reg_dst
1162 AMO_Xor -> getSomeRegOrImm XOR False reg_dst
1163 Amode addr_reg addr_code <- getAmodeIndex addr
1164 lbl_retry <- getBlockIdNat
1165 return $ n_code `appOL` addr_code
1166 `appOL` toOL [ HWSYNC
1167 , BCC ALWAYS lbl_retry Nothing
1168
1169 , NEWBLOCK lbl_retry
1170 , LDR fmt reg_dst addr_reg
1171 , instr
1172 , STC fmt reg_dst addr_reg
1173 , BCC NE lbl_retry (Just False)
1174 , ISYNC
1175 ]
1176 where
1177 getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1178 = do
1179 (regX, codeX) <- getSomeReg x
1180 (regY, codeY) <- getSomeReg y
1181 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1182 getAmodeIndex other
1183 = do
1184 (reg, code) <- getSomeReg other
1185 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1186 getSomeRegOrImm op sign dst
1187 = case n of
1188 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1189 -> return (op dst dst (RIImm imm), nilOL)
1190 _
1191 -> do
1192 (n_reg, n_code) <- getSomeReg n
1193 return (op dst dst (RIReg n_reg), n_code)
1194
1195 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1196 = do platform <- getPlatform
1197 let fmt = intFormat width
1198 reg_dst = getRegisterReg platform (CmmLocal dst)
1199 form = if widthInBits width == 64 then DS else D
1200 Amode addr_reg addr_code <- getAmode form addr
1201 lbl_end <- getBlockIdNat
1202 return $ addr_code `appOL` toOL [ HWSYNC
1203 , LD fmt reg_dst addr_reg
1204 , CMP fmt reg_dst (RIReg reg_dst)
1205 , BCC NE lbl_end (Just False)
1206 , BCC ALWAYS lbl_end Nothing
1207 -- See Note [Seemingly useless cmp and bne]
1208 , NEWBLOCK lbl_end
1209 , ISYNC
1210 ]
1211
1212 -- Note [Seemingly useless cmp and bne]
1213 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1214 -- the second paragraph says that isync may complete before storage accesses
1215 -- "associated" with a preceding instruction have been performed. The cmp
1216 -- operation and the following bne introduce a data and control dependency
1217 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1218 -- Fetch).
1219 -- This is also what gcc does.
1220
1221
1222 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1223 code <- assignMem_IntCode (intFormat width) addr val
1224 return $ unitOL HWSYNC `appOL` code
1225
1226 genCCall (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new]
1227 | width == W32 || width == W64
1228 = do
1229 platform <- getPlatform
1230 (old_reg, old_code) <- getSomeReg old
1231 (new_reg, new_code) <- getSomeReg new
1232 (addr_reg, addr_code) <- getSomeReg addr
1233 lbl_retry <- getBlockIdNat
1234 lbl_eq <- getBlockIdNat
1235 lbl_end <- getBlockIdNat
1236 let reg_dst = getRegisterReg platform (CmmLocal dst)
1237 code = toOL
1238 [ HWSYNC
1239 , BCC ALWAYS lbl_retry Nothing
1240 , NEWBLOCK lbl_retry
1241 , LDR format reg_dst (AddrRegReg r0 addr_reg)
1242 , CMP format reg_dst (RIReg old_reg)
1243 , BCC NE lbl_end Nothing
1244 , BCC ALWAYS lbl_eq Nothing
1245 , NEWBLOCK lbl_eq
1246 , STC format new_reg (AddrRegReg r0 addr_reg)
1247 , BCC NE lbl_retry Nothing
1248 , BCC ALWAYS lbl_end Nothing
1249 , NEWBLOCK lbl_end
1250 , ISYNC
1251 ]
1252 return $ addr_code `appOL` new_code `appOL` old_code `appOL` code
1253 where
1254 format = intFormat width
1255
1256
1257 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1258 = do platform <- getPlatform
1259 let reg_dst = getRegisterReg platform (CmmLocal dst)
1260 if target32Bit platform && width == W64
1261 then do
1262 ChildCode64 code vr_lo <- iselExpr64 src
1263 lbl1 <- getBlockIdNat
1264 lbl2 <- getBlockIdNat
1265 lbl3 <- getBlockIdNat
1266 let vr_hi = getHiVRegFromLo vr_lo
1267 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1268 , BCC NE lbl2 Nothing
1269 , BCC ALWAYS lbl1 Nothing
1270
1271 , NEWBLOCK lbl1
1272 , CNTLZ II32 reg_dst vr_lo
1273 , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1274 , BCC ALWAYS lbl3 Nothing
1275
1276 , NEWBLOCK lbl2
1277 , CNTLZ II32 reg_dst vr_hi
1278 , BCC ALWAYS lbl3 Nothing
1279
1280 , NEWBLOCK lbl3
1281 ]
1282 return $ code `appOL` cntlz
1283 else do
1284 let format = if width == W64 then II64 else II32
1285 (s_reg, s_code) <- getSomeReg src
1286 (pre, reg , post) <-
1287 case width of
1288 W64 -> return (nilOL, s_reg, nilOL)
1289 W32 -> return (nilOL, s_reg, nilOL)
1290 W16 -> do
1291 reg_tmp <- getNewRegNat format
1292 return
1293 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1294 , reg_tmp
1295 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1296 )
1297 W8 -> do
1298 reg_tmp <- getNewRegNat format
1299 return
1300 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1301 , reg_tmp
1302 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1303 )
1304 _ -> panic "genCall: Clz wrong format"
1305 let cntlz = unitOL (CNTLZ format reg_dst reg)
1306 return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1307
1308 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1309 = do platform <- getPlatform
1310 let reg_dst = getRegisterReg platform (CmmLocal dst)
1311 if target32Bit platform && width == W64
1312 then do
1313 let format = II32
1314 ChildCode64 code vr_lo <- iselExpr64 src
1315 lbl1 <- getBlockIdNat
1316 lbl2 <- getBlockIdNat
1317 lbl3 <- getBlockIdNat
1318 x' <- getNewRegNat format
1319 x'' <- getNewRegNat format
1320 r' <- getNewRegNat format
1321 cnttzlo <- cnttz format reg_dst vr_lo
1322 let vr_hi = getHiVRegFromLo vr_lo
1323 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1324 , BCC NE lbl2 Nothing
1325 , BCC ALWAYS lbl1 Nothing
1326
1327 , NEWBLOCK lbl1
1328 , ADD x' vr_hi (RIImm (ImmInt (-1)))
1329 , ANDC x'' x' vr_hi
1330 , CNTLZ format r' x''
1331 -- 32 + (32 - clz(x''))
1332 , SUBFC reg_dst r' (RIImm (ImmInt 64))
1333 , BCC ALWAYS lbl3 Nothing
1334
1335 , NEWBLOCK lbl2
1336 ]
1337 `appOL` cnttzlo `appOL`
1338 toOL [ BCC ALWAYS lbl3 Nothing
1339
1340 , NEWBLOCK lbl3
1341 ]
1342 return $ code `appOL` cnttz64
1343 else do
1344 let format = if width == W64 then II64 else II32
1345 (s_reg, s_code) <- getSomeReg src
1346 (reg_ctz, pre_code) <-
1347 case width of
1348 W64 -> return (s_reg, nilOL)
1349 W32 -> return (s_reg, nilOL)
1350 W16 -> do
1351 reg_tmp <- getNewRegNat format
1352 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1353 W8 -> do
1354 reg_tmp <- getNewRegNat format
1355 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1356 _ -> panic "genCall: Ctz wrong format"
1357 ctz_code <- cnttz format reg_dst reg_ctz
1358 return $ s_code `appOL` pre_code `appOL` ctz_code
1359 where
1360 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1361 -- see Henry S. Warren, Hacker's Delight, p 107
1362 cnttz format dst src = do
1363 let format_bits = 8 * formatInBytes format
1364 x' <- getNewRegNat format
1365 x'' <- getNewRegNat format
1366 r' <- getNewRegNat format
1367 return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1368 , ANDC x'' x' src
1369 , CNTLZ format r' x''
1370 , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1371 ]
1372
1373 genCCall target dest_regs argsAndHints
1374 = do platform <- getPlatform
1375 case target of
1376 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
1377 dest_regs argsAndHints
1378 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
1379 dest_regs argsAndHints
1380 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1381 argsAndHints
1382 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1383 argsAndHints
1384 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1385 PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
1386 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1387 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1388 dest_regs argsAndHints
1389 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1390 dest_regs argsAndHints
1391 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1392 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1393 _ -> do config <- getConfig
1394 genCCall' config (platformToGCP platform)
1395 target dest_regs argsAndHints
1396 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1397 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1398 reg_r = getRegisterReg platform (CmmLocal res_r)
1399 remainderCode width signed reg_q arg_x arg_y
1400 <*> pure reg_r
1401
1402 divOp1 _ _ _ _ _
1403 = panic "genCCall: Wrong number of arguments for divOp1"
1404 divOp2 platform width [res_q, res_r]
1405 [arg_x_high, arg_x_low, arg_y]
1406 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1407 reg_r = getRegisterReg platform (CmmLocal res_r)
1408 fmt = intFormat width
1409 half = 4 * (formatInBytes fmt)
1410 (xh_reg, xh_code) <- getSomeReg arg_x_high
1411 (xl_reg, xl_code) <- getSomeReg arg_x_low
1412 (y_reg, y_code) <- getSomeReg arg_y
1413 s <- getNewRegNat fmt
1414 b <- getNewRegNat fmt
1415 v <- getNewRegNat fmt
1416 vn1 <- getNewRegNat fmt
1417 vn0 <- getNewRegNat fmt
1418 un32 <- getNewRegNat fmt
1419 tmp <- getNewRegNat fmt
1420 un10 <- getNewRegNat fmt
1421 un1 <- getNewRegNat fmt
1422 un0 <- getNewRegNat fmt
1423 q1 <- getNewRegNat fmt
1424 rhat <- getNewRegNat fmt
1425 tmp1 <- getNewRegNat fmt
1426 q0 <- getNewRegNat fmt
1427 un21 <- getNewRegNat fmt
1428 again1 <- getBlockIdNat
1429 no1 <- getBlockIdNat
1430 then1 <- getBlockIdNat
1431 endif1 <- getBlockIdNat
1432 again2 <- getBlockIdNat
1433 no2 <- getBlockIdNat
1434 then2 <- getBlockIdNat
1435 endif2 <- getBlockIdNat
1436 return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1437 -- see Hacker's Delight p 196 Figure 9-3
1438 toOL [ -- b = 2 ^ (bits_in_word / 2)
1439 LI b (ImmInt 1)
1440 , SL fmt b b (RIImm (ImmInt half))
1441 -- s = clz(y)
1442 , CNTLZ fmt s y_reg
1443 -- v = y << s
1444 , SL fmt v y_reg (RIReg s)
1445 -- vn1 = upper half of v
1446 , SR fmt vn1 v (RIImm (ImmInt half))
1447 -- vn0 = lower half of v
1448 , CLRLI fmt vn0 v half
1449 -- un32 = (u1 << s)
1450 -- | (u0 >> (bits_in_word - s))
1451 , SL fmt un32 xh_reg (RIReg s)
1452 , SUBFC tmp s
1453 (RIImm (ImmInt (8 * formatInBytes fmt)))
1454 , SR fmt tmp xl_reg (RIReg tmp)
1455 , OR un32 un32 (RIReg tmp)
1456 -- un10 = u0 << s
1457 , SL fmt un10 xl_reg (RIReg s)
1458 -- un1 = upper half of un10
1459 , SR fmt un1 un10 (RIImm (ImmInt half))
1460 -- un0 = lower half of un10
1461 , CLRLI fmt un0 un10 half
1462 -- q1 = un32/vn1
1463 , DIV fmt False q1 un32 vn1
1464 -- rhat = un32 - q1*vn1
1465 , MULL fmt tmp q1 (RIReg vn1)
1466 , SUBF rhat tmp un32
1467 , BCC ALWAYS again1 Nothing
1468
1469 , NEWBLOCK again1
1470 -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1471 , CMPL fmt q1 (RIReg b)
1472 , BCC GEU then1 Nothing
1473 , BCC ALWAYS no1 Nothing
1474
1475 , NEWBLOCK no1
1476 , MULL fmt tmp q1 (RIReg vn0)
1477 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1478 , ADD tmp1 tmp1 (RIReg un1)
1479 , CMPL fmt tmp (RIReg tmp1)
1480 , BCC LEU endif1 Nothing
1481 , BCC ALWAYS then1 Nothing
1482
1483 , NEWBLOCK then1
1484 -- q1 = q1 - 1
1485 , ADD q1 q1 (RIImm (ImmInt (-1)))
1486 -- rhat = rhat + vn1
1487 , ADD rhat rhat (RIReg vn1)
1488 -- if (rhat < b) goto again1
1489 , CMPL fmt rhat (RIReg b)
1490 , BCC LTT again1 Nothing
1491 , BCC ALWAYS endif1 Nothing
1492
1493 , NEWBLOCK endif1
1494 -- un21 = un32*b + un1 - q1*v
1495 , SL fmt un21 un32 (RIImm (ImmInt half))
1496 , ADD un21 un21 (RIReg un1)
1497 , MULL fmt tmp q1 (RIReg v)
1498 , SUBF un21 tmp un21
1499 -- compute second quotient digit
1500 -- q0 = un21/vn1
1501 , DIV fmt False q0 un21 vn1
1502 -- rhat = un21- q0*vn1
1503 , MULL fmt tmp q0 (RIReg vn1)
1504 , SUBF rhat tmp un21
1505 , BCC ALWAYS again2 Nothing
1506
1507 , NEWBLOCK again2
1508 -- if (q0>b || q0*vn0 > b*rhat + un0)
1509 , CMPL fmt q0 (RIReg b)
1510 , BCC GEU then2 Nothing
1511 , BCC ALWAYS no2 Nothing
1512
1513 , NEWBLOCK no2
1514 , MULL fmt tmp q0 (RIReg vn0)
1515 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1516 , ADD tmp1 tmp1 (RIReg un0)
1517 , CMPL fmt tmp (RIReg tmp1)
1518 , BCC LEU endif2 Nothing
1519 , BCC ALWAYS then2 Nothing
1520
1521 , NEWBLOCK then2
1522 -- q0 = q0 - 1
1523 , ADD q0 q0 (RIImm (ImmInt (-1)))
1524 -- rhat = rhat + vn1
1525 , ADD rhat rhat (RIReg vn1)
1526 -- if (rhat<b) goto again2
1527 , CMPL fmt rhat (RIReg b)
1528 , BCC LTT again2 Nothing
1529 , BCC ALWAYS endif2 Nothing
1530
1531 , NEWBLOCK endif2
1532 -- compute remainder
1533 -- r = (un21*b + un0 - q0*v) >> s
1534 , SL fmt reg_r un21 (RIImm (ImmInt half))
1535 , ADD reg_r reg_r (RIReg un0)
1536 , MULL fmt tmp q0 (RIReg v)
1537 , SUBF reg_r tmp reg_r
1538 , SR fmt reg_r reg_r (RIReg s)
1539 -- compute quotient
1540 -- q = q1*b + q0
1541 , SL fmt reg_q q1 (RIImm (ImmInt half))
1542 , ADD reg_q reg_q (RIReg q0)
1543 ]
1544 divOp2 _ _ _ _
1545 = panic "genCCall: Wrong number of arguments for divOp2"
1546 multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1547 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1548 reg_l = getRegisterReg platform (CmmLocal res_l)
1549 fmt = intFormat width
1550 (x_reg, x_code) <- getSomeReg arg_x
1551 (y_reg, y_code) <- getSomeReg arg_y
1552 return $ y_code `appOL` x_code
1553 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1554 , MULHU fmt reg_h x_reg y_reg
1555 ]
1556 multOp2 _ _ _ _
1557 = panic "genCall: Wrong number of arguments for multOp2"
1558 add2Op platform [res_h, res_l] [arg_x, arg_y]
1559 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1560 reg_l = getRegisterReg platform (CmmLocal res_l)
1561 (x_reg, x_code) <- getSomeReg arg_x
1562 (y_reg, y_code) <- getSomeReg arg_y
1563 return $ y_code `appOL` x_code
1564 `appOL` toOL [ LI reg_h (ImmInt 0)
1565 , ADDC reg_l x_reg y_reg
1566 , ADDZE reg_h reg_h
1567 ]
1568 add2Op _ _ _
1569 = panic "genCCall: Wrong number of arguments/results for add2"
1570
1571 addcOp platform [res_r, res_c] [arg_x, arg_y]
1572 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
1573 addcOp _ _ _
1574 = panic "genCCall: Wrong number of arguments/results for addc"
1575
1576 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1577 -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1578 -- so xor with 1.
1579 subcOp platform [res_r, res_c] [arg_x, arg_y]
1580 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1581 reg_c = getRegisterReg platform (CmmLocal res_c)
1582 (x_reg, x_code) <- getSomeReg arg_x
1583 (y_reg, y_code) <- getSomeReg arg_y
1584 return $ y_code `appOL` x_code
1585 `appOL` toOL [ LI reg_c (ImmInt 0)
1586 , SUBFC reg_r y_reg (RIReg x_reg)
1587 , ADDZE reg_c reg_c
1588 , XOR reg_c reg_c (RIImm (ImmInt 1))
1589 ]
1590 subcOp _ _ _
1591 = panic "genCCall: Wrong number of arguments/results for subc"
1592 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1593 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1594 reg_c = getRegisterReg platform (CmmLocal res_c)
1595 (x_reg, x_code) <- getSomeReg arg_x
1596 (y_reg, y_code) <- getSomeReg arg_y
1597 return $ y_code `appOL` x_code
1598 `appOL` toOL [ instr reg_r y_reg x_reg,
1599 -- SUBFO argument order reversed!
1600 MFOV (intFormat width) reg_c
1601 ]
1602 addSubCOp _ _ _ _ _
1603 = panic "genCall: Wrong number of arguments/results for addC"
1604 fabs platform [res] [arg]
1605 = do let res_r = getRegisterReg platform (CmmLocal res)
1606 (arg_reg, arg_code) <- getSomeReg arg
1607 return $ arg_code `snocOL` FABS res_r arg_reg
1608 fabs _ _ _
1609 = panic "genCall: Wrong number of arguments/results for fabs"
1610
1611 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1612 data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
1613
1614 platformToGCP :: Platform -> GenCCallPlatform
1615 platformToGCP platform
1616 = case platformOS platform of
1617 OSAIX -> GCPAIX
1618 _ -> case platformArch platform of
1619 ArchPPC -> GCP32ELF
1620 ArchPPC_64 ELF_V1 -> GCP64ELF 1
1621 ArchPPC_64 ELF_V2 -> GCP64ELF 2
1622 _ -> panic "platformToGCP: Not PowerPC"
1623
1624
1625 genCCall'
1626 :: NCGConfig
1627 -> GenCCallPlatform
1628 -> ForeignTarget -- function to call
1629 -> [CmmFormal] -- where to put the result
1630 -> [CmmActual] -- arguments (of mixed type)
1631 -> NatM InstrBlock
1632
1633 {-
1634 PowerPC Linux uses the System V Release 4 Calling Convention
1635 for PowerPC. It is described in the
1636 "System V Application Binary Interface PowerPC Processor Supplement".
1637
1638 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1639 64-bit PowerPC. It is specified in
1640 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1641 (PPC64 ELF v1.9).
1642
1643 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1644 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1645 (PPC64 ELF v2).
1646
1647 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1648 32-Bit Hardware Implementation"
1649
1650 All four conventions are similar:
1651 Parameters may be passed in general-purpose registers starting at r3, in
1652 floating point registers starting at f1, or on the stack.
1653
1654 But there are substantial differences:
1655 * The number of registers used for parameter passing and the exact set of
1656 nonvolatile registers differs (see MachRegs.hs).
1657 * On AIX and 64-bit ELF, stack space is always reserved for parameters,
1658 even if they are passed in registers. The called routine may choose to
1659 save parameters from registers to the corresponding space on the stack.
1660 * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
1661 a floating point parameter is passed in an FPR.
1662 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1663 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1664 AIX just treats an I64 likt two separate I32s (high word first).
1665 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1666 4-byte aligned like everything else on AIX.
1667 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1668 PowerPC Linux does not agree, so neither do we.
1669
1670 According to all conventions, the parameter area should be part of the
1671 caller's stack frame, allocated in the caller's prologue code (large enough
1672 to hold the parameter lists for all called routines). The NCG already
1673 uses the stack for register spilling, leaving 64 bytes free at the top.
1674 If we need a larger parameter area than that, we increase the size
1675 of the stack frame just before ccalling.
1676 -}
1677
1678
1679 genCCall' config gcp target dest_regs args
1680 = do
1681 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1682 (zip3 args argReps argHints)
1683 allArgRegs
1684 (allFPArgRegs platform)
1685 initialStackOffset
1686 nilOL []
1687
1688 (labelOrExpr, reduceToFF32) <- case target of
1689 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1690 uses_pic_base_implicitly
1691 return (Left lbl, False)
1692 ForeignTarget expr _ -> do
1693 uses_pic_base_implicitly
1694 return (Right expr, False)
1695 PrimTarget mop -> outOfLineMachOp mop
1696
1697 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1698 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1699
1700 case labelOrExpr of
1701 Left lbl -> -- the linker does all the work for us
1702 return ( codeBefore
1703 `snocOL` BL lbl usedRegs
1704 `appOL` maybeNOP -- some ABI require a NOP after BL
1705 `appOL` codeAfter)
1706 Right dyn -> do -- implement call through function pointer
1707 (dynReg, dynCode) <- getSomeReg dyn
1708 case gcp of
1709 GCP64ELF 1 -> return ( dynCode
1710 `appOL` codeBefore
1711 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1712 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1713 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1714 `snocOL` MTCTR r11
1715 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1716 `snocOL` BCTRL usedRegs
1717 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1718 `appOL` codeAfter)
1719 GCP64ELF 2 -> return ( dynCode
1720 `appOL` codeBefore
1721 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1722 `snocOL` MR r12 dynReg
1723 `snocOL` MTCTR r12
1724 `snocOL` BCTRL usedRegs
1725 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1726 `appOL` codeAfter)
1727 GCPAIX -> return ( dynCode
1728 -- AIX/XCOFF follows the PowerOPEN ABI
1729 -- which is quite similar to LinuxPPC64/ELFv1
1730 `appOL` codeBefore
1731 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1732 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1733 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1734 `snocOL` MTCTR r11
1735 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1736 `snocOL` BCTRL usedRegs
1737 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1738 `appOL` codeAfter)
1739 _ -> return ( dynCode
1740 `snocOL` MTCTR dynReg
1741 `appOL` codeBefore
1742 `snocOL` BCTRL usedRegs
1743 `appOL` codeAfter)
1744 where
1745 platform = ncgPlatform config
1746
1747 uses_pic_base_implicitly =
1748 -- See Note [implicit register in PPC PIC code]
1749 -- on why we claim to use PIC register here
1750 when (ncgPIC config && target32Bit platform) $ do
1751 _ <- getPicBaseNat $ archWordFormat True
1752 return ()
1753
1754 initialStackOffset = case gcp of
1755 GCPAIX -> 24
1756 GCP32ELF -> 8
1757 GCP64ELF 1 -> 48
1758 GCP64ELF 2 -> 32
1759 _ -> panic "genCall': unknown calling convention"
1760 -- size of linkage area + size of arguments, in bytes
1761 stackDelta finalStack = case gcp of
1762 GCPAIX ->
1763 roundTo 16 $ (24 +) $ max 32 $ sum $
1764 map (widthInBytes . typeWidth) argReps
1765 GCP32ELF -> roundTo 16 finalStack
1766 GCP64ELF 1 ->
1767 roundTo 16 $ (48 +) $ max 64 $ sum $
1768 map (roundTo 8 . widthInBytes . typeWidth)
1769 argReps
1770 GCP64ELF 2 ->
1771 roundTo 16 $ (32 +) $ max 64 $ sum $
1772 map (roundTo 8 . widthInBytes . typeWidth)
1773 argReps
1774 _ -> panic "genCall': unknown calling conv."
1775
1776 argReps = map (cmmExprType platform) args
1777 (argHints, _) = foreignTargetHints target
1778
1779 roundTo a x | x `mod` a == 0 = x
1780 | otherwise = x + a - (x `mod` a)
1781
1782 spFormat = if target32Bit platform then II32 else II64
1783
1784 -- TODO: Do not create a new stack frame if delta is too large.
1785 move_sp_down finalStack
1786 | delta > stackFrameHeaderSize platform =
1787 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1788 DELTA (-delta)]
1789 | otherwise = nilOL
1790 where delta = stackDelta finalStack
1791 move_sp_up finalStack
1792 | delta > stackFrameHeaderSize platform =
1793 toOL [ADD sp sp (RIImm (ImmInt delta)),
1794 DELTA 0]
1795 | otherwise = nilOL
1796 where delta = stackDelta finalStack
1797
1798 -- A NOP instruction is required after a call (bl instruction)
1799 -- on AIX and 64-Bit Linux.
1800 -- If the call is to a function with a different TOC (r2) the
1801 -- link editor replaces the NOP instruction with a load of the TOC
1802 -- from the stack to restore the TOC.
1803 maybeNOP = case gcp of
1804 GCP32ELF -> nilOL
1805 -- See Section 3.9.4 of OpenPower ABI
1806 GCPAIX -> unitOL NOP
1807 -- See Section 3.5.11 of PPC64 ELF v1.9
1808 GCP64ELF 1 -> unitOL NOP
1809 -- See Section 2.3.6 of PPC64 ELF v2
1810 GCP64ELF 2 -> unitOL NOP
1811 _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
1812
1813 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1814 passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
1815 accumCode accumUsed | isWord64 arg_ty
1816 && target32Bit (ncgPlatform config) =
1817 do
1818 ChildCode64 code vr_lo <- iselExpr64 arg
1819 let vr_hi = getHiVRegFromLo vr_lo
1820
1821 case gcp of
1822 GCPAIX ->
1823 do let storeWord vr (gpr:_) _ = MR gpr vr
1824 storeWord vr [] offset
1825 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1826 passArguments args
1827 (drop 2 gprs)
1828 fprs
1829 (stackOffset+8)
1830 (accumCode `appOL` code
1831 `snocOL` storeWord vr_hi gprs stackOffset
1832 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1833 ((take 2 gprs) ++ accumUsed)
1834 GCP32ELF ->
1835 do let stackOffset' = roundTo 8 stackOffset
1836 stackCode = accumCode `appOL` code
1837 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1838 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1839 regCode hireg loreg =
1840 accumCode `appOL` code
1841 `snocOL` MR hireg vr_hi
1842 `snocOL` MR loreg vr_lo
1843
1844 case gprs of
1845 hireg : loreg : regs | even (length gprs) ->
1846 passArguments args regs fprs stackOffset
1847 (regCode hireg loreg) (hireg : loreg : accumUsed)
1848 _skipped : hireg : loreg : regs ->
1849 passArguments args regs fprs stackOffset
1850 (regCode hireg loreg) (hireg : loreg : accumUsed)
1851 _ -> -- only one or no regs left
1852 passArguments args [] fprs (stackOffset'+8)
1853 stackCode accumUsed
1854 GCP64ELF _ -> panic "passArguments: 32 bit code"
1855
1856 passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
1857 | reg : _ <- regs = do
1858 register <- getRegister arg_pro
1859 let code = case register of
1860 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1861 Any _ acode -> acode reg
1862 stackOffsetRes = case gcp of
1863 -- The PowerOpen ABI requires that we
1864 -- reserve stack slots for register
1865 -- parameters
1866 GCPAIX -> stackOffset + stackBytes
1867 -- ... the SysV ABI 32-bit doesn't.
1868 GCP32ELF -> stackOffset
1869 -- ... but SysV ABI 64-bit does.
1870 GCP64ELF _ -> stackOffset + stackBytes
1871 passArguments args
1872 (drop nGprs gprs)
1873 (drop nFprs fprs)
1874 stackOffsetRes
1875 (accumCode `appOL` code)
1876 (reg : accumUsed)
1877 | otherwise = do
1878 (vr, code) <- getSomeReg arg_pro
1879 passArguments args
1880 (drop nGprs gprs)
1881 (drop nFprs fprs)
1882 (stackOffset' + stackBytes)
1883 (accumCode `appOL` code
1884 `snocOL` ST format_pro vr stackSlot)
1885 accumUsed
1886 where
1887 arg_pro
1888 | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth platform)) [arg]
1889 | otherwise = arg
1890 format_pro
1891 | isBitsType rep = intFormat (wordWidth platform)
1892 | otherwise = cmmTypeFormat rep
1893 conv_op = case hint of
1894 SignedHint -> MO_SS_Conv
1895 _ -> MO_UU_Conv
1896
1897 stackOffset' = case gcp of
1898 GCPAIX ->
1899 -- The 32bit PowerOPEN ABI is happy with
1900 -- 32bit-alignment ...
1901 stackOffset
1902 GCP32ELF
1903 -- ... the SysV ABI requires 8-byte
1904 -- alignment for doubles.
1905 | isFloatType rep && typeWidth rep == W64 ->
1906 roundTo 8 stackOffset
1907 | otherwise ->
1908 stackOffset
1909 GCP64ELF _ ->
1910 -- Everything on the stack is mapped to
1911 -- 8-byte aligned doublewords
1912 stackOffset
1913 stackOffset''
1914 | isFloatType rep && typeWidth rep == W32 =
1915 case gcp of
1916 -- The ELF v1 ABI Section 3.2.3 requires:
1917 -- "Single precision floating point values
1918 -- are mapped to the second word in a single
1919 -- doubleword"
1920 GCP64ELF 1 -> stackOffset' + 4
1921 _ -> stackOffset'
1922 | otherwise = stackOffset'
1923
1924 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1925 (nGprs, nFprs, stackBytes, regs)
1926 = case gcp of
1927 GCPAIX ->
1928 case cmmTypeFormat rep of
1929 II8 -> (1, 0, 4, gprs)
1930 II16 -> (1, 0, 4, gprs)
1931 II32 -> (1, 0, 4, gprs)
1932 -- The PowerOpen ABI requires that we skip a
1933 -- corresponding number of GPRs when we use
1934 -- the FPRs.
1935 --
1936 -- E.g. for a `double` two GPRs are skipped,
1937 -- whereas for a `float` one GPR is skipped
1938 -- when parameters are assigned to
1939 -- registers.
1940 --
1941 -- The PowerOpen ABI specification can be found at
1942 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1943 FF32 -> (1, 1, 4, fprs)
1944 FF64 -> (2, 1, 8, fprs)
1945 II64 -> panic "genCCall' passArguments II64"
1946
1947 GCP32ELF ->
1948 case cmmTypeFormat rep of
1949 II8 -> (1, 0, 4, gprs)
1950 II16 -> (1, 0, 4, gprs)
1951 II32 -> (1, 0, 4, gprs)
1952 -- ... the SysV ABI doesn't.
1953 FF32 -> (0, 1, 4, fprs)
1954 FF64 -> (0, 1, 8, fprs)
1955 II64 -> panic "genCCall' passArguments II64"
1956 GCP64ELF _ ->
1957 case cmmTypeFormat rep of
1958 II8 -> (1, 0, 8, gprs)
1959 II16 -> (1, 0, 8, gprs)
1960 II32 -> (1, 0, 8, gprs)
1961 II64 -> (1, 0, 8, gprs)
1962 -- The ELFv1 ABI requires that we skip a
1963 -- corresponding number of GPRs when we use
1964 -- the FPRs.
1965 FF32 -> (1, 1, 8, fprs)
1966 FF64 -> (1, 1, 8, fprs)
1967
1968 moveResult reduceToFF32 =
1969 case dest_regs of
1970 [] -> nilOL
1971 [dest]
1972 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1973 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1974 | isWord64 rep && target32Bit platform
1975 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1976 MR r_dest r4]
1977 | otherwise -> unitOL (MR r_dest r3)
1978 where rep = cmmRegType platform (CmmLocal dest)
1979 r_dest = getRegisterReg platform (CmmLocal dest)
1980 _ -> panic "genCCall' moveResult: Bad dest_regs"
1981
1982 outOfLineMachOp mop =
1983 do
1984 mopExpr <- cmmMakeDynamicReference config CallReference $
1985 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1986 let mopLabelOrExpr = case mopExpr of
1987 CmmLit (CmmLabel lbl) -> Left lbl
1988 _ -> Right mopExpr
1989 return (mopLabelOrExpr, reduce)
1990 where
1991 (functionName, reduce) = case mop of
1992 MO_F32_Exp -> (fsLit "exp", True)
1993 MO_F32_ExpM1 -> (fsLit "expm1", True)
1994 MO_F32_Log -> (fsLit "log", True)
1995 MO_F32_Log1P -> (fsLit "log1p", True)
1996 MO_F32_Sqrt -> (fsLit "sqrt", True)
1997 MO_F32_Fabs -> unsupported
1998
1999 MO_F32_Sin -> (fsLit "sin", True)
2000 MO_F32_Cos -> (fsLit "cos", True)
2001 MO_F32_Tan -> (fsLit "tan", True)
2002
2003 MO_F32_Asin -> (fsLit "asin", True)
2004 MO_F32_Acos -> (fsLit "acos", True)
2005 MO_F32_Atan -> (fsLit "atan", True)
2006
2007 MO_F32_Sinh -> (fsLit "sinh", True)
2008 MO_F32_Cosh -> (fsLit "cosh", True)
2009 MO_F32_Tanh -> (fsLit "tanh", True)
2010 MO_F32_Pwr -> (fsLit "pow", True)
2011
2012 MO_F32_Asinh -> (fsLit "asinh", True)
2013 MO_F32_Acosh -> (fsLit "acosh", True)
2014 MO_F32_Atanh -> (fsLit "atanh", True)
2015
2016 MO_F64_Exp -> (fsLit "exp", False)
2017 MO_F64_ExpM1 -> (fsLit "expm1", False)
2018 MO_F64_Log -> (fsLit "log", False)
2019 MO_F64_Log1P -> (fsLit "log1p", False)
2020 MO_F64_Sqrt -> (fsLit "sqrt", False)
2021 MO_F64_Fabs -> unsupported
2022
2023 MO_F64_Sin -> (fsLit "sin", False)
2024 MO_F64_Cos -> (fsLit "cos", False)
2025 MO_F64_Tan -> (fsLit "tan", False)
2026
2027 MO_F64_Asin -> (fsLit "asin", False)
2028 MO_F64_Acos -> (fsLit "acos", False)
2029 MO_F64_Atan -> (fsLit "atan", False)
2030
2031 MO_F64_Sinh -> (fsLit "sinh", False)
2032 MO_F64_Cosh -> (fsLit "cosh", False)
2033 MO_F64_Tanh -> (fsLit "tanh", False)
2034 MO_F64_Pwr -> (fsLit "pow", False)
2035
2036 MO_F64_Asinh -> (fsLit "asinh", False)
2037 MO_F64_Acosh -> (fsLit "acosh", False)
2038 MO_F64_Atanh -> (fsLit "atanh", False)
2039
2040 MO_I64_ToI -> (fsLit "hs_int64ToInt", False)
2041 MO_I64_FromI -> (fsLit "hs_intToInt64", False)
2042 MO_W64_ToW -> (fsLit "hs_word64ToWord", False)
2043 MO_W64_FromW -> (fsLit "hs_wordToWord64", False)
2044
2045 MO_x64_Neg -> (fsLit "hs_neg64", False)
2046 MO_x64_Add -> (fsLit "hs_add64", False)
2047 MO_x64_Sub -> (fsLit "hs_sub64", False)
2048 MO_x64_Mul -> (fsLit "hs_mul64", False)
2049 MO_I64_Quot -> (fsLit "hs_quotInt64", False)
2050 MO_I64_Rem -> (fsLit "hs_remInt64", False)
2051 MO_W64_Quot -> (fsLit "hs_quotWord64", False)
2052 MO_W64_Rem -> (fsLit "hs_remWord64", False)
2053
2054 MO_x64_And -> (fsLit "hs_and64", False)
2055 MO_x64_Or -> (fsLit "hs_or64", False)
2056 MO_x64_Xor -> (fsLit "hs_xor64", False)
2057 MO_x64_Not -> (fsLit "hs_not64", False)
2058 MO_x64_Shl -> (fsLit "hs_uncheckedShiftL64", False)
2059 MO_I64_Shr -> (fsLit "hs_uncheckedIShiftRA64", False)
2060 MO_W64_Shr -> (fsLit "hs_uncheckedShiftRL64", False)
2061
2062 MO_x64_Eq -> (fsLit "hs_eq64", False)
2063 MO_x64_Ne -> (fsLit "hs_ne64", False)
2064 MO_I64_Ge -> (fsLit "hs_geInt64", False)
2065 MO_I64_Gt -> (fsLit "hs_gtInt64", False)
2066 MO_I64_Le -> (fsLit "hs_leInt64", False)
2067 MO_I64_Lt -> (fsLit "hs_ltInt64", False)
2068 MO_W64_Ge -> (fsLit "hs_geWord64", False)
2069 MO_W64_Gt -> (fsLit "hs_gtWord64", False)
2070 MO_W64_Le -> (fsLit "hs_leWord64", False)
2071 MO_W64_Lt -> (fsLit "hs_ltWord64", False)
2072
2073 MO_UF_Conv w -> (word2FloatLabel w, False)
2074
2075 MO_Memcpy _ -> (fsLit "memcpy", False)
2076 MO_Memset _ -> (fsLit "memset", False)
2077 MO_Memmove _ -> (fsLit "memmove", False)
2078 MO_Memcmp _ -> (fsLit "memcmp", False)
2079
2080 MO_SuspendThread -> (fsLit "suspendThread", False)
2081 MO_ResumeThread -> (fsLit "resumeThread", False)
2082
2083 MO_BSwap w -> (bSwapLabel w, False)
2084 MO_BRev w -> (bRevLabel w, False)
2085 MO_PopCnt w -> (popCntLabel w, False)
2086 MO_Pdep w -> (pdepLabel w, False)
2087 MO_Pext w -> (pextLabel w, False)
2088 MO_Clz _ -> unsupported
2089 MO_Ctz _ -> unsupported
2090 MO_AtomicRMW {} -> unsupported
2091 MO_Cmpxchg w -> (cmpxchgLabel w, False)
2092 MO_Xchg w -> (xchgLabel w, False)
2093 MO_AtomicRead _ -> unsupported
2094 MO_AtomicWrite _ -> unsupported
2095
2096 MO_S_Mul2 {} -> unsupported
2097 MO_S_QuotRem {} -> unsupported
2098 MO_U_QuotRem {} -> unsupported
2099 MO_U_QuotRem2 {} -> unsupported
2100 MO_Add2 {} -> unsupported
2101 MO_AddWordC {} -> unsupported
2102 MO_SubWordC {} -> unsupported
2103 MO_AddIntC {} -> unsupported
2104 MO_SubIntC {} -> unsupported
2105 MO_U_Mul2 {} -> unsupported
2106 MO_ReadBarrier -> unsupported
2107 MO_WriteBarrier -> unsupported
2108 MO_Touch -> unsupported
2109 MO_Prefetch_Data _ -> unsupported
2110 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2111 ++ " not supported")
2112
2113 -- -----------------------------------------------------------------------------
2114 -- Generating a table-branch
2115
2116 genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2117 genSwitch config expr targets
2118 | OSAIX <- platformOS platform
2119 = do
2120 (reg,e_code) <- getSomeReg indexExpr
2121 let fmt = archWordFormat $ target32Bit platform
2122 sha = if target32Bit platform then 2 else 3
2123 tmp <- getNewRegNat fmt
2124 lbl <- getNewLabelNat
2125 dynRef <- cmmMakeDynamicReference config DataReference lbl
2126 (tableReg,t_code) <- getSomeReg $ dynRef
2127 let code = e_code `appOL` t_code `appOL` toOL [
2128 SL fmt tmp reg (RIImm (ImmInt sha)),
2129 LD fmt tmp (AddrRegReg tableReg tmp),
2130 MTCTR tmp,
2131 BCTR ids (Just lbl) []
2132 ]
2133 return code
2134
2135 | (ncgPIC config) || (not $ target32Bit platform)
2136 = do
2137 (reg,e_code) <- getSomeReg indexExpr
2138 let fmt = archWordFormat $ target32Bit platform
2139 sha = if target32Bit platform then 2 else 3
2140 tmp <- getNewRegNat fmt
2141 lbl <- getNewLabelNat
2142 dynRef <- cmmMakeDynamicReference config DataReference lbl
2143 (tableReg,t_code) <- getSomeReg $ dynRef
2144 let code = e_code `appOL` t_code `appOL` toOL [
2145 SL fmt tmp reg (RIImm (ImmInt sha)),
2146 LD fmt tmp (AddrRegReg tableReg tmp),
2147 ADD tmp tmp (RIReg tableReg),
2148 MTCTR tmp,
2149 BCTR ids (Just lbl) []
2150 ]
2151 return code
2152 | otherwise
2153 = do
2154 (reg,e_code) <- getSomeReg indexExpr
2155 let fmt = archWordFormat $ target32Bit platform
2156 sha = if target32Bit platform then 2 else 3
2157 tmp <- getNewRegNat fmt
2158 lbl <- getNewLabelNat
2159 let code = e_code `appOL` toOL [
2160 SL fmt tmp reg (RIImm (ImmInt sha)),
2161 ADDIS tmp tmp (HA (ImmCLbl lbl)),
2162 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2163 MTCTR tmp,
2164 BCTR ids (Just lbl) []
2165 ]
2166 return code
2167 where
2168 indexExpr = cmmOffset platform exprWidened offset
2169 -- We widen to a native-width register to santize the high bits
2170 exprWidened = CmmMachOp
2171 (MO_UU_Conv (cmmExprWidth platform expr)
2172 (platformWordWidth platform))
2173 [expr]
2174 (offset, ids) = switchTargetsToTable targets
2175 platform = ncgPlatform config
2176
2177 generateJumpTableForInstr :: NCGConfig -> Instr
2178 -> Maybe (NatCmmDecl RawCmmStatics Instr)
2179 generateJumpTableForInstr config (BCTR ids (Just lbl) _) =
2180 let jumpTable
2181 | (ncgPIC config) || (not $ target32Bit $ ncgPlatform config)
2182 = map jumpTableEntryRel ids
2183 | otherwise = map (jumpTableEntry config) ids
2184 where jumpTableEntryRel Nothing
2185 = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
2186 jumpTableEntryRel (Just blockid)
2187 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
2188 (ncgWordWidth config))
2189 where blockLabel = blockLbl blockid
2190 in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable))
2191 generateJumpTableForInstr _ _ = Nothing
2192
2193 -- -----------------------------------------------------------------------------
2194 -- 'condIntReg' and 'condFltReg': condition codes into registers
2195
2196 -- Turn those condition codes into integers now (when they appear on
2197 -- the right hand side of an assignment).
2198
2199
2200
2201 condReg :: NatM CondCode -> NatM Register
2202 condReg getCond = do
2203 CondCode _ cond cond_code <- getCond
2204 platform <- getPlatform
2205 let
2206 code dst = cond_code
2207 `appOL` negate_code
2208 `appOL` toOL [
2209 MFCR dst,
2210 RLWINM dst dst (bit + 1) 31 31
2211 ]
2212
2213 negate_code | do_negate = unitOL (CRNOR bit bit bit)
2214 | otherwise = nilOL
2215
2216 (bit, do_negate) = case cond of
2217 LTT -> (0, False)
2218 LE -> (1, True)
2219 EQQ -> (2, False)
2220 GE -> (0, True)
2221 GTT -> (1, False)
2222
2223 NE -> (2, True)
2224
2225 LU -> (0, False)
2226 LEU -> (1, True)
2227 GEU -> (0, True)
2228 GU -> (1, False)
2229 _ -> panic "PPC.CodeGen.codeReg: no match"
2230
2231 format = archWordFormat $ target32Bit platform
2232 return (Any format code)
2233
2234 condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
2235 condIntReg cond width x y = condReg (condIntCode cond width x y)
2236 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2237 condFltReg cond x y = condReg (condFltCode cond x y)
2238
2239
2240
2241 -- -----------------------------------------------------------------------------
2242 -- 'trivial*Code': deal with trivial instructions
2243
2244 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2245 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2246 -- Only look for constants on the right hand side, because that's
2247 -- where the generic optimizer will have put them.
2248
2249 -- Similarly, for unary instructions, we don't have to worry about
2250 -- matching an StInt as the argument, because genericOpt will already
2251 -- have handled the constant-folding.
2252
2253
2254
2255 {-
2256 Wolfgang's PowerPC version of The Rules:
2257
2258 A slightly modified version of The Rules to take advantage of the fact
2259 that PowerPC instructions work on all registers and don't implicitly
2260 clobber any fixed registers.
2261
2262 * The only expression for which getRegister returns Fixed is (CmmReg reg).
2263
2264 * If getRegister returns Any, then the code it generates may modify only:
2265 (a) fresh temporaries
2266 (b) the destination register
2267 It may *not* modify global registers, unless the global
2268 register happens to be the destination register.
2269 It may not clobber any other registers. In fact, only ccalls clobber any
2270 fixed registers.
2271 Also, it may not modify the counter register (used by genCCall).
2272
2273 Corollary: If a getRegister for a subexpression returns Fixed, you need
2274 not move it to a fresh temporary before evaluating the next subexpression.
2275 The Fixed register won't be modified.
2276 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2277
2278 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2279 the value of the destination register.
2280 -}
2281
2282 trivialCode
2283 :: Width
2284 -> Bool
2285 -> (Reg -> Reg -> RI -> Instr)
2286 -> CmmExpr
2287 -> CmmExpr
2288 -> NatM Register
2289
2290 trivialCode rep signed instr x (CmmLit (CmmInt y _))
2291 | Just imm <- makeImmediate rep signed y
2292 = do
2293 (src1, code1) <- getSomeReg x
2294 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2295 return (Any (intFormat rep) code)
2296
2297 trivialCode rep _ instr x y = do
2298 (src1, code1) <- getSomeReg x
2299 (src2, code2) <- getSomeReg y
2300 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2301 return (Any (intFormat rep) code)
2302
2303 shiftMulCode
2304 :: Width
2305 -> Bool
2306 -> (Format-> Reg -> Reg -> RI -> Instr)
2307 -> CmmExpr
2308 -> CmmExpr
2309 -> NatM Register
2310 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2311 | Just imm <- makeImmediate width sign y
2312 = do
2313 (src1, code1) <- getSomeReg x
2314 let format = intFormat width
2315 let ins_fmt = intFormat (max W32 width)
2316 let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
2317 return (Any format code)
2318
2319 shiftMulCode width _ instr x y = do
2320 (src1, code1) <- getSomeReg x
2321 (src2, code2) <- getSomeReg y
2322 let format = intFormat width
2323 let ins_fmt = intFormat (max W32 width)
2324 let code dst = code1 `appOL` code2
2325 `snocOL` instr ins_fmt dst src1 (RIReg src2)
2326 return (Any format code)
2327
2328 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2329 -> CmmExpr -> CmmExpr -> NatM Register
2330 trivialCodeNoImm' format instr x y = do
2331 (src1, code1) <- getSomeReg x
2332 (src2, code2) <- getSomeReg y
2333 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2334 return (Any format code)
2335
2336 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2337 -> CmmExpr -> CmmExpr -> NatM Register
2338 trivialCodeNoImm format instr x y
2339 = trivialCodeNoImm' format (instr format) x y
2340
2341 srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
2342 -> CmmExpr -> CmmExpr -> NatM Register
2343 srCode width sgn instr x (CmmLit (CmmInt y _))
2344 | Just imm <- makeImmediate width sgn y
2345 = do
2346 let op_len = max W32 width
2347 extend = if sgn then extendSExpr else extendUExpr
2348 (src1, code1) <- getSomeReg (extend width op_len x)
2349 let code dst = code1 `snocOL`
2350 instr (intFormat op_len) dst src1 (RIImm imm)
2351 return (Any (intFormat width) code)
2352
2353 srCode width sgn instr x y = do
2354 let op_len = max W32 width
2355 extend = if sgn then extendSExpr else extendUExpr
2356 (src1, code1) <- getSomeReg (extend width op_len x)
2357 (src2, code2) <- getSomeReg (extendUExpr width op_len y)
2358 -- Note: Shift amount `y` is unsigned
2359 let code dst = code1 `appOL` code2 `snocOL`
2360 instr (intFormat op_len) dst src1 (RIReg src2)
2361 return (Any (intFormat width) code)
2362
2363 divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2364 divCode width sgn x y = do
2365 let op_len = max W32 width
2366 extend = if sgn then extendSExpr else extendUExpr
2367 (src1, code1) <- getSomeReg (extend width op_len x)
2368 (src2, code2) <- getSomeReg (extend width op_len y)
2369 let code dst = code1 `appOL` code2 `snocOL`
2370 DIV (intFormat op_len) sgn dst src1 src2
2371 return (Any (intFormat width) code)
2372
2373
2374 trivialUCode :: Format
2375 -> (Reg -> Reg -> Instr)
2376 -> CmmExpr
2377 -> NatM Register
2378 trivialUCode rep instr x = do
2379 (src, code) <- getSomeReg x
2380 let code' dst = code `snocOL` instr dst src
2381 return (Any rep code')
2382
2383 -- There is no "remainder" instruction on the PPC, so we have to do
2384 -- it the hard way.
2385 -- The "sgn" parameter is the signedness for the division instruction
2386
2387 remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
2388 -> NatM (Reg -> InstrBlock)
2389 remainderCode rep sgn reg_q arg_x arg_y = do
2390 let op_len = max W32 rep
2391 fmt = intFormat op_len
2392 extend = if sgn then extendSExpr else extendUExpr
2393 (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
2394 (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
2395 return $ \reg_r -> y_code `appOL` x_code
2396 `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
2397 , MULL fmt reg_r reg_q (RIReg y_reg)
2398 , SUBF reg_r reg_r x_reg
2399 ]
2400
2401
2402 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2403 coerceInt2FP fromRep toRep x = do
2404 platform <- getPlatform
2405 let arch = platformArch platform
2406 coerceInt2FP' arch fromRep toRep x
2407
2408 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2409 coerceInt2FP' ArchPPC fromRep toRep x = do
2410 (src, code) <- getSomeReg x
2411 lbl <- getNewLabelNat
2412 itmp <- getNewRegNat II32
2413 ftmp <- getNewRegNat FF64
2414 config <- getConfig
2415 platform <- getPlatform
2416 dynRef <- cmmMakeDynamicReference config DataReference lbl
2417 Amode addr addr_code <- getAmode D dynRef
2418 let
2419 code' dst = code `appOL` maybe_exts `appOL` toOL [
2420 LDATA (Section ReadOnlyData lbl) $ CmmStaticsRaw lbl
2421 [CmmStaticLit (CmmInt 0x43300000 W32),
2422 CmmStaticLit (CmmInt 0x80000000 W32)],
2423 XORIS itmp src (ImmInt 0x8000),
2424 ST II32 itmp (spRel platform 3),
2425 LIS itmp (ImmInt 0x4330),
2426 ST II32 itmp (spRel platform 2),
2427 LD FF64 ftmp (spRel platform 2)
2428 ] `appOL` addr_code `appOL` toOL [
2429 LD FF64 dst addr,
2430 FSUB FF64 dst ftmp dst
2431 ] `appOL` maybe_frsp dst
2432
2433 maybe_exts = case fromRep of
2434 W8 -> unitOL $ EXTS II8 src src
2435 W16 -> unitOL $ EXTS II16 src src
2436 W32 -> nilOL
2437 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2438
2439 maybe_frsp dst
2440 = case toRep of
2441 W32 -> unitOL $ FRSP dst dst
2442 W64 -> nilOL
2443 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2444
2445 return (Any (floatFormat toRep) code')
2446
2447 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2448 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2449 -- set right before a call and restored right after return from the call.
2450 -- So it is fine.
2451 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2452 (src, code) <- getSomeReg x
2453 platform <- getPlatform
2454 upper <- getNewRegNat II64
2455 lower <- getNewRegNat II64
2456 l1 <- getBlockIdNat
2457 l2 <- getBlockIdNat
2458 let
2459 code' dst = code `appOL` maybe_exts `appOL` toOL [
2460 ST II64 src (spRel platform 3),
2461 LD FF64 dst (spRel platform 3),
2462 FCFID dst dst
2463 ] `appOL` maybe_frsp dst
2464
2465 maybe_exts
2466 = case fromRep of
2467 W8 -> unitOL $ EXTS II8 src src
2468 W16 -> unitOL $ EXTS II16 src src
2469 W32 -> unitOL $ EXTS II32 src src
2470 W64 -> case toRep of
2471 W32 -> toOL [ SRA II64 upper src (RIImm (ImmInt 53))
2472 , CLRLI II64 lower src 53
2473 , ADD upper upper (RIImm (ImmInt 1))
2474 , ADD lower lower (RIImm (ImmInt 2047))
2475 , CMPL II64 upper (RIImm (ImmInt 2))
2476 , OR lower lower (RIReg src)
2477 , CLRRI II64 lower lower 11
2478 , BCC LTT l2 Nothing
2479 , BCC ALWAYS l1 Nothing
2480 , NEWBLOCK l1
2481 , MR src lower
2482 , BCC ALWAYS l2 Nothing
2483 , NEWBLOCK l2
2484 ]
2485 _ -> nilOL
2486 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2487
2488 maybe_frsp dst
2489 = case toRep of
2490 W32 -> unitOL $ FRSP dst dst
2491 W64 -> nilOL
2492 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2493
2494 return (Any (floatFormat toRep) code')
2495
2496 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2497
2498
2499 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2500 coerceFP2Int fromRep toRep x = do
2501 platform <- getPlatform
2502 let arch = platformArch platform
2503 coerceFP2Int' arch fromRep toRep x
2504
2505 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2506 coerceFP2Int' ArchPPC _ toRep x = do
2507 platform <- getPlatform
2508 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2509 (src, code) <- getSomeReg x
2510 tmp <- getNewRegNat FF64
2511 let
2512 code' dst = code `appOL` toOL [
2513 -- convert to int in FP reg
2514 FCTIWZ tmp src,
2515 -- store value (64bit) from FP to stack
2516 ST FF64 tmp (spRel platform 2),
2517 -- read low word of value (high word is undefined)
2518 LD II32 dst (spRel platform 3)]
2519 return (Any (intFormat toRep) code')
2520
2521 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2522 platform <- getPlatform
2523 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2524 (src, code) <- getSomeReg x
2525 tmp <- getNewRegNat FF64
2526 let
2527 code' dst = code `appOL` toOL [
2528 -- convert to int in FP reg
2529 FCTIDZ tmp src,
2530 -- store value (64bit) from FP to compiler word on stack
2531 ST FF64 tmp (spRel platform 3),
2532 LD II64 dst (spRel platform 3)]
2533 return (Any (intFormat toRep) code')
2534
2535 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2536
2537 -- Note [.LCTOC1 in PPC PIC code]
2538 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2539 -- to make the most of the PPC's 16-bit displacements.
2540 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
2541 -- first element will have '-32768' offset against .LCTOC1.
2542
2543 -- Note [implicit register in PPC PIC code]
2544 -- PPC generates calls by labels in assembly
2545 -- in form of:
2546 -- bl puts+32768@plt
2547 -- in this form it's not seen directly (by GHC NCG)
2548 -- that r30 (PicBaseReg) is used,
2549 -- but r30 is a required part of PLT code setup:
2550 -- puts+32768@plt:
2551 -- lwz r11,-30484(r30) ; offset in .LCTOC1
2552 -- mtctr r11
2553 -- bctr