1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE TupleSections #-}
3 {-# LANGUAGE BangPatterns #-}
4 {-# LANGUAGE BinaryLiterals #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE NumericUnderscores #-}
7 module GHC.CmmToAsm.AArch64.CodeGen (
8 cmmTopCodeGen
9 , generateJumpTableForInstr
10 )
11
12 where
13
14 -- NCG stuff:
15 import GHC.Prelude hiding (EQ)
16
17 import GHC.Platform.Regs
18 import GHC.CmmToAsm.AArch64.Instr
19 import GHC.CmmToAsm.AArch64.Regs
20 import GHC.CmmToAsm.AArch64.Cond
21
22 import GHC.CmmToAsm.CPrim
23 import GHC.Cmm.DebugBlock
24 import GHC.CmmToAsm.Monad
25 ( NatM, getNewRegNat
26 , getPicBaseMaybeNat, getPlatform, getConfig
27 , getDebugBlock, getFileId
28 )
29 -- import GHC.CmmToAsm.Instr
30 import GHC.CmmToAsm.PIC
31 import GHC.CmmToAsm.Format
32 import GHC.CmmToAsm.Config
33 import GHC.CmmToAsm.Types
34 import GHC.Platform.Reg
35 import GHC.Platform
36
37 -- Our intermediate code:
38 import GHC.Cmm.BlockId
39 import GHC.Cmm
40 import GHC.Cmm.Utils
41 import GHC.Cmm.Switch
42 import GHC.Cmm.CLabel
43 import GHC.Cmm.Dataflow.Block
44 import GHC.Cmm.Dataflow.Graph
45 import GHC.Types.Tickish ( GenTickish(..) )
46 import GHC.Types.SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
47
48 -- The rest:
49 import GHC.Data.OrdList
50 import GHC.Utils.Outputable
51
52 import Control.Monad ( mapAndUnzipM, when, foldM )
53 import Data.Word
54 import Data.Maybe
55 import GHC.Float
56
57 import GHC.Types.Basic
58 import GHC.Types.ForeignCall
59 import GHC.Data.FastString
60 import GHC.Utils.Misc
61 import GHC.Utils.Panic
62
63 -- Note [General layout of an NCG]
64 -- @cmmTopCodeGen@ will be our main entry point to code gen. Here we'll get
65 -- @RawCmmDecl@; see GHC.Cmm
66 --
67 -- RawCmmDecl = GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
68 --
69 -- GenCmmDecl d h g = CmmProc h CLabel [GlobalReg] g
70 -- | CmmData Section d
71 --
72 -- As a result we want to transform this to a list of @NatCmmDecl@, which is
73 -- defined in @GHC.CmmToAsm.Instr@ as
74 --
75 -- type NatCmmDecl statics instr
76 -- = GenCmmDecl statics (LabelMap RawCmmStatics) (ListGraph instr)
77 --
78 -- Thus we'll turn
79 -- GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
80 -- into
81 -- [GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) (ListGraph Instr)]
82 --
83 -- where @CmmGraph@ is
84 --
85 -- type CmmGraph = GenCmmGraph CmmNode
86 -- data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
87 -- type CmmBlock = Block CmmNode C C
88 --
89 -- and @ListGraph Instr@ is
90 --
91 -- newtype ListGraph i = ListGraph [GenBasicBlock i]
92 -- data GenBasicBlock i = BasicBlock BlockId [i]
93
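-- As a rough sketch (shapes only; the block ids E, B1, B2 are made up): a
-- @CmmProc info lbl live graph@ whose graph has entry block E plus blocks B1
-- and B2 should come out of @cmmTopCodeGen@ as roughly
--
--   [ CmmProc info lbl live (ListGraph [BasicBlock E [..], BasicBlock B1 [..], BasicBlock B2 [..]])
--   , CmmData sec statics   -- plus any statics emitted while generating the blocks
--   ]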
94 cmmTopCodeGen
95 :: RawCmmDecl
96 -> NatM [NatCmmDecl RawCmmStatics Instr]
97
98 -- Thus we'll have to deal with either CmmProc ...
99 cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
100 -- do
101 -- traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n"
102 -- ++ showSDocUnsafe (ppr cmm)
103
104 let blocks = toBlockListEntryFirst graph
105 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
106 picBaseMb <- getPicBaseMaybeNat
107
108 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
109 tops = proc : concat statics
110
111 case picBaseMb of
112 Just _picBase -> panic "AArch64.cmmTopCodeGen: picBase not implemented"
113 Nothing -> return tops
114
115 -- ... or CmmData.
116 cmmTopCodeGen _cmm@(CmmData sec dat) = do
117 -- do
118 -- traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n"
119 -- ++ showSDocUnsafe (ppr cmm)
120 return [CmmData sec dat] -- no translation, we just use CmmStatic
121
122 basicBlockCodeGen
123 :: Block CmmNode C C
124 -> NatM ( [NatBasicBlock Instr]
125 , [NatCmmDecl RawCmmStatics Instr])
126
127 basicBlockCodeGen block = do
128 config <- getConfig
129 -- do
130 -- traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
131 -- ++ showSDocUnsafe (ppr block)
132 let (_, nodes, tail) = blockSplit block
133 id = entryLabel block
134 stmts = blockToList nodes
135
136 header_comment_instr = unitOL $ MULTILINE_COMMENT (
137 text "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
138 $+$ pdoc (ncgPlatform config) block
139 )
140 -- Generate location directive
141 dbg <- getDebugBlock (entryLabel block)
142 loc_instrs <- case dblSourceTick =<< dbg of
143 Just (SourceNote span name)
144 -> do fileId <- getFileId (srcSpanFile span)
145 let line = srcSpanStartLine span; col = srcSpanStartCol span
146 return $ unitOL $ LOCATION fileId line col name
147 _ -> return nilOL
148 (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
149 (!tail_instrs,_) <- stmtToInstrs mid_bid tail
150 let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs
151 -- TODO: The x86 backend runs @verifyBasicBlock@ here and inserts
152 -- unwinding info. See Ticket 19913
153 -- code generation may introduce new basic block boundaries, which
154 -- are indicated by the NEWBLOCK instruction. We must split up the
155 -- instruction stream into basic blocks again. Also, we extract
156 -- LDATAs here too.
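-- For example (a sketch; b1, i1..i4, sec and dat are made up), an instruction
-- stream
--   [ i1, i2, NEWBLOCK b1, i3, LDATA sec dat, i4 ]
-- folds into
--   top          = [i1, i2]
--   other_blocks = [BasicBlock b1 [i3, i4]]
--   statics      = [CmmData sec dat]
-- so the entry block keeps i1 and i2, the NEWBLOCK opens a fresh block b1, and
-- the LDATA is lifted out as a separate data section.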
157 let
158 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
159
160 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
161 = ([], BasicBlock id instrs : blocks, statics)
162 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
163 = (instrs, blocks, CmmData sec dat:statics)
164 mkBlocks instr (instrs,blocks,statics)
165 = (instr:instrs, blocks, statics)
166 return (BasicBlock id top : other_blocks, statics)
167
168
169 -- -----------------------------------------------------------------------------
170 -- | Utilities
171 ann :: SDoc -> Instr -> Instr
172 ann doc instr {- | debugIsOn -} = ANN doc instr
173 -- ann _ instr = instr
174 {-# INLINE ann #-}
175
176 -- Using pprExpr would hide the AST; with @ANN@ it will end up in the assembly with
177 -- -dppr-debug. The idea is that we can trivially see how a cmm expression
178 -- ended up producing the assembly we see. By having the verbatim AST printed
179 -- we can simply check the patterns that were matched to arrive at the assembly
180 -- we generated.
181 --
182 -- pprExpr will hide a lot of the noise of the underlying data structure and print
183 -- the expression as something that can be easily read by a human. However,
184 -- going back to the exact CmmExpr representation can be laborious and adds
185 -- indirections to find the matches that lead to the assembly.
186 --
187 -- An improvement could be to use
188 --
189 -- (pprExpr genericPlatform e) <> parens (text . show e)
190 --
191 -- to have the best of both worlds.
192 --
193 -- Note: debugIsOn is too restrictive; it only holds for debug compilers.
194 -- However, we do not only want to inspect this for debug compilers. Ideally
195 -- we'd have a check for -dppr-debug here already, such that we don't even
196 -- generate the ANN expressions. However, as they are lazy, they shouldn't cost
197 -- anything unless they are actually forced, and without -dppr-debug they should
198 -- never end up being forced.
199 annExpr :: CmmExpr -> Instr -> Instr
200 annExpr e instr {- | debugIsOn -} = ANN (text . show $ e) instr
201 -- annExpr e instr {- | debugIsOn -} = ANN (pprExpr genericPlatform e) instr
202 -- annExpr _ instr = instr
203 {-# INLINE annExpr #-}
204
205 -- -----------------------------------------------------------------------------
206 -- Generating a table-branch
207
208 -- TODO: jump tables would be a lot faster, but we'll use the bare-bones approach for now.
209 -- This is usually done by sticking the jump table ids into an instruction
210 -- and then having the @generateJumpTableForInstr@ callback produce the jump
211 -- table as a static.
212 --
213 -- See Ticket 19912
214 --
215 -- data SwitchTargets =
216 -- SwitchTargets
217 -- Bool -- Signed values
218 -- (Integer, Integer) -- Range
219 -- (Maybe Label) -- Default value
220 -- (M.Map Integer Label) -- The branches
221 --
222 -- Non Jumptable plan:
223 -- xE <- expr
224 --
225 genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
226 genSwitch expr targets = do -- pprPanic "genSwitch" (ppr expr)
227 (reg, format, code) <- getSomeReg expr
228 let w = formatToWidth format
229 let mkbranch acc (key, bid) = do
230 (keyReg, _format, code) <- getSomeReg (CmmLit (CmmInt key w))
231 return $ code `appOL`
232 toOL [ CMP (OpReg w reg) (OpReg w keyReg)
233 , BCOND EQ (TBlock bid)
234 ] `appOL` acc
235 def_code = case switchTargetsDefault targets of
236 Just bid -> unitOL (B (TBlock bid))
237 Nothing -> nilOL
238
239 switch_code <- foldM mkbranch nilOL (switchTargetsCases targets)
240 return $ code `appOL` switch_code `appOL` def_code
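-- As a sketch of the output (register names made up, and the order of the
-- individual tests may differ): a switch on a value already in x8 with cases
-- {1 -> L1, 2 -> L2} and default Ld becomes roughly
--
--   mov x9, #1
--   cmp x8, x9
--   b.eq L1
--   mov x10, #2
--   cmp x8, x10
--   b.eq L2
--   b Ld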
241
242 -- We don't do jump tables for now, see Ticket 19912
243 generateJumpTableForInstr :: NCGConfig -> Instr
244 -> Maybe (NatCmmDecl RawCmmStatics Instr)
245 generateJumpTableForInstr _ _ = Nothing
246
247 -- -----------------------------------------------------------------------------
248 -- Top-level of the instruction selector
249
250 -- See Note [Keeping track of the current block] for why
251 -- we pass the BlockId.
252 stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
253 -> [CmmNode O O] -- ^ Cmm Statement
254 -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction
255 stmtsToInstrs bid stmts =
256 go bid stmts nilOL
257 where
258 go bid [] instrs = return (instrs,bid)
259 go bid (s:stmts) instrs = do
260 (instrs',bid') <- stmtToInstrs bid s
261 -- If the statement introduced a new block, we use that one
262 let !newBid = fromMaybe bid bid'
263 go newBid stmts (instrs `appOL` instrs')
264
265 -- | `bid` refers to the current block and is used to update the CFG
266 -- if new blocks are inserted in the control flow.
267 -- See Note [Keeping track of the current block] for more details.
268 stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
269 -> CmmNode e x
270 -> NatM (InstrBlock, Maybe BlockId)
271 -- ^ Instructions, and bid of new block if successive
272 -- statements are placed in a different basic block.
273 stmtToInstrs bid stmt = do
274 -- traceM $ "-- -------------------------- stmtToInstrs -------------------------- --\n"
275 -- ++ showSDocUnsafe (ppr stmt)
276 platform <- getPlatform
277 case stmt of
278 CmmUnsafeForeignCall target result_regs args
279 -> genCCall target result_regs args bid
280
281 _ -> (,Nothing) <$> case stmt of
282 CmmComment s -> return (unitOL (COMMENT (ftext s)))
283 CmmTick {} -> return nilOL
284
285 CmmAssign reg src
286 | isFloatType ty -> assignReg_FltCode format reg src
287 | otherwise -> assignReg_IntCode format reg src
288 where ty = cmmRegType platform reg
289 format = cmmTypeFormat ty
290
291 CmmStore addr src
292 | isFloatType ty -> assignMem_FltCode format addr src
293 | otherwise -> assignMem_IntCode format addr src
294 where ty = cmmExprType platform src
295 format = cmmTypeFormat ty
296
297 CmmBranch id -> genBranch id
298
299 -- We try to arrange blocks such that the likely branch is the fallthrough
300 -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
301 CmmCondBranch arg true false _prediction ->
302 genCondBranch bid true false arg
303
304 CmmSwitch arg ids -> genSwitch arg ids
305
306 CmmCall { cml_target = arg } -> genJump arg
307
308 CmmUnwind _regs -> return nilOL
309
310 _ -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
311
312 --------------------------------------------------------------------------------
313 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
314 -- They are really trees of insns to facilitate fast appending, where a
315 -- left-to-right traversal yields the insns in the correct order.
316 --
317 type InstrBlock
318 = OrdList Instr
319
320 -- | Registers passed up the tree. If the stix code forces the register
321 -- to live in a pre-decided machine register, it comes out as @Fixed@;
322 -- otherwise, it comes out as @Any@, and the parent can decide which
323 -- register to put it in.
324 --
325 data Register
326 = Fixed Format Reg InstrBlock
327 | Any Format (Reg -> InstrBlock)
328
329 -- | Sometimes we need to change the Format of a register. Primarily during
330 -- conversion.
331 swizzleRegisterRep :: Format -> Register -> Register
332 swizzleRegisterRep format (Fixed _ reg code) = Fixed format reg code
333 swizzleRegisterRep format (Any _ codefn) = Any format codefn
334
335 -- | Grab the Reg for a CmmReg
336 getRegisterReg :: Platform -> CmmReg -> Reg
337
338 getRegisterReg _ (CmmLocal (LocalReg u pk))
339 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
340
341 getRegisterReg platform (CmmGlobal mid)
342 = case globalRegMaybe platform mid of
343 Just reg -> RegReal reg
344 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
345 -- By this stage, the only MagicIds remaining should be the
346 -- ones which map to a real machine register on this
347 -- platform. Hence if it's not mapped to a register, something
348 -- went wrong earlier in the pipeline.
349 -- | Convert a BlockId to some CmmStatic data
350 -- TODO: Add JumpTable Logic, see Ticket 19912
351 -- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
352 -- jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
353 -- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
354 -- where blockLabel = blockLbl blockid
355
356 -- -----------------------------------------------------------------------------
357 -- General things for putting together code sequences
358
359 -- | The dual to getAnyReg: compute an expression into a register, but
360 -- we don't mind which one it is.
361 getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock)
362 getSomeReg expr = do
363 r <- getRegister expr
364 case r of
365 Any rep code -> do
366 tmp <- getNewRegNat rep
367 return (tmp, rep, code tmp)
368 Fixed rep reg code ->
369 return (reg, rep, code)
370
371 -- TODO OPT: we might be able to give getRegister
372 -- a hint about what kind of register we want.
373 getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock)
374 getFloatReg expr = do
375 r <- getRegister expr
376 case r of
377 Any rep code | isFloatFormat rep -> do
378 tmp <- getNewRegNat rep
379 return (tmp, rep, code tmp)
380 Any II32 code -> do
381 tmp <- getNewRegNat FF32
382 return (tmp, FF32, code tmp)
383 Any II64 code -> do
384 tmp <- getNewRegNat FF64
385 return (tmp, FF64, code tmp)
386 Any _w _code -> do
387 config <- getConfig
388 pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr)
389 -- can't do much for fixed.
390 Fixed rep reg code ->
391 return (reg, rep, code)
392
393 -- TODO: bounds. We can't put just any immediate
394 -- value in; they are constrained.
395 -- See Ticket 19911
396 litToImm' :: CmmLit -> NatM (Operand, InstrBlock)
397 litToImm' lit = return (OpImm (litToImm lit), nilOL)
398
399
400 getRegister :: CmmExpr -> NatM Register
401 getRegister e = do
402 config <- getConfig
403 getRegister' config (ncgPlatform config) e
404
405 -- Note [Handling PIC on AArch64]
406 -- AArch64 does not have a special PIC register; the general approach is to
407 -- simply go through the GOT, and there is assembly support for this:
408 --
409 -- // Load the address of 'sym' from the GOT using ADRP and LDR (used for
410 -- // position-independent code on AArch64):
411 -- adrp x0, #:got:sym
412 -- ldr x0, [x0, #:got_lo12:sym]
413 --
414 -- See also: https://developer.arm.com/documentation/dui0774/i/armclang-integrated-assembler-directives/assembly-expressions
415 --
416 -- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the
417 -- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@
418 -- also in @GHC.CmmToAsm.PIC@, from where it is also exported. There are two
419 -- callsites for this. One is in this module to produce the @target@ in @genCCall@;
420 -- the other is in @GHC.CmmToAsm@ in @cmmExprNative@.
421 --
422 -- Conceptually we do not want any special PicBaseReg to be used on AArch64. If
423 -- we want to distinguish between symbol loading, we need to address this through
424 -- the way we load it, not through a register.
425 --
426
427 getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
428 -- OPTIMIZATION WARNING: CmmExpr rewrites
429 -- 1. Rewrite: Reg + (-n) => Reg - n
430 -- TODO: this expression shouldn't even be generated to begin with.
431 getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
432 = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
433
434 getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
435 = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
436
437
438 -- Generic case.
439 getRegister' config plat expr
440 = case expr of
441 CmmReg (CmmGlobal PicBaseReg)
442 -> pprPanic "getRegisterReg-memory" (ppr $ PicBaseReg)
443 CmmLit lit
444 -> case lit of
445
446 -- TODO handle CmmInt 0 specially, use wzr or xzr.
447
448 CmmInt i W8 | i >= 0 -> do
449 return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i))))))
450 CmmInt i W16 | i >= 0 -> do
451 return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowU W16 i))))))
452
453 CmmInt i W8 -> do
454 return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowS W8 i))))))
455 CmmInt i W16 -> do
456 return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowS W16 i))))))
457
458 -- We need to be careful to not shorten this for negative literals.
459 -- Those need the upper bits set. We'd either have to explicitly sign-extend
460 -- or figure out something smarter. Lowered to
461 -- `MOV dst XZR`
462 CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do
463 return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i)))))
464 CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do
465 let half0 = fromIntegral (fromIntegral i :: Word16)
466 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
467 return (Any (intFormat w) (\dst -> toOL [ annExpr expr
468 $ MOV (OpReg W32 dst) (OpImm (ImmInt half0))
469 , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16)
470 ]))
471 -- fallback for W32
472 CmmInt i W32 -> do
473 let half0 = fromIntegral (fromIntegral i :: Word16)
474 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
475 return (Any (intFormat W32) (\dst -> toOL [ annExpr expr
476 $ MOV (OpReg W32 dst) (OpImm (ImmInt half0))
477 , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16)
478 ]))
479 -- anything else
480 CmmInt i W64 -> do
481 let half0 = fromIntegral (fromIntegral i :: Word16)
482 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
483 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
484 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
485 return (Any (intFormat W64) (\dst -> toOL [ annExpr expr
486 $ MOV (OpReg W64 dst) (OpImm (ImmInt half0))
487 , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half1) SLSL 16)
488 , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half2) SLSL 32)
489 , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half3) SLSL 48)
490 ]))
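-- As an illustration of the splitting above (a sketch): the 64-bit literal
-- 0x1122334455667788 would be materialized roughly as
--   mov  x<dst>, #0x7788
--   movk x<dst>, #0x5566, lsl #16
--   movk x<dst>, #0x3344, lsl #32
--   movk x<dst>, #0x1122, lsl #48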
491 CmmInt _i rep -> do
492 (op, imm_code) <- litToImm' lit
493 return (Any (intFormat rep) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg rep dst) op)))
494
495 -- floatToBytes (fromRational f)
496 CmmFloat 0 w -> do
497 (op, imm_code) <- litToImm' lit
498 return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op)))
499
500 CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
501 CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
502 CmmFloat f W32 -> do
503 let word = castFloatToWord32 (fromRational f) :: Word32
504 half0 = fromIntegral (fromIntegral word :: Word16)
505 half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
506 tmp <- getNewRegNat (intFormat W32)
507 return (Any (floatFormat W32) (\dst -> toOL [ annExpr expr
508 $ MOV (OpReg W32 tmp) (OpImm (ImmInt half0))
509 , MOVK (OpReg W32 tmp) (OpImmShift (ImmInt half1) SLSL 16)
510 , MOV (OpReg W32 dst) (OpReg W32 tmp)
511 ]))
512 CmmFloat f W64 -> do
513 let word = castDoubleToWord64 (fromRational f) :: Word64
514 half0 = fromIntegral (fromIntegral word :: Word16)
515 half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
516 half2 = fromIntegral (fromIntegral (word `shiftR` 32) :: Word16)
517 half3 = fromIntegral (fromIntegral (word `shiftR` 48) :: Word16)
518 tmp <- getNewRegNat (intFormat W64)
519 return (Any (floatFormat W64) (\dst -> toOL [ annExpr expr
520 $ MOV (OpReg W64 tmp) (OpImm (ImmInt half0))
521 , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half1) SLSL 16)
522 , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half2) SLSL 32)
523 , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half3) SLSL 48)
524 , MOV (OpReg W64 dst) (OpReg W64 tmp)
525 ]))
526 CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
527 CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
528 CmmLabel _lbl -> do
529 (op, imm_code) <- litToImm' lit
530 let rep = cmmLitType plat lit
531 format = cmmTypeFormat rep
532 return (Any format (\dst -> imm_code `snocOL` (annExpr expr $ LDR format (OpReg (formatToWidth format) dst) op)))
533
534 CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
535 (op, imm_code) <- litToImm' lit
536 let rep = cmmLitType plat lit
537 format = cmmTypeFormat rep
538 -- width = typeWidth rep
539 return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
540
541 CmmLabelOff lbl off -> do
542 (op, imm_code) <- litToImm' (CmmLabel lbl)
543 let rep = cmmLitType plat lit
544 format = cmmTypeFormat rep
545 width = typeWidth rep
546 (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
547 return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
548
549 CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelDiffOff): " (pdoc plat expr)
550 CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmBlock): " (pdoc plat expr)
551 CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmHighStackMark): " (pdoc plat expr)
552 CmmLoad mem rep -> do
553 Amode addr addr_code <- getAmode plat mem
554 let format = cmmTypeFormat rep
555 return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
556 CmmStackSlot _ _
557 -> pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
558 CmmReg reg
559 -> return (Fixed (cmmTypeFormat (cmmRegType plat reg))
560 (getRegisterReg plat reg)
561 nilOL)
562 CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do
563 getRegister' config plat $
564 CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
565 where width = typeWidth (cmmRegType plat reg)
566
567 CmmRegOff reg off -> do
568 (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
569 (reg, _format, code) <- getSomeReg $ CmmReg reg
570 return $ Any (intFormat width) (\dst -> off_code `appOL` code `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r))
571 where width = typeWidth (cmmRegType plat reg)
572
573
574
575 -- for MachOps, see GHC.Cmm.MachOp
576 -- For CmmMachOp, see GHC.Cmm.Expr
577 CmmMachOp op [e] -> do
578 (reg, _format, code) <- getSomeReg e
579 case op of
580 MO_Not w -> return $ Any (intFormat w) (\dst -> code `snocOL` MVN (OpReg w dst) (OpReg w reg))
581
582 MO_S_Neg w -> return $ Any (intFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
583 MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
584
585 MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float)
586 MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed)
587
588 -- TODO this is very hacky
589 -- Note, UBFM and SBFM expect the source and target registers to be of the same size, so we'll use @max from to@
590 -- UBFM will set the high bits to 0. SBFM will copy the sign (sign extend).
591 MO_UU_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` UBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to)))
592 MO_SS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` SBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to)))
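-- For example (a sketch): @MO_UU_Conv W32 W64@ zero-extends with
--   ubfm x<dst>, x<src>, #0, #31
-- while @MO_SS_Conv W32 W64@ sign-extends with
--   sbfm x<dst>, x<src>, #0, #31
-- i.e. both work on the wider (64-bit) registers and copy bits 0..31.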
593 MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` FCVT (OpReg to dst) (OpReg from reg))
594
595 -- Conversions
596 MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
597
598 _ -> pprPanic "getRegister' (monadic CmmMachOp):" (pdoc plat expr)
599 where toImm W8 = (OpImm (ImmInt 7))
600 toImm W16 = (OpImm (ImmInt 15))
601 toImm W32 = (OpImm (ImmInt 31))
602 toImm W64 = (OpImm (ImmInt 63))
603 toImm W128 = (OpImm (ImmInt 127))
604 toImm W256 = (OpImm (ImmInt 255))
605 toImm W512 = (OpImm (ImmInt 511))
606 -- Dyadic machops:
607 --
608 -- The general idea is:
609 -- compute x<i> <- x
610 -- compute x<j> <- y
611 -- OP x<r>, x<i>, x<j>
612 --
613 -- TODO: for now we'll only implement the 64bit versions. And rely on the
614 -- fallthrough to alert us if things go wrong!
615 -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring
616 -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg
617 CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
618 CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
619 -- 1. Compute Reg +/- n directly.
620 -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
621 CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)]
622 | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
623 -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
624 where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
625 r' = getRegisterReg plat reg
626 CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)]
627 | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
628 -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
629 where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
630 r' = getRegisterReg plat reg
631
632 CmmMachOp (MO_U_Quot w) [x, y] | w == W8 -> do
633 (reg_x, _format_x, code_x) <- getSomeReg x
634 (reg_y, _format_y, code_y) <- getSomeReg y
635 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (UXTB (OpReg w reg_y) (OpReg w reg_y)) `snocOL` (UDIV (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
636 CmmMachOp (MO_U_Quot w) [x, y] | w == W16 -> do
637 (reg_x, _format_x, code_x) <- getSomeReg x
638 (reg_y, _format_y, code_y) <- getSomeReg y
639 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (UXTH (OpReg w reg_y) (OpReg w reg_y)) `snocOL` (UDIV (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
640
641 -- 2. Shifts. x << n, x >> n.
642 CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
643 (reg_x, _format_x, code_x) <- getSomeReg x
644 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
645 CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
646 (reg_x, _format_x, code_x) <- getSomeReg x
647 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
648
649 CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W8, 0 <= n, n < 8 -> do
650 (reg_x, _format_x, code_x) <- getSomeReg x
651 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (8-n)))))
652 CmmMachOp (MO_S_Shr w) [x, y] | w == W8 -> do
653 (reg_x, _format_x, code_x) <- getSomeReg x
654 (reg_y, _format_y, code_y) <- getSomeReg y
655 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (SXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
656
657 CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W16, 0 <= n, n < 16 -> do
658 (reg_x, _format_x, code_x) <- getSomeReg x
659 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (16-n)))))
660 CmmMachOp (MO_S_Shr w) [x, y] | w == W16 -> do
661 (reg_x, _format_x, code_x) <- getSomeReg x
662 (reg_y, _format_y, code_y) <- getSomeReg y
663 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (SXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
664
665 CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
666 (reg_x, _format_x, code_x) <- getSomeReg x
667 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
668
669 CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
670 (reg_x, _format_x, code_x) <- getSomeReg x
671 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
672
673
674 CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W8, 0 <= n, n < 8 -> do
675 (reg_x, _format_x, code_x) <- getSomeReg x
676 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (UBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (8-n)))))
677 CmmMachOp (MO_U_Shr w) [x, y] | w == W8 -> do
678 (reg_x, _format_x, code_x) <- getSomeReg x
679 (reg_y, _format_y, code_y) <- getSomeReg y
680 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
681
682 CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W16, 0 <= n, n < 16 -> do
683 (reg_x, _format_x, code_x) <- getSomeReg x
684 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (UBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (16-n)))))
685 CmmMachOp (MO_U_Shr w) [x, y] | w == W16 -> do
686 (reg_x, _format_x, code_x) <- getSomeReg x
687 (reg_y, _format_y, code_y) <- getSomeReg y
688 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
689
690 CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
691 (reg_x, _format_x, code_x) <- getSomeReg x
692 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
693
694 CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
695 (reg_x, _format_x, code_x) <- getSomeReg x
696 return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
697
698 -- 3. Logic &&, ||
699 CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
700 return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
701 where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
702 r' = getRegisterReg plat reg
703
704 CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
705 return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
706 where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
707 r' = getRegisterReg plat reg
708
709 -- Generic case.
710 CmmMachOp op [x, y] -> do
711 -- alright, so we have an operation and two expressions; we essentially want to
712 -- compute both operands into registers (float registers where needed) and apply the operation.
713 let genOp w op = do
714 (reg_x, format_x, code_x) <- getSomeReg x
715 (reg_y, format_y, code_y) <- getSomeReg y
716 when ((isFloatFormat format_x && isIntFormat format_y) || (isIntFormat format_x && isFloatFormat format_y)) $ pprPanic "getRegister:genOp" (text "formats don't match:" <+> text (show format_x) <+> text "/=" <+> text (show format_y))
717 return $ Any format_x (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
718
719 withTempIntReg w op = OpReg w <$> getNewRegNat (intFormat w) >>= op
720 -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op
721
722 intOp w op = do
723 -- compute x<m> <- x
724 -- compute x<o> <- y
725 -- <OP> x<n>, x<m>, x<o>
726 (reg_x, _format_x, code_x) <- getSomeReg x
727 (reg_y, _format_y, code_y) <- getSomeReg y
728 return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
729 floatOp w op = do
730 (reg_fx, _format_x, code_fx) <- getFloatReg x
731 (reg_fy, _format_y, code_fy) <- getFloatReg y
732 return $ Any (floatFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
733 -- need a special one for conditionals, as they return ints
734 floatCond w op = do
735 (reg_fx, _format_x, code_fx) <- getFloatReg x
736 (reg_fy, _format_y, code_fy) <- getFloatReg y
737 return $ Any (intFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
738
739 case op of
740 -- Integer operations
741 -- Add/Sub should only be integer operations.
742 -- But our Cmm parser doesn't care about types
743 -- and thus we end up with <float> + <float> => MO_Add <float> <float>
744 MO_Add w -> genOp w (\d x y -> unitOL $ annExpr expr (ADD d x y))
745 MO_Sub w -> genOp w (\d x y -> unitOL $ annExpr expr (SUB d x y))
746
747 -- Note [CSET]
748 --
749 -- Setting conditional flags: the architecture internally keeps track of the
750 -- following flag bits, and conditions are evaluated based on those flags as
751 -- in the table below.
752 --
753 -- 31 30 29 28
754 -- .---+---+---+---+-- - -
755 -- | N | Z | C | V |
756 -- '---+---+---+---+-- - -
757 -- Negative
758 -- Zero
759 -- Carry
760 -- oVerflow
761 --
762 -- .------+-------------------------------------+-----------------+----------.
763 -- | Code | Meaning | Flags | Encoding |
764 -- |------+-------------------------------------+-----------------+----------|
765 -- | EQ | Equal | Z = 1 | 0000 |
766 -- | NE | Not Equal | Z = 0 | 0001 |
767 -- | HI | Unsigned Higher | C = 1 && Z = 0 | 1000 |
768 -- | HS | Unsigned Higher or Same | C = 1 | 0010 |
769 -- | LS | Unsigned Lower or Same | C = 0 || Z = 1 | 1001 |
770 -- | LO | Unsigned Lower | C = 0 | 0011 |
771 -- | GT | Signed Greater Than | Z = 0 && N = V | 1100 |
772 -- | GE | Signed Greater Than or Equal | N = V | 1010 |
773 -- | LE | Signed Less Than or Equal | Z = 1 || N /= V | 1101 |
774 -- | LT | Signed Less Than | N /= V | 1011 |
775 -- | CS | Carry Set (Unsigned Overflow) | C = 1 | 0010 |
776 -- | CC | Carry Clear (No Unsigned Overflow) | C = 0 | 0011 |
777 -- | VS | Signed Overflow | V = 1 | 0110 |
778 -- | VC | No Signed Overflow | V = 0 | 0111 |
779 -- | MI | Minus, Negative | N = 1 | 0100 |
780 -- | PL | Plus, Positive or Zero (!) | N = 0 | 0101 |
781 -- | AL | Always | Any | 1110 |
782 -- | NV | Never | Any | 1111 |
783 --- '-------------------------------------------------------------------------'
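-- For example (a sketch, register names made up): @MO_S_Lt W64@ with the
-- operands already in x1 and x2 becomes roughly
--   cmp  x1, x2
--   cset x0, lt
-- leaving 1 in x0 if x1 < x2 (signed) and 0 otherwise.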
784 MO_Eq w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d EQ ])
785 MO_Eq w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d EQ ])
786 MO_Eq w -> intOp w (\d x y -> toOL [ CMP x y, CSET d EQ ])
787 MO_Ne w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d NE ])
788 MO_Ne w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d NE ])
789 MO_Ne w -> intOp w (\d x y -> toOL [ CMP x y, CSET d NE ])
790 MO_Mul w -> intOp w (\d x y -> unitOL $ MUL d x y)
791
792 -- Signed multiply/divide
793 MO_S_MulMayOflo w -> intOp w (\d x y -> toOL [ MUL d x y, CSET d VS ])
794 MO_S_Quot w -> intOp w (\d x y -> unitOL $ SDIV d x y)
795
796 -- No native rem instruction. So we'll compute the following
797 -- Rd <- Rx / Ry | 2 <- 7 / 3 -- SDIV Rd Rx Ry
798 -- Rd' <- Rx - Rd * Ry | 1 <- 7 - 2 * 3 -- MSUB Rd' Rd Ry Rx
799 -- | '---|----------------|---' |
800 -- | '----------------|-------'
801 -- '--------------------------'
802 -- Note the swap in Rx and Ry.
803 MO_S_Rem w -> withTempIntReg w $ \t ->
804 intOp w (\d x y -> toOL [ SDIV t x y, MSUB d t y x ])
805
806 -- Unsigned multiply/divide
807 MO_U_MulMayOflo _w -> unsupportedP plat expr
808 MO_U_Quot w -> intOp w (\d x y -> unitOL $ UDIV d x y)
809 MO_U_Rem w -> withTempIntReg w $ \t ->
810 intOp w (\d x y -> toOL [ UDIV t x y, MSUB d t y x ])
811
812 -- Signed comparisons -- see Note [CSET]
813 MO_S_Ge w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d SGE ])
814 MO_S_Ge w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d SGE ])
815 MO_S_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGE ])
816 MO_S_Le w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d SLE ])
817 MO_S_Le w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d SLE ])
818 MO_S_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLE ])
819 MO_S_Gt w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d SGT ])
820 MO_S_Gt w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d SGT ])
821 MO_S_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGT ])
822 MO_S_Lt w@W8 -> intOp w (\d x y -> toOL [ SXTB x x, SXTB y y, CMP x y, CSET d SLT ])
823 MO_S_Lt w@W16 -> intOp w (\d x y -> toOL [ SXTH x x, SXTH y y, CMP x y, CSET d SLT ])
824 MO_S_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLT ])
825
826 -- Unsigned comparisons
827 MO_U_Ge w@W8 -> intOp w (\d x y -> toOL [ UXTB x x, UXTB y y, CMP x y, CSET d UGE ])
828 MO_U_Ge w@W16 -> intOp w (\d x y -> toOL [ UXTH x x, UXTH y y, CMP x y, CSET d UGE ])
829 MO_U_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGE ])
830 MO_U_Le w@W8 -> intOp w (\d x y -> toOL [ UXTB x x, UXTB y y, CMP x y, CSET d ULE ])
831 MO_U_Le w@W16 -> intOp w (\d x y -> toOL [ UXTH x x, UXTH y y, CMP x y, CSET d ULE ])
832 MO_U_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULE ])
833 MO_U_Gt w@W8 -> intOp w (\d x y -> toOL [ UXTB x x, UXTB y y, CMP x y, CSET d UGT ])
834 MO_U_Gt w@W16 -> intOp w (\d x y -> toOL [ UXTH x x, UXTH y y, CMP x y, CSET d UGT ])
835 MO_U_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGT ])
836 MO_U_Lt w@W8 -> intOp w (\d x y -> toOL [ UXTB x x, UXTB y y, CMP x y, CSET d ULT ])
837 MO_U_Lt w@W16 -> intOp w (\d x y -> toOL [ UXTH x x, UXTH y y, CMP x y, CSET d ULT ])
838 MO_U_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULT ])
839
840 -- Floating point arithmetic
841 MO_F_Add w -> floatOp w (\d x y -> unitOL $ ADD d x y)
842 MO_F_Sub w -> floatOp w (\d x y -> unitOL $ SUB d x y)
843 MO_F_Mul w -> floatOp w (\d x y -> unitOL $ MUL d x y)
844 MO_F_Quot w -> floatOp w (\d x y -> unitOL $ SDIV d x y)
845
846 -- Floating point comparison
847 MO_F_Eq w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d EQ ])
848 MO_F_Ne w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d NE ])
849
850 -- careful with the floating point operations.
851 -- SLE is effectively LE or unordered (NaN)
852 -- SLT is the same. ULE, and ULT will not return true for NaN.
853 -- This is a bit counter-intuitive. Don't let yourself be fooled by
854 -- the S/U prefix for floats, it's only meaningful for integers.
855 MO_F_Ge w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGE ])
856 MO_F_Le w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLE ]) -- x <= y <=> y >= x
857 MO_F_Gt w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGT ])
858 MO_F_Lt w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y > x
859
860 -- Bitwise operations
861 MO_And w -> intOp w (\d x y -> unitOL $ AND d x y)
862 MO_Or w -> intOp w (\d x y -> unitOL $ ORR d x y)
863 MO_Xor w -> intOp w (\d x y -> unitOL $ EOR d x y)
864 -- MO_Not W64 ->
865 MO_Shl w -> intOp w (\d x y -> unitOL $ LSL d x y)
866 MO_U_Shr w -> intOp w (\d x y -> unitOL $ LSR d x y)
867 MO_S_Shr w -> intOp w (\d x y -> unitOL $ ASR d x y)
868
869 -- TODO
870
871 op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ (pprMachOp op) <+> text "in" <+> (pdoc plat expr)
872 CmmMachOp _op _xs
873 -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
874
875 where
876 unsupportedP :: OutputableP env a => env -> a -> b
877 unsupportedP platform op = pprPanic "Unsupported op:" (pdoc platform op)
878
879 isNbitEncodeable :: Int -> Integer -> Bool
880 isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
881 -- This needs to check if n can be encoded as a bitmask immediate:
882 --
883 -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
884 --
885 isBitMaskImmediate :: Integer -> Bool
886 isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
887 ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
888 ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
889 ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
890 ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
891 ,0b0011_1111, 0b0111_1110, 0b1111_1100
892 ,0b0111_1111, 0b1111_1110
893 ,0b1111_1111]
894
895
896 -- -----------------------------------------------------------------------------
897 -- The 'Amode' type: Memory addressing modes passed up the tree.
898 data Amode = Amode AddrMode InstrBlock
899
900 getAmode :: Platform -> CmmExpr -> NatM Amode
901 -- TODO: Specialize stuff we can destructure here.
902
903 -- OPTIMIZATION WARNING: Addressing modes.
904 -- Addressing options:
905 -- LDUR/STUR: imm9: -256 - 255
906 getAmode platform (CmmRegOff reg off) | -256 <= off, off <= 255
907 = return $ Amode (AddrRegImm reg' off') nilOL
908 where reg' = getRegisterReg platform reg
909 off' = ImmInt off
910 -- LDR/STR: imm12: if reg is 32bit: 0 -- 16380 in multiples of 4
911 getAmode platform (CmmRegOff reg off)
912 | typeWidth (cmmRegType platform reg) == W32, 0 <= off, off <= 16380, off `mod` 4 == 0
913 = return $ Amode (AddrRegImm reg' off') nilOL
914 where reg' = getRegisterReg platform reg
915 off' = ImmInt off
916 -- LDR/STR: imm12: if reg is 64bit: 0 -- 32760 in multiples of 8
917 getAmode platform (CmmRegOff reg off)
918 | typeWidth (cmmRegType platform reg) == W64, 0 <= off, off <= 32760, off `mod` 8 == 0
919 = return $ Amode (AddrRegImm reg' off') nilOL
920 where reg' = getRegisterReg platform reg
921 off' = ImmInt off
922
923 -- For Stores we often see something like this:
924 -- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
925 -- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n]`
926 -- for `n` in range.
927 getAmode _platform (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
928 | -256 <= off, off <= 255
929 = do (reg, _format, code) <- getSomeReg expr
930 return $ Amode (AddrRegImm reg (ImmInteger off)) code
931
932 getAmode _platform (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
933 | -256 <= -off, -off <= 255
934 = do (reg, _format, code) <- getSomeReg expr
935 return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
936
937 -- Generic case
938 getAmode _platform expr
939 = do (reg, _format, code) <- getSomeReg expr
940 return $ Amode (AddrReg reg) code
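-- Two small worked examples (sketches): with a 64-bit base register such as Sp,
-- @CmmRegOff Sp 16@ is caught by the imm9 rule above, and @CmmRegOff Sp 400@ by
-- the 64-bit imm12 rule (400 is a non-negative multiple of 8); both become an
-- @AddrRegImm@ with no extra code, so a load through them can be emitted as
-- e.g. @ldr x0, [sp, #400]@.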
941
942 -- -----------------------------------------------------------------------------
943 -- Generating assignments
944
945 -- Assignments are really at the heart of the whole code generation
946 -- business. Almost all top-level nodes of any real importance are
947 -- assignments, which correspond to loads, stores, or register
948 -- transfers. If we're really lucky, some of the register transfers
949 -- will go away, because we can use the destination register to
950 -- complete the code generation for the right hand side. This only
951 -- fails when the right hand side is forced into a fixed register
952 -- (e.g. the result of a call).
953
954 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
955 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
956
957 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
958 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
959
960 assignMem_IntCode rep addrE srcE
961 = do
962 (src_reg, _format, code) <- getSomeReg srcE
963 platform <- getPlatform
964 Amode addr addr_code <- getAmode platform addrE
965 return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
966 `consOL` (code
967 `appOL` addr_code
968 `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr))
969
970 assignReg_IntCode _ reg src
971 = do
972 platform <- getPlatform
973 let dst = getRegisterReg platform reg
974 r <- getRegister src
975 return $ case r of
976 Any _ code -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
977 Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg))
978
979 -- Let's treat Floating point stuff
980 -- as integer code for now. Opaque.
981 assignMem_FltCode = assignMem_IntCode
982 assignReg_FltCode = assignReg_IntCode
983
984 -- -----------------------------------------------------------------------------
985 -- Jumps
986 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
987 genJump expr@(CmmLit (CmmLabel lbl))
988 = return $ unitOL (annExpr expr (J (TLabel lbl)))
989
990 genJump expr = do
991 (target, _format, code) <- getSomeReg expr
992 return (code `appOL` unitOL (annExpr expr (J (TReg target))))
993
994 -- -----------------------------------------------------------------------------
995 -- Unconditional branches
996 genBranch :: BlockId -> NatM InstrBlock
997 genBranch = return . toOL . mkJumpInstr
998
999 -- -----------------------------------------------------------------------------
1000 -- Conditional branches
1001 genCondJump
1002 :: BlockId
1003 -> CmmExpr
1004 -> NatM InstrBlock
1005 genCondJump bid expr = do
1006 case expr of
1007 -- Optimized == 0 case.
1008 CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
1009 (reg_x, _format_x, code_x) <- getSomeReg x
1010 return $ code_x `snocOL` (annExpr expr (CBZ (OpReg w reg_x) (TBlock bid)))
1011
1012 -- Optimized /= 0 case.
1013 CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
1014 (reg_x, _format_x, code_x) <- getSomeReg x
1015 return $ code_x `snocOL` (annExpr expr (CBNZ (OpReg w reg_x) (TBlock bid)))
1016
1017 -- Generic case.
1018 CmmMachOp mop [x, y] -> do
1019
1020 let ubcond w cmp = do
1021 -- compute both sides.
1022 (reg_x, _format_x, code_x) <- getSomeReg x
1023 (reg_y, _format_y, code_y) <- getSomeReg y
1024 let x' = OpReg w reg_x
1025 y' = OpReg w reg_y
1026 return $ case w of
1027 W8 -> code_x `appOL` code_y `appOL` toOL [ UXTB x' x', UXTB y' y', CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1028 W16 -> code_x `appOL` code_y `appOL` toOL [ UXTH x' x', UXTH y' y', CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1029 _ -> code_x `appOL` code_y `appOL` toOL [ CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1030
1031 sbcond w cmp = do
1032 -- compute both sides.
1033 (reg_x, _format_x, code_x) <- getSomeReg x
1034 (reg_y, _format_y, code_y) <- getSomeReg y
1035 let x' = OpReg w reg_x
1036 y' = OpReg w reg_y
1037 return $ case w of
1038 W8 -> code_x `appOL` code_y `appOL` toOL [ SXTB x' x', SXTB y' y', CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1039 W16 -> code_x `appOL` code_y `appOL` toOL [ SXTH x' x', SXTH y' y', CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1040 _ -> code_x `appOL` code_y `appOL` toOL [ CMP x' y', (annExpr expr (BCOND cmp (TBlock bid))) ]
1041
1042 fbcond w cmp = do
1043 -- ensure we get float regs
1044 (reg_fx, _format_fx, code_fx) <- getFloatReg x
1045 (reg_fy, _format_fy, code_fy) <- getFloatReg y
1046 return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` (annExpr expr (BCOND cmp (TBlock bid)))
1047
1048 case mop of
1049 MO_F_Eq w -> fbcond w EQ
1050 MO_F_Ne w -> fbcond w NE
1051
1052 MO_F_Gt w -> fbcond w OGT
1053 MO_F_Ge w -> fbcond w OGE
1054 MO_F_Lt w -> fbcond w OLT
1055 MO_F_Le w -> fbcond w OLE
1056
1057 MO_Eq w -> sbcond w EQ
1058 MO_Ne w -> sbcond w NE
1059
1060 MO_S_Gt w -> sbcond w SGT
1061 MO_S_Ge w -> sbcond w SGE
1062 MO_S_Lt w -> sbcond w SLT
1063 MO_S_Le w -> sbcond w SLE
1064 MO_U_Gt w -> ubcond w UGT
1065 MO_U_Ge w -> ubcond w UGE
1066 MO_U_Lt w -> ubcond w ULT
1067 MO_U_Le w -> ubcond w ULE
1068 _ -> pprPanic "AArch64.genCondJump:case mop: " (text $ show expr)
1069 _ -> pprPanic "AArch64.genCondJump: " (text $ show expr)
1070
1071
1072 genCondBranch
1073 :: BlockId -- the source of the jump
1074 -> BlockId -- the true branch target
1075 -> BlockId -- the false branch target
1076 -> CmmExpr -- the condition on which to branch
1077 -> NatM InstrBlock -- Instructions
1078
1079 genCondBranch _ true false expr = do
1080 b1 <- genCondJump true expr
1081 b2 <- genBranch false
1082 return (b1 `appOL` b2)
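-- Putting genCondJump and genBranch together (a sketch; the register and labels
-- are made up): @CmmCondBranch (x != 0) Ltrue Lfalse@ with x already in x5 is
-- lowered via the optimized /= 0 case to roughly
--   cbnz x5, Ltrue
--   b    Lfalse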
1083
1084 -- -----------------------------------------------------------------------------
1085 -- Generating C calls
1086
1087 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1088 -- @get_arg@, which moves the arguments to the correct registers/stack
1089 -- locations. Apart from that, the code is easy.
1090 --
1091 -- As per *convention*:
1092 -- x0-x7: (volatile) argument registers
1093 -- x8: (volatile) indirect result register / Linux syscall no
1094 -- x9-x15: (volatile) caller saved regs
1095 -- x16,x17: (volatile) intra-procedure-call registers
1096 -- x18: (volatile) platform register. don't use for portability
1097 -- x19-x28: (non-volatile) callee save regs
1098 -- x29: (non-volatile) frame pointer
1099 -- x30: link register
1100 -- x31: stack pointer / zero reg
1101 --
1102 -- Thus, this is what a C function will expect. Find the arguments in x0-x7,
1103 -- anything above that on the stack. We'll ignore C functions with more than
1104 -- 8 arguments for now. Sorry.
1105 --
1106 -- We need to make sure we preserve x9-x15, don't want to touch x16, x17.
1107
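-- As a sketch of what this means for a simple call (the names are made up): a
-- foreign call @f(a, b)@ with two word-sized arguments and one word-sized
-- result roughly becomes
--
--   mov x0, <a>        -- first argument
--   mov x1, <b>        -- second argument
--   bl  f              -- branch and link
--   mov <result>, x0   -- the result comes back in x0
--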
1108 -- Note [PLT vs GOT relocations]
1109 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1110 -- When linking objects together, we may need to look up foreign references, that
1111 -- is, symbolic references to functions or values in other objects. When
1112 -- compiling the object, we cannot know where those elements will end up in
1113 -- memory (relative to the current location). Thus the use of symbols. There
1114 -- are two kinds of items we are interested in: code segments we want to jump to
1115 -- and continue execution there (functions, ...), and data items we want to look
1116 -- up (strings, numbers, ...). For functions we can use the fact that we can use
1117 -- an intermediate jump without visibility to the program's execution. If we
1118 -- want to jump to a function that is simply too far away to reach for the B/BL
1119 -- instruction, we can create a small piece of code that loads the full target
1120 -- address and jumps to that on demand. Say f wants to call g, but g is out
1121 -- of range for a direct jump; we can create a function h in range for f that
1122 -- will load the address of g and jump there. The area where we construct h
1123 -- is called the Procedure Linking Table (PLT), we have essentially replaced
1124 -- f -> g with f -> h -> g. This is fine for function calls. However if we
1125 -- want to lookup values, this trick doesn't work, so we need something else.
1126 -- We will instead reserve a slot in memory, and have a symbol pointing to that
1127 -- slot. Now what we essentially do is, we reference that slot, and expect that
1128 -- slot to hold the final resting address of the data we are interested in.
1129 -- Thus what that symbol really points to is the location of the final data.
1130 -- The block of memory where we hold all those slots is the Global Offset Table
1131 -- (GOT). Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y].
1132 --
1133 -- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only
1134 -- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most
1135 -- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within
1136 -- 4GB of the PC, and load that. For anything outside of that range, we'd have
1137 -- to go through the GOT.
1138 --
1139 -- adrp x0, <symbol>
1140 -- add x0, :lo:<symbol>
1141 --
1142 -- will compute the address of <symbol> into x0 if <symbol> is within 4GB of the
1143 -- PC.
1144 --
1145 -- If we want to get the slot in the global offset table (GOT), we can do this:
1146 --
1147 -- adrp x0, #:got:<symbol>
1148 -- ldr x0, [x0, #:got_lo12:<symbol>]
1149 --
1150 -- this will compute the address anywhere in the addressable 64bit space into
1151 -- x0, by loading the address from the GOT slot.
1152 --
1153 -- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which
1154 -- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>]
1155 -- instead of the add instruction.
1156 --
1157 -- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do
1158 -- not need to go through the GOT, unless we want to address the full address
1159 -- range within 64bit.
1160
1161 genCCall
1162 :: ForeignTarget -- function to call
1163 -> [CmmFormal] -- where to put the result
1164 -> [CmmActual] -- arguments (of mixed type)
1165 -> BlockId -- The block we are in
1166 -> NatM (InstrBlock, Maybe BlockId)
1167 -- TODO: Specialize where we can.
1168 -- Generic impl
1169 genCCall target dest_regs arg_regs bid = do
1170 -- we want to pass arg_regs into allArgRegs
1171 -- pprTraceM "genCCall target" (ppr target)
1172 -- pprTraceM "genCCall formal" (ppr dest_regs)
1173 -- pprTraceM "genCCall actual" (ppr arg_regs)
1174
1175 case target of
1176 -- The target :: ForeignTarget call can either
1177 -- be a foreign procedure with an address expr
1178 -- and a calling convention.
1179 ForeignTarget expr _cconv -> do
1180 (call_target, call_target_code) <- case expr of
1181 -- if this is a label, let's just branch directly to it. This will produce the
1182 -- correct CALL relocation for BL...
1183 (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
1184 -- ... if it's not a label--well--let's compute the expression into a
1185 -- register and jump to that. See Note [PLT vs GOT relocations]
1186 _ -> do (reg, _format, reg_code) <- getSomeReg expr
1187 pure (TReg reg, reg_code)
1188 -- compute the code and register logic for all arg_regs.
1189 -- this will give us the format information to match on.
1190 arg_regs' <- mapM getSomeReg arg_regs
1191
1192 -- Now this is stupid. Our Cmm expressions don't carry the proper sizes,
1193 -- so while in Cmm we might get W64 incorrectly for an int that is W32 in
1194 -- STG; this then breaks packing of stack arguments if we need to pack
1195 -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
1196 -- in Cmm proper. Option two, which we choose here, is to use extended Hint
1197 -- information to contain the size information and use that when packing
1198 -- arguments spilled onto the stack.
1199 let (_res_hints, arg_hints) = foreignTargetHints target
1200 arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
1201
1202 platform <- getPlatform
1203 let packStack = platformOS platform == OSDarwin
1204
1205 (stackSpace', passRegs, passArgumentsCode) <- passArguments packStack allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
1206
-- if we pack the stack, we may need to adjust it to a multiple of 8 bytes.
-- if we don't pack the stack, it will always be a multiple of 8.
1209 let stackSpace = if stackSpace' `mod` 8 /= 0
1210 then 8 * (stackSpace' `div` 8 + 1)
1211 else stackSpace'
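-- A worked example (hypothetical values, with all GP argument registers
-- already exhausted): three packed 4-byte ints end up at offsets 0, 4 and 8,
-- so stackSpace' = 12, which is rounded up to 16 here so that the adjustment
-- below is a whole number of 8-byte slots.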
1212
1213 (returnRegs, readResultsCode) <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
1214
1215 let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
1216 , DELTA (-16) ]
1217 moveStackDown i | odd i = moveStackDown (i + 1)
1218 moveStackDown i = toOL [ PUSH_STACK_FRAME
1219 , SUB (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i)))
1220 , DELTA (-8 * i - 16) ]
1221 moveStackUp 0 = toOL [ POP_STACK_FRAME
1222 , DELTA 0 ]
1223 moveStackUp i | odd i = moveStackUp (i + 1)
1224 moveStackUp i = toOL [ ADD (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i)))
1225 , POP_STACK_FRAME
1226 , DELTA 0 ]
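-- For example, moveStackDown 3 falls through to moveStackDown 4 and emits
--   PUSH_STACK_FRAME; SUB sp, sp, #32; DELTA (-48)
-- rounding three 8-byte slots up to four, so that the stack pointer stays
-- 16-byte aligned as the AArch64 procedure call standard requires.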
1227
1228 let code = call_target_code -- compute the label (possibly into a register)
1229 `appOL` moveStackDown (stackSpace `div` 8)
1230 `appOL` passArgumentsCode -- put the arguments into x0, ...
1231 `appOL` (unitOL $ BL call_target passRegs returnRegs) -- branch and link.
`appOL` readResultsCode -- move the results into registers
1233 `appOL` moveStackUp (stackSpace `div` 8)
1234 return (code, Nothing)
1235
1236 PrimTarget MO_F32_Fabs
1237 | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
1238 unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
1239 PrimTarget MO_F64_Fabs
1240 | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
1241 unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
1242
1243 -- or a possibly side-effecting machine operation
1244 -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
1245 PrimTarget mop -> do
-- We'll need config to construct foreign targets
1247 case mop of
1248 -- 64 bit float ops
1249 MO_F64_Pwr -> mkCCall "pow"
1250
1251 MO_F64_Sin -> mkCCall "sin"
1252 MO_F64_Cos -> mkCCall "cos"
1253 MO_F64_Tan -> mkCCall "tan"
1254
1255 MO_F64_Sinh -> mkCCall "sinh"
1256 MO_F64_Cosh -> mkCCall "cosh"
1257 MO_F64_Tanh -> mkCCall "tanh"
1258
1259 MO_F64_Asin -> mkCCall "asin"
1260 MO_F64_Acos -> mkCCall "acos"
1261 MO_F64_Atan -> mkCCall "atan"
1262
1263 MO_F64_Asinh -> mkCCall "asinh"
1264 MO_F64_Acosh -> mkCCall "acosh"
1265 MO_F64_Atanh -> mkCCall "atanh"
1266
1267 MO_F64_Log -> mkCCall "log"
1268 MO_F64_Log1P -> mkCCall "log1p"
1269 MO_F64_Exp -> mkCCall "exp"
1270 MO_F64_ExpM1 -> mkCCall "expm1"
1271 MO_F64_Fabs -> mkCCall "fabs"
1272 MO_F64_Sqrt -> mkCCall "sqrt"
1273
1274 -- 32 bit float ops
1275 MO_F32_Pwr -> mkCCall "powf"
1276
1277 MO_F32_Sin -> mkCCall "sinf"
1278 MO_F32_Cos -> mkCCall "cosf"
1279 MO_F32_Tan -> mkCCall "tanf"
1280 MO_F32_Sinh -> mkCCall "sinhf"
1281 MO_F32_Cosh -> mkCCall "coshf"
1282 MO_F32_Tanh -> mkCCall "tanhf"
1283 MO_F32_Asin -> mkCCall "asinf"
1284 MO_F32_Acos -> mkCCall "acosf"
1285 MO_F32_Atan -> mkCCall "atanf"
1286 MO_F32_Asinh -> mkCCall "asinhf"
1287 MO_F32_Acosh -> mkCCall "acoshf"
1288 MO_F32_Atanh -> mkCCall "atanhf"
1289 MO_F32_Log -> mkCCall "logf"
1290 MO_F32_Log1P -> mkCCall "log1pf"
1291 MO_F32_Exp -> mkCCall "expf"
1292 MO_F32_ExpM1 -> mkCCall "expm1f"
1293 MO_F32_Fabs -> mkCCall "fabsf"
1294 MO_F32_Sqrt -> mkCCall "sqrtf"
1295
1296 -- 64-bit primops
1297 MO_I64_ToI -> mkCCall "hs_int64ToInt"
1298 MO_I64_FromI -> mkCCall "hs_intToInt64"
1299 MO_W64_ToW -> mkCCall "hs_word64ToWord"
1300 MO_W64_FromW -> mkCCall "hs_wordToWord64"
1301 MO_x64_Neg -> mkCCall "hs_neg64"
1302 MO_x64_Add -> mkCCall "hs_add64"
1303 MO_x64_Sub -> mkCCall "hs_sub64"
1304 MO_x64_Mul -> mkCCall "hs_mul64"
1305 MO_I64_Quot -> mkCCall "hs_quotInt64"
1306 MO_I64_Rem -> mkCCall "hs_remInt64"
1307 MO_W64_Quot -> mkCCall "hs_quotWord64"
1308 MO_W64_Rem -> mkCCall "hs_remWord64"
1309 MO_x64_And -> mkCCall "hs_and64"
1310 MO_x64_Or -> mkCCall "hs_or64"
1311 MO_x64_Xor -> mkCCall "hs_xor64"
1312 MO_x64_Not -> mkCCall "hs_not64"
1313 MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64"
1314 MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64"
1315 MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64"
1316 MO_x64_Eq -> mkCCall "hs_eq64"
1317 MO_x64_Ne -> mkCCall "hs_ne64"
1318 MO_I64_Ge -> mkCCall "hs_geInt64"
1319 MO_I64_Gt -> mkCCall "hs_gtInt64"
1320 MO_I64_Le -> mkCCall "hs_leInt64"
1321 MO_I64_Lt -> mkCCall "hs_ltInt64"
1322 MO_W64_Ge -> mkCCall "hs_geWord64"
1323 MO_W64_Gt -> mkCCall "hs_gtWord64"
1324 MO_W64_Le -> mkCCall "hs_leWord64"
1325 MO_W64_Lt -> mkCCall "hs_ltWord64"
1326
1327 -- Conversion
1328 MO_UF_Conv w -> mkCCall (word2FloatLabel w)
1329
-- Arithmetic
1331 -- These are not supported on X86, so I doubt they are used much.
1332 MO_S_Mul2 _w -> unsupported mop
1333 MO_S_QuotRem _w -> unsupported mop
1334 MO_U_QuotRem _w -> unsupported mop
1335 MO_U_QuotRem2 _w -> unsupported mop
1336 MO_Add2 _w -> unsupported mop
1337 MO_AddWordC _w -> unsupported mop
1338 MO_SubWordC _w -> unsupported mop
1339 MO_AddIntC _w -> unsupported mop
1340 MO_SubIntC _w -> unsupported mop
1341 MO_U_Mul2 _w -> unsupported mop
1342
1343 -- Memory Ordering
1344 -- TODO DMBSY is probably *way* too much!
1345 MO_ReadBarrier -> return (unitOL DMBSY, Nothing)
1346 MO_WriteBarrier -> return (unitOL DMBSY, Nothing)
1347 MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers)
1348 -- Prefetch
1349 MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint.
1350
1351 -- Memory copy/set/move/cmp, with alignment for optimization
1352
1353 -- TODO Optimize and use e.g. quad registers to move memory around instead
1354 -- of offloading this to memcpy. For small memcpys we can utilize
-- the 128-bit quad registers in NEON to move blocks of bytes around.
-- Might also make sense for small memsets? Use xzr? What's the function
1357 -- call overhead?
1358 MO_Memcpy _align -> mkCCall "memcpy"
1359 MO_Memset _align -> mkCCall "memset"
1360 MO_Memmove _align -> mkCCall "memmove"
1361 MO_Memcmp _align -> mkCCall "memcmp"
1362
1363 MO_SuspendThread -> mkCCall "suspendThread"
1364 MO_ResumeThread -> mkCCall "resumeThread"
1365
1366 MO_PopCnt w -> mkCCall (popCntLabel w)
1367 MO_Pdep w -> mkCCall (pdepLabel w)
1368 MO_Pext w -> mkCCall (pextLabel w)
1369 MO_Clz w -> mkCCall (clzLabel w)
1370 MO_Ctz w -> mkCCall (ctzLabel w)
1371 MO_BSwap w -> mkCCall (bSwapLabel w)
1372 MO_BRev w -> mkCCall (bRevLabel w)
1373
-- Atomic read-modify-write.
1375 MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
1376 MO_AtomicRead w -> mkCCall (atomicReadLabel w)
1377 MO_AtomicWrite w -> mkCCall (atomicWriteLabel w)
1378 MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
-- Should be an AtomicRMW variant eventually.
-- Sequentially consistent.
1381 -- TODO: this should be implemented properly!
1382 MO_Xchg w -> mkCCall (xchgLabel w)
1383
1384 where
1385 unsupported :: Show a => a -> b
1386 unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
1387 ++ " not supported here")
1388 mkCCall :: FastString -> NatM (InstrBlock, Maybe BlockId)
1389 mkCCall name = do
1390 config <- getConfig
1391 target <- cmmMakeDynamicReference config CallReference $
1392 mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction
1393 let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
1394 genCCall (ForeignTarget target cconv) dest_regs arg_regs bid
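--
-- For instance, MO_F64_Pwr above in effect becomes an ordinary C call to pow:
-- cmmMakeDynamicReference computes a reference to the symbol (see
-- Note [PLT vs GOT relocations]), passArguments below moves the two double
-- arguments into the first two floating-point argument registers, and
-- readResults moves the result back into the destination register.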
1395
1396 -- TODO: Optimize using paired stores and loads (STP, LDP). It is
-- automatically done by the allocator for us. However it's not optimal,
1398 -- as we'd rather want to have control over
1399 -- all spill/load registers, so we can optimize with instructions like
1400 -- STP xA, xB, [sp, #-16]!
1401 -- and
--   LDP xA, xB, [sp], #16
1403 --
1404 passArguments :: Bool -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
1405 passArguments _packStack _ _ [] stackSpace accumRegs accumCode = return (stackSpace, accumRegs, accumCode)
1406 -- passArguments _ _ [] accumCode stackSpace | isEven stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * stackSpace))
1407 -- passArguments _ _ [] accumCode stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * (stackSpace + 1)))
1408 -- passArguments [] fpRegs (arg0:arg1:args) stack accumCode = do
1409 -- -- allocate this on the stack
1410 -- (r0, format0, code_r0) <- getSomeReg arg0
1411 -- (r1, format1, code_r1) <- getSomeReg arg1
1412 -- let w0 = formatToWidth format0
1413 -- w1 = formatToWidth format1
1414 -- stackCode = unitOL $ STP (OpReg w0 r0) (OpReg w1 R1), (OpAddr (AddrRegImm x31 (ImmInt (stackSpace * 8)))
1415 -- passArguments gpRegs (fpReg:fpRegs) args (stackCode `appOL` accumCode)
1416
1417 -- float promotion.
1418 -- According to
1419 -- ISO/IEC 9899:2018
1420 -- Information technology — Programming languages — C
1421 --
1422 -- e.g.
1423 -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
1424 -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf
1425 --
1426 -- GHC would need to know the prototype.
1427 --
1428 -- > If the expression that denotes the called function has a type that does not include a
1429 -- > prototype, the integer promotions are performed on each argument, and arguments that
1430 -- > have type float are promoted to double.
1431 --
-- As we have no way to get prototypes for C functions yet, we'll *not* promote
-- floats here, which is in line with the x86_64 backend :(
1434 --
1435 -- See the encode_values.cmm test.
1436 --
1437 -- We would essentially need to insert an FCVT (OpReg W64 fpReg) (OpReg W32 fpReg)
1438 -- if w == W32. But *only* if we don't have a prototype m(
1439 --
-- For AArch64 specifics see: https://developer.arm.com/docs/ihi0055/latest/procedure-call-standard-for-the-arm-64-bit-architecture
1441 --
-- Still have GP regs, and we want to pass a GP argument.
1443
1444 -- AArch64-Darwin: stack packing and alignment
1445 --
-- According to the "Writing ARM64 Code for Apple Platforms" document from
1447 -- Apple, specifically the section "Handle Data Types and Data Alignment Properly"
1448 -- we need to not only pack, but also align arguments on the stack.
1449 --
1450 -- Data type Size (in bytes) Natural alignment (in bytes)
1451 -- BOOL, bool 1 1
1452 -- char 1 1
1453 -- short 2 2
1454 -- int 4 4
1455 -- long 8 8
1456 -- long long 8 8
1457 -- pointer 8 8
1458 -- size_t 8 8
1459 -- NSInteger 8 8
1460 -- CFIndex 8 8
1461 -- fpos_t 8 8
1462 -- off_t 8 8
1463 --
-- We can see that types are aligned to their sizes, so the easiest way to
-- guarantee alignment during packing seems to be to pad the current stack
-- offset up to a multiple of the size of the argument we want to pack.
-- Failure to get this right can result in pretty
1467 -- subtle bugs, e.g. #20137.
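--
-- For example (a hypothetical call whose GP argument registers are already
-- exhausted): packing a char followed by a long places the char at stack
-- offset 0; the long has natural alignment 8, so the offset is padded from
-- 1 up to 8 before it is stored, for 16 bytes of stack space in total.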
1468
1469 passArguments pack (gpReg:gpRegs) fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
1470 let w = formatToWidth format
1471 passArguments pack gpRegs fpRegs args stackSpace (gpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass gp argument: " <> ppr r) $ MOV (OpReg w gpReg) (OpReg w r)))
1472
1473 -- Still have FP regs, and we want to pass an FP argument.
1474 passArguments pack gpRegs (fpReg:fpRegs) ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
1475 let w = formatToWidth format
1476 passArguments pack gpRegs fpRegs args stackSpace (fpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass fp argument: " <> ppr r) $ MOV (OpReg w fpReg) (OpReg w r)))
1477
-- No more regs left to pass. Must pass on stack.
1479 passArguments pack [] [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode = do
1480 let w = formatToWidth format
1481 bytes = widthInBits w `div` 8
1482 space = if pack then bytes else 8
1483 stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
1484 | otherwise = stackSpace
1485 stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace'))))
1486 passArguments pack [] [] args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
1487
1488 -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
1489 passArguments pack [] fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
1490 let w = formatToWidth format
1491 bytes = widthInBits w `div` 8
1492 space = if pack then bytes else 8
1493 stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
1494 | otherwise = stackSpace
1495 stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace'))))
1496 passArguments pack [] fpRegs args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
1497
1498 -- Still have gpRegs left, but want to pass a FP argument. Must be passed on the stack then.
1499 passArguments pack gpRegs [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
1500 let w = formatToWidth format
1501 bytes = widthInBits w `div` 8
1502 space = if pack then bytes else 8
1503 stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
1504 | otherwise = stackSpace
1505 stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace'))))
1506 passArguments pack gpRegs [] args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
1507
1508 passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
1509
1510 readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg]-> InstrBlock -> NatM ([Reg], InstrBlock)
1511 readResults _ _ [] accumRegs accumCode = return (accumRegs, accumCode)
1512 readResults [] _ _ _ _ = do
1513 platform <- getPlatform
1514 pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
1515 readResults _ [] _ _ _ = do
1516 platform <- getPlatform
1517 pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
1518 readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do
1519 -- gp/fp reg -> dst
1520 platform <- getPlatform
1521 let rep = cmmRegType platform (CmmLocal dst)
1522 format = cmmTypeFormat rep
1523 w = cmmRegWidth platform (CmmLocal dst)
1524 r_dst = getRegisterReg platform (CmmLocal dst)
1525 if isFloatFormat format
1526 then readResults (gpReg:gpRegs) fpRegs dsts (fpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
1527 else readResults gpRegs (fpReg:fpRegs) dsts (gpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg))
1528
1529 unaryFloatOp w op arg_reg dest_reg = do
1530 platform <- getPlatform
1531 (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
1532 let dst = getRegisterReg platform (CmmLocal dest_reg)
1533 let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
1534 return (code, Nothing)