never executed always true always false
    1 {-# LANGUAGE GADTs #-}
    2 
    3 -----------------------------------------------------------------------------
    4 --
    5 -- Generating machine code (instruction selection)
    6 --
    7 -- (c) The University of Glasgow 1996-2004
    8 --
    9 -----------------------------------------------------------------------------
   10 
   11 -- This is a big module, but, if you pay attention to
   12 -- (a) the sectioning, and (b) the type signatures,
   13 -- the structure should not be too overwhelming.
   14 
   15 module GHC.CmmToAsm.PPC.CodeGen (
   16         cmmTopCodeGen,
   17         generateJumpTableForInstr,
   18         InstrBlock
   19 )
   20 
   21 where
   22 
   23 -- NCG stuff:
   24 import GHC.Prelude
   25 
   26 import GHC.Platform.Regs
   27 import GHC.CmmToAsm.PPC.Instr
   28 import GHC.CmmToAsm.PPC.Cond
   29 import GHC.CmmToAsm.PPC.Regs
   30 import GHC.CmmToAsm.CPrim
   31 import GHC.CmmToAsm.Types
   32 import GHC.Cmm.DebugBlock
   33    ( DebugBlock(..) )
   34 import GHC.CmmToAsm.Monad
   35    ( NatM, getNewRegNat, getNewLabelNat
   36    , getBlockIdNat, getPicBaseNat, getNewRegPairNat
   37    , getPicBaseMaybeNat, getPlatform, getConfig
   38    , getDebugBlock, getFileId
   39    )
   40 import GHC.CmmToAsm.PIC
   41 import GHC.CmmToAsm.Format
   42 import GHC.CmmToAsm.Config
   43 import GHC.Platform.Reg.Class
   44 import GHC.Platform.Reg
   45 import GHC.CmmToAsm.Reg.Target
   46 import GHC.Platform
   47 
   48 -- Our intermediate code:
   49 import GHC.Cmm.BlockId
   50 import GHC.Cmm.Ppr           ( pprExpr )
   51 import GHC.Cmm
   52 import GHC.Cmm.Utils
   53 import GHC.Cmm.Switch
   54 import GHC.Cmm.CLabel
   55 import GHC.Cmm.Dataflow.Block
   56 import GHC.Cmm.Dataflow.Graph
   57 import GHC.Types.Tickish     ( GenTickish(..) )
   58 import GHC.Types.SrcLoc      ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
   59 
   60 -- The rest:
   61 import GHC.Data.OrdList
   62 import GHC.Utils.Outputable
   63 import GHC.Utils.Panic
   64 import GHC.Utils.Panic.Plain
   65 
   66 import Control.Monad    ( mapAndUnzipM, when )
   67 import Data.Word
   68 
   69 import GHC.Types.Basic
   70 import GHC.Data.FastString
   71 
   72 -- -----------------------------------------------------------------------------
   73 -- Top-level of the instruction selector
   74 
   75 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
   76 -- They are really trees of insns to facilitate fast appending, where a
   77 -- left-to-right traversal (pre-order?) yields the insns in the correct
   78 -- order.
   79 
   80 cmmTopCodeGen
   81         :: RawCmmDecl
   82         -> NatM [NatCmmDecl RawCmmStatics Instr]
   83 
   84 cmmTopCodeGen (CmmProc info lab live graph) = do
   85   let blocks = toBlockListEntryFirst graph
   86   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
   87   platform <- getPlatform
   88   let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
   89       tops = proc : concat statics
   90       os   = platformOS platform
   91       arch = platformArch platform
   92   case arch of
   93     ArchPPC | os == OSAIX -> return tops
   94             | otherwise -> do
   95       picBaseMb <- getPicBaseMaybeNat
   96       case picBaseMb of
   97            Just picBase -> initializePicBase_ppc arch os picBase tops
   98            Nothing -> return tops
   99     ArchPPC_64 ELF_V1 -> fixup_entry tops
  100                       -- generating function descriptor is handled in
  101                       -- pretty printer
  102     ArchPPC_64 ELF_V2 -> fixup_entry tops
  103                       -- generating function prologue is handled in
  104                       -- pretty printer
  105     _          -> panic "PPC.cmmTopCodeGen: unknown arch"
  106     where
  107       fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
  108         = do
  109         let BasicBlock bID insns = entry
  110         bID' <- if lab == (blockLbl bID)
  111                 then newBlockId
  112                 else return bID
  113         let b' = BasicBlock bID' insns
  114         return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
  115       fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
  116 
  117 cmmTopCodeGen (CmmData sec dat) =
  118   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  119 
  120 basicBlockCodeGen
  121         :: Block CmmNode C C
  122         -> NatM ( [NatBasicBlock Instr]
  123                 , [NatCmmDecl RawCmmStatics Instr])
  124 
  125 basicBlockCodeGen block = do
  126   let (_, nodes, tail)  = blockSplit block
  127       id = entryLabel block
  128       stmts = blockToList nodes
  129   -- Generate location directive
  130   dbg <- getDebugBlock (entryLabel block)
  131   loc_instrs <- case dblSourceTick =<< dbg of
  132     Just (SourceNote span name)
  133       -> do fileid <- getFileId (srcSpanFile span)
  134             let line = srcSpanStartLine span; col =srcSpanStartCol span
  135             return $ unitOL $ LOCATION fileid line col name
  136     _ -> return nilOL
  137   mid_instrs <- stmtsToInstrs stmts
  138   tail_instrs <- stmtToInstrs tail
  139   let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  140   -- code generation may introduce new basic block boundaries, which
  141   -- are indicated by the NEWBLOCK instruction.  We must split up the
  142   -- instruction stream into basic blocks again.  Also, we extract
  143   -- LDATAs here too.
  144   let
  145         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  146 
  147         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  148           = ([], BasicBlock id instrs : blocks, statics)
  149         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  150           = (instrs, blocks, CmmData sec dat:statics)
  151         mkBlocks instr (instrs,blocks,statics)
  152           = (instr:instrs, blocks, statics)
  153   return (BasicBlock id top : other_blocks, statics)
  154 
  155 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
  156 stmtsToInstrs stmts
  157    = do instrss <- mapM stmtToInstrs stmts
  158         return (concatOL instrss)
  159 
  160 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
  161 stmtToInstrs stmt = do
  162   config <- getConfig
  163   platform <- getPlatform
  164   case stmt of
  165     CmmComment s   -> return (unitOL (COMMENT $ ftext s))
  166     CmmTick {}     -> return nilOL
  167     CmmUnwind {}   -> return nilOL
  168 
  169     CmmAssign reg src
  170       | isFloatType ty -> assignReg_FltCode format reg src
  171       | target32Bit platform &&
  172         isWord64 ty    -> assignReg_I64Code      reg src
  173       | otherwise      -> assignReg_IntCode format reg src
  174         where ty = cmmRegType platform reg
  175               format = cmmTypeFormat ty
  176 
  177     CmmStore addr src
  178       | isFloatType ty -> assignMem_FltCode format addr src
  179       | target32Bit platform &&
  180         isWord64 ty    -> assignMem_I64Code      addr src
  181       | otherwise      -> assignMem_IntCode format addr src
  182         where ty = cmmExprType platform src
  183               format = cmmTypeFormat ty
  184 
  185     CmmUnsafeForeignCall target result_regs args
  186        -> genCCall target result_regs args
  187 
  188     CmmBranch id          -> genBranch id
  189     CmmCondBranch arg true false prediction -> do
  190       b1 <- genCondJump true arg prediction
  191       b2 <- genBranch false
  192       return (b1 `appOL` b2)
  193     CmmSwitch arg ids -> genSwitch config arg ids
  194     CmmCall { cml_target = arg
  195             , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
  196     _ ->
  197       panic "stmtToInstrs: statement should have been cps'd away"
  198 
  199 jumpRegs :: Platform -> [GlobalReg] -> [Reg]
  200 jumpRegs platform gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
  201 
  202 --------------------------------------------------------------------------------
  203 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  204 --      They are really trees of insns to facilitate fast appending, where a
  205 --      left-to-right traversal yields the insns in the correct order.
  206 --
  207 type InstrBlock
  208         = OrdList Instr
  209 
  210 
  211 -- | Register's passed up the tree.  If the stix code forces the register
  212 --      to live in a pre-decided machine register, it comes out as @Fixed@;
  213 --      otherwise, it comes out as @Any@, and the parent can decide which
  214 --      register to put it in.
  215 --
  216 data Register
  217         = Fixed Format Reg InstrBlock
  218         | Any   Format (Reg -> InstrBlock)
  219 
  220 
  221 swizzleRegisterRep :: Register -> Format -> Register
  222 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
  223 swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
  224 
  225 
  226 -- | Grab the Reg for a CmmReg
  227 getRegisterReg :: Platform -> CmmReg -> Reg
  228 
  229 getRegisterReg _ (CmmLocal (LocalReg u pk))
  230   = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
  231 
  232 getRegisterReg platform (CmmGlobal mid)
  233   = case globalRegMaybe platform mid of
  234         Just reg -> RegReal reg
  235         Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
  236         -- By this stage, the only MagicIds remaining should be the
  237         -- ones which map to a real machine register on this
  238         -- platform.  Hence ...
  239 
  240 -- | Convert a BlockId to some CmmStatic data
  241 jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
  242 jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
  243 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
  244     where blockLabel = blockLbl blockid
  245 
  246 
  247 
  248 -- -----------------------------------------------------------------------------
  249 -- General things for putting together code sequences
  250 
  251 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
  252 -- CmmExprs into CmmRegOff?
  253 mangleIndexTree :: Platform -> CmmExpr -> CmmExpr
  254 mangleIndexTree platform (CmmRegOff reg off)
  255   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  256   where width = typeWidth (cmmRegType platform reg)
  257 
  258 mangleIndexTree _ _
  259         = panic "PPC.CodeGen.mangleIndexTree: no match"
  260 
  261 -- -----------------------------------------------------------------------------
  262 --  Code gen for 64-bit arithmetic on 32-bit platforms
  263 
  264 {-
  265 Simple support for generating 64-bit code (ie, 64 bit values and 64
  266 bit assignments) on 32-bit platforms.  Unlike the main code generator
  267 we merely shoot for generating working code as simply as possible, and
  268 pay little attention to code quality.  Specifically, there is no
  269 attempt to deal cleverly with the fixed-vs-floating register
  270 distinction; all values are generated into (pairs of) floating
  271 registers, even if this would mean some redundant reg-reg moves as a
  272 result.  Only one of the VRegUniques is returned, since it will be
  273 of the VRegUniqueLo form, and the upper-half VReg can be determined
  274 by applying getHiVRegFromLo to it.
  275 -}
  276 
  277 data ChildCode64        -- a.k.a "Register64"
  278       = ChildCode64
  279            InstrBlock   -- code
  280            Reg          -- the lower 32-bit temporary which contains the
  281                         -- result; use getHiVRegFromLo to find the other
  282                         -- VRegUnique.  Rules of this simplified insn
  283                         -- selection game are therefore that the returned
  284                         -- Reg may be modified
  285 
  286 
  287 -- | Compute an expression into a register, but
  288 --      we don't mind which one it is.
  289 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
  290 getSomeReg expr = do
  291   r <- getRegister expr
  292   case r of
  293     Any rep code -> do
  294         tmp <- getNewRegNat rep
  295         return (tmp, code tmp)
  296     Fixed _ reg code ->
  297         return (reg, code)
  298 
  299 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
  300 getI64Amodes addrTree = do
  301     Amode hi_addr addr_code <- getAmode D addrTree
  302     case addrOffset hi_addr 4 of
  303         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
  304         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
  305                            return (AddrRegImm hi_ptr (ImmInt 0),
  306                                    AddrRegImm hi_ptr (ImmInt 4),
  307                                    code)
  308 
  309 
  310 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
  311 assignMem_I64Code addrTree valueTree = do
  312         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
  313         ChildCode64 vcode rlo <- iselExpr64 valueTree
  314         let
  315                 rhi = getHiVRegFromLo rlo
  316 
  317                 -- Big-endian store
  318                 mov_hi = ST II32 rhi hi_addr
  319                 mov_lo = ST II32 rlo lo_addr
  320         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
  321 
  322 
  323 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
  324 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
  325    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  326    let
  327          r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
  328          r_dst_hi = getHiVRegFromLo r_dst_lo
  329          r_src_hi = getHiVRegFromLo r_src_lo
  330          mov_lo = MR r_dst_lo r_src_lo
  331          mov_hi = MR r_dst_hi r_src_hi
  332    return (
  333         vcode `snocOL` mov_lo `snocOL` mov_hi
  334      )
  335 
  336 assignReg_I64Code _ _
  337    = panic "assignReg_I64Code(powerpc): invalid lvalue"
  338 
  339 
  340 iselExpr64        :: CmmExpr -> NatM ChildCode64
  341 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
  342     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
  343     (rlo, rhi) <- getNewRegPairNat II32
  344     let mov_hi = LD II32 rhi hi_addr
  345         mov_lo = LD II32 rlo lo_addr
  346     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
  347                          rlo
  348 
  349 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
  350    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
  351 
  352 iselExpr64 (CmmLit (CmmInt i _)) = do
  353   (rlo,rhi) <- getNewRegPairNat II32
  354   let
  355         half0 = fromIntegral (fromIntegral i :: Word16)
  356         half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
  357         half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
  358         half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
  359 
  360         code = toOL [
  361                 LIS rlo (ImmInt half1),
  362                 OR rlo rlo (RIImm $ ImmInt half0),
  363                 LIS rhi (ImmInt half3),
  364                 OR rhi rhi (RIImm $ ImmInt half2)
  365                 ]
  366   return (ChildCode64 code rlo)
  367 
  368 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
  369    ChildCode64 code1 r1lo <- iselExpr64 e1
  370    ChildCode64 code2 r2lo <- iselExpr64 e2
  371    (rlo,rhi) <- getNewRegPairNat II32
  372    let
  373         r1hi = getHiVRegFromLo r1lo
  374         r2hi = getHiVRegFromLo r2lo
  375         code =  code1 `appOL`
  376                 code2 `appOL`
  377                 toOL [ ADDC rlo r1lo r2lo,
  378                        ADDE rhi r1hi r2hi ]
  379    return (ChildCode64 code rlo)
  380 
  381 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
  382    ChildCode64 code1 r1lo <- iselExpr64 e1
  383    ChildCode64 code2 r2lo <- iselExpr64 e2
  384    (rlo,rhi) <- getNewRegPairNat II32
  385    let
  386         r1hi = getHiVRegFromLo r1lo
  387         r2hi = getHiVRegFromLo r2lo
  388         code =  code1 `appOL`
  389                 code2 `appOL`
  390                 toOL [ SUBFC rlo r2lo (RIReg r1lo),
  391                        SUBFE rhi r2hi r1hi ]
  392    return (ChildCode64 code rlo)
  393 
  394 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
  395     (expr_reg,expr_code) <- getSomeReg expr
  396     (rlo, rhi) <- getNewRegPairNat II32
  397     let mov_hi = LI rhi (ImmInt 0)
  398         mov_lo = MR rlo expr_reg
  399     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
  400                          rlo
  401 
  402 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
  403     (expr_reg,expr_code) <- getSomeReg expr
  404     (rlo, rhi) <- getNewRegPairNat II32
  405     let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
  406         mov_lo = MR rlo expr_reg
  407     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
  408                          rlo
  409 iselExpr64 expr
  410    = do
  411      platform <- getPlatform
  412      pprPanic "iselExpr64(powerpc)" (pprExpr platform expr)
  413 
  414 
  415 
  416 getRegister :: CmmExpr -> NatM Register
  417 getRegister e = do config <- getConfig
  418                    getRegister' config (ncgPlatform config) e
  419 
  420 getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
  421 
  422 getRegister' _ platform (CmmReg (CmmGlobal PicBaseReg))
  423   | OSAIX <- platformOS platform = do
  424         let code dst = toOL [ LD II32 dst tocAddr ]
  425             tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
  426         return (Any II32 code)
  427   | target32Bit platform = do
  428       reg <- getPicBaseNat $ archWordFormat (target32Bit platform)
  429       return (Fixed (archWordFormat (target32Bit platform))
  430                     reg nilOL)
  431   | otherwise = return (Fixed II64 toc nilOL)
  432 
  433 getRegister' _ platform (CmmReg reg)
  434   = return (Fixed (cmmTypeFormat (cmmRegType platform reg))
  435                   (getRegisterReg platform reg) nilOL)
  436 
  437 getRegister' config platform tree@(CmmRegOff _ _)
  438   = getRegister' config platform (mangleIndexTree platform tree)
  439 
  440     -- for 32-bit architectures, support some 64 -> 32 bit conversions:
  441     -- TO_W_(x), TO_W_(x >> 32)
  442 
  443 getRegister' _ platform (CmmMachOp (MO_UU_Conv W64 W32)
  444                      [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
  445  | target32Bit platform = do
  446   ChildCode64 code rlo <- iselExpr64 x
  447   return $ Fixed II32 (getHiVRegFromLo rlo) code
  448 
  449 getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32)
  450                      [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
  451  | target32Bit platform = do
  452   ChildCode64 code rlo <- iselExpr64 x
  453   return $ Fixed II32 (getHiVRegFromLo rlo) code
  454 
  455 getRegister' _ platform (CmmMachOp (MO_UU_Conv W64 W32) [x])
  456  | target32Bit platform = do
  457   ChildCode64 code rlo <- iselExpr64 x
  458   return $ Fixed II32 rlo code
  459 
  460 getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32) [x])
  461  | target32Bit platform = do
  462   ChildCode64 code rlo <- iselExpr64 x
  463   return $ Fixed II32 rlo code
  464 
  465 getRegister' _ platform (CmmLoad mem pk)
  466  | not (isWord64 pk) = do
  467         Amode addr addr_code <- getAmode D mem
  468         let code dst = assert ((targetClassOfReg platform dst == RcDouble) == isFloatType pk) $
  469                        addr_code `snocOL` LD format dst addr
  470         return (Any format code)
  471  | not (target32Bit platform) = do
  472         Amode addr addr_code <- getAmode DS mem
  473         let code dst = addr_code `snocOL` LD II64 dst addr
  474         return (Any II64 code)
  475 
  476           where format = cmmTypeFormat pk
  477 
  478 -- catch simple cases of zero- or sign-extended load
  479 getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
  480     Amode addr addr_code <- getAmode D mem
  481     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
  482 
  483 getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
  484     Amode addr addr_code <- getAmode D mem
  485     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
  486 
  487 getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
  488     Amode addr addr_code <- getAmode D mem
  489     return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
  490 
  491 getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
  492     Amode addr addr_code <- getAmode D mem
  493     return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
  494 
  495 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
  496 
  497 getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
  498     Amode addr addr_code <- getAmode D mem
  499     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
  500 
  501 getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
  502     Amode addr addr_code <- getAmode D mem
  503     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
  504 
  505 getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
  506     Amode addr addr_code <- getAmode D mem
  507     return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
  508 
  509 getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
  510     Amode addr addr_code <- getAmode D mem
  511     return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
  512 
  513 getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
  514     Amode addr addr_code <- getAmode D mem
  515     return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
  516 
  517 getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
  518     -- lwa is DS-form. See Note [Power instruction format]
  519     Amode addr addr_code <- getAmode DS mem
  520     return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
  521 
  522 getRegister' config platform (CmmMachOp mop [x]) -- unary MachOps
  523   = case mop of
  524       MO_Not rep   -> triv_ucode_int rep NOT
  525 
  526       MO_F_Neg w   -> triv_ucode_float w FNEG
  527       MO_S_Neg w   -> triv_ucode_int   w NEG
  528 
  529       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
  530       MO_FF_Conv W32 W64 -> conversionNop FF64 x
  531 
  532       MO_FS_Conv from to -> coerceFP2Int from to x
  533       MO_SF_Conv from to -> coerceInt2FP from to x
  534 
  535       MO_SS_Conv from to
  536         | from >= to -> conversionNop (intFormat to) x
  537         | otherwise  -> triv_ucode_int to (EXTS (intFormat from))
  538 
  539       MO_UU_Conv from to
  540         | from >= to -> conversionNop (intFormat to) x
  541         | otherwise  -> clearLeft from to
  542 
  543       MO_XX_Conv _ to -> conversionNop (intFormat to) x
  544 
  545       _ -> panic "PPC.CodeGen.getRegister: no match"
  546 
  547     where
  548         triv_ucode_int   width instr = trivialUCode (intFormat    width) instr x
  549         triv_ucode_float width instr = trivialUCode (floatFormat  width) instr x
  550 
  551         conversionNop new_format expr
  552             = do e_code <- getRegister' config platform expr
  553                  return (swizzleRegisterRep e_code new_format)
  554 
  555         clearLeft from to
  556             = do (src1, code1) <- getSomeReg x
  557                  let arch_fmt  = intFormat (wordWidth platform)
  558                      arch_bits = widthInBits (wordWidth platform)
  559                      size      = widthInBits from
  560                      code dst  = code1 `snocOL`
  561                                  CLRLI arch_fmt dst src1 (arch_bits - size)
  562                  return (Any (intFormat to) code)
  563 
  564 getRegister' _ _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
  565   = case mop of
  566       MO_F_Eq _ -> condFltReg EQQ x y
  567       MO_F_Ne _ -> condFltReg NE  x y
  568       MO_F_Gt _ -> condFltReg GTT x y
  569       MO_F_Ge _ -> condFltReg GE  x y
  570       MO_F_Lt _ -> condFltReg LTT x y
  571       MO_F_Le _ -> condFltReg LE  x y
  572 
  573       MO_Eq rep -> condIntReg EQQ rep x y
  574       MO_Ne rep -> condIntReg NE  rep x y
  575 
  576       MO_S_Gt rep -> condIntReg GTT rep x y
  577       MO_S_Ge rep -> condIntReg GE  rep x y
  578       MO_S_Lt rep -> condIntReg LTT rep x y
  579       MO_S_Le rep -> condIntReg LE  rep x y
  580 
  581       MO_U_Gt rep -> condIntReg GU  rep x y
  582       MO_U_Ge rep -> condIntReg GEU rep x y
  583       MO_U_Lt rep -> condIntReg LU  rep x y
  584       MO_U_Le rep -> condIntReg LEU rep x y
  585 
  586       MO_F_Add w  -> triv_float w FADD
  587       MO_F_Sub w  -> triv_float w FSUB
  588       MO_F_Mul w  -> triv_float w FMUL
  589       MO_F_Quot w -> triv_float w FDIV
  590 
  591          -- optimize addition with 32-bit immediate
  592          -- (needed for PIC)
  593       MO_Add W32 ->
  594         case y of
  595           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
  596             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
  597           CmmLit lit
  598             -> do
  599                 (src, srcCode) <- getSomeReg x
  600                 let imm = litToImm lit
  601                     code dst = srcCode `appOL` toOL [
  602                                     ADDIS dst src (HA imm),
  603                                     ADD dst dst (RIImm (LO imm))
  604                                 ]
  605                 return (Any II32 code)
  606           _ -> trivialCode W32 True ADD x y
  607 
  608       MO_Add rep -> trivialCode rep True ADD x y
  609       MO_Sub rep ->
  610         case y of
  611           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
  612             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
  613           _ -> case x of
  614                  CmmLit (CmmInt imm _)
  615                    | Just _ <- makeImmediate rep True imm
  616                    -- subfi ('subtract from' with immediate) doesn't exist
  617                    -> trivialCode rep True SUBFC y x
  618                  _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
  619 
  620       MO_Mul rep -> shiftMulCode rep True MULL x y
  621       MO_S_MulMayOflo rep -> do
  622         (src1, code1) <- getSomeReg x
  623         (src2, code2) <- getSomeReg y
  624         let
  625           format = intFormat rep
  626           code dst = code1 `appOL` code2
  627                        `appOL` toOL [ MULLO format dst src1 src2
  628                                     , MFOV  format dst
  629                                     ]
  630         return (Any format code)
  631 
  632       MO_S_Quot rep -> divCode rep True x y
  633       MO_U_Quot rep -> divCode rep False x y
  634 
  635       MO_S_Rem rep -> remainder rep True x y
  636       MO_U_Rem rep -> remainder rep False x y
  637 
  638       MO_And rep   -> case y of
  639         (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
  640             -> do
  641                 (src, srcCode) <- getSomeReg x
  642                 let clear_mask = if imm == -4 then 2 else 3
  643                     fmt = intFormat rep
  644                     code dst = srcCode
  645                                `appOL` unitOL (CLRRI fmt dst src clear_mask)
  646                 return (Any fmt code)
  647         _ -> trivialCode rep False AND x y
  648       MO_Or rep    -> trivialCode rep False OR x y
  649       MO_Xor rep   -> trivialCode rep False XOR x y
  650 
  651       MO_Shl rep   -> shiftMulCode rep False SL x y
  652       MO_S_Shr rep -> srCode rep True SRA x y
  653       MO_U_Shr rep -> srCode rep False SR x y
  654       _         -> panic "PPC.CodeGen.getRegister: no match"
  655 
  656   where
  657     triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
  658     triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
  659 
  660     remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
  661     remainder rep sgn x y = do
  662       let fmt = intFormat rep
  663       tmp <- getNewRegNat fmt
  664       code <- remainderCode rep sgn tmp x y
  665       return (Any fmt code)
  666 
  667 
  668 getRegister' _ _ (CmmLit (CmmInt i rep))
  669   | Just imm <- makeImmediate rep True i
  670   = let
  671         code dst = unitOL (LI dst imm)
  672     in
  673         return (Any (intFormat rep) code)
  674 
  675 getRegister' config _ (CmmLit (CmmFloat f frep)) = do
  676     lbl <- getNewLabelNat
  677     dynRef <- cmmMakeDynamicReference config DataReference lbl
  678     Amode addr addr_code <- getAmode D dynRef
  679     let format = floatFormat frep
  680         code dst =
  681             LDATA (Section ReadOnlyData lbl)
  682                   (CmmStaticsRaw lbl [CmmStaticLit (CmmFloat f frep)])
  683             `consOL` (addr_code `snocOL` LD format dst addr)
  684     return (Any format code)
  685 
  686 getRegister' config platform (CmmLit lit)
  687   | target32Bit platform
  688   = let rep = cmmLitType platform lit
  689         imm = litToImm lit
  690         code dst = toOL [
  691               LIS dst (HA imm),
  692               ADD dst dst (RIImm (LO imm))
  693           ]
  694     in return (Any (cmmTypeFormat rep) code)
  695   | otherwise
  696   = do lbl <- getNewLabelNat
  697        dynRef <- cmmMakeDynamicReference config DataReference lbl
  698        Amode addr addr_code <- getAmode D dynRef
  699        let rep = cmmLitType platform lit
  700            format = cmmTypeFormat rep
  701            code dst =
  702             LDATA (Section ReadOnlyData lbl) (CmmStaticsRaw lbl [CmmStaticLit lit])
  703             `consOL` (addr_code `snocOL` LD format dst addr)
  704        return (Any format code)
  705 
  706 getRegister' _ platform other = pprPanic "getRegister(ppc)" (pprExpr platform other)
  707 
  708     -- extend?Rep: wrap integer expression of type `from`
  709     -- in a conversion to `to`
  710 extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
  711 extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
  712 
  713 extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
  714 extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
  715 
  716 -- -----------------------------------------------------------------------------
  717 --  The 'Amode' type: Memory addressing modes passed up the tree.
  718 
  719 data Amode
  720         = Amode AddrMode InstrBlock
  721 
  722 {-
  723 Now, given a tree (the argument to a CmmLoad) that references memory,
  724 produce a suitable addressing mode.
  725 
  726 A Rule of the Game (tm) for Amodes: use of the addr bit must
  727 immediately follow use of the code part, since the code part puts
  728 values in registers which the addr then refers to.  So you can't put
  729 anything in between, lest it overwrite some of those registers.  If
  730 you need to do some other computation between the code part and use of
  731 the addr bit, first store the effective address from the amode in a
  732 temporary, then do the other computation, and then use the temporary:
  733 
  734     code
  735     LEA amode, tmp
  736     ... other computation ...
  737     ... (tmp) ...
  738 -}
  739 
  740 {- Note [Power instruction format]
  741 In some instructions the 16 bit offset must be a multiple of 4, i.e.
  742 the two least significant bits must be zero. The "Power ISA" specification
  743 calls these instruction formats "DS-FORM" and the instructions with
  744 arbitrary 16 bit offsets are "D-FORM".
  745 
  746 The Power ISA specification document can be obtained from www.power.org.
  747 -}
  748 data InstrForm = D | DS
  749 
  750 getAmode :: InstrForm -> CmmExpr -> NatM Amode
  751 getAmode inf tree@(CmmRegOff _ _)
  752   = do platform <- getPlatform
  753        getAmode inf (mangleIndexTree platform tree)
  754 
  755 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
  756   | Just off <- makeImmediate W32 True (-i)
  757   = do
  758         (reg, code) <- getSomeReg x
  759         return (Amode (AddrRegImm reg off) code)
  760 
  761 
  762 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
  763   | Just off <- makeImmediate W32 True i
  764   = do
  765         (reg, code) <- getSomeReg x
  766         return (Amode (AddrRegImm reg off) code)
  767 
  768 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  769   | Just off <- makeImmediate W64 True (-i)
  770   = do
  771         (reg, code) <- getSomeReg x
  772         return (Amode (AddrRegImm reg off) code)
  773 
  774 
  775 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  776   | Just off <- makeImmediate W64 True i
  777   = do
  778         (reg, code) <- getSomeReg x
  779         return (Amode (AddrRegImm reg off) code)
  780 
  781 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  782   | Just off <- makeImmediate W64 True (-i)
  783   = do
  784         (reg, code) <- getSomeReg x
  785         (reg', off', code')  <-
  786                      if i `mod` 4 == 0
  787                       then return (reg, off, code)
  788                       else do
  789                            tmp <- getNewRegNat II64
  790                            return (tmp, ImmInt 0,
  791                                   code `snocOL` ADD tmp reg (RIImm off))
  792         return (Amode (AddrRegImm reg' off') code')
  793 
  794 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  795   | Just off <- makeImmediate W64 True i
  796   = do
  797         (reg, code) <- getSomeReg x
  798         (reg', off', code')  <-
  799                      if i `mod` 4 == 0
  800                       then return (reg, off, code)
  801                       else do
  802                            tmp <- getNewRegNat II64
  803                            return (tmp, ImmInt 0,
  804                                   code `snocOL` ADD tmp reg (RIImm off))
  805         return (Amode (AddrRegImm reg' off') code')
  806 
  807    -- optimize addition with 32-bit immediate
  808    -- (needed for PIC)
  809 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
  810   = do
  811         platform <- getPlatform
  812         (src, srcCode) <- getSomeReg x
  813         let imm = litToImm lit
  814         case () of
  815             _ | OSAIX <- platformOS platform
  816               , isCmmLabelType lit ->
  817                     -- HA16/LO16 relocations on labels not supported on AIX
  818                     return (Amode (AddrRegImm src imm) srcCode)
  819               | otherwise -> do
  820                     tmp <- getNewRegNat II32
  821                     let code = srcCode `snocOL` ADDIS tmp src (HA imm)
  822                     return (Amode (AddrRegImm tmp (LO imm)) code)
  823   where
  824       isCmmLabelType (CmmLabel {})        = True
  825       isCmmLabelType (CmmLabelOff {})     = True
  826       isCmmLabelType (CmmLabelDiffOff {}) = True
  827       isCmmLabelType _                    = False
  828 
  829 getAmode _ (CmmLit lit)
  830   = do
  831         platform <- getPlatform
  832         case platformArch platform of
  833              ArchPPC -> do
  834                  tmp <- getNewRegNat II32
  835                  let imm = litToImm lit
  836                      code = unitOL (LIS tmp (HA imm))
  837                  return (Amode (AddrRegImm tmp (LO imm)) code)
  838              _        -> do -- TODO: Load from TOC,
  839                             -- see getRegister' _ (CmmLit lit)
  840                  tmp <- getNewRegNat II64
  841                  let imm = litToImm lit
  842                      code =  toOL [
  843                           LIS tmp (HIGHESTA imm),
  844                           OR tmp tmp (RIImm (HIGHERA imm)),
  845                           SL  II64 tmp tmp (RIImm (ImmInt 32)),
  846                           ORIS tmp tmp (HA imm)
  847                           ]
  848                  return (Amode (AddrRegImm tmp (LO imm)) code)
  849 
  850 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
  851   = do
  852         (regX, codeX) <- getSomeReg x
  853         (regY, codeY) <- getSomeReg y
  854         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
  855 
  856 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
  857   = do
  858         (regX, codeX) <- getSomeReg x
  859         (regY, codeY) <- getSomeReg y
  860         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
  861 
  862 getAmode _ other
  863   = do
  864         (reg, code) <- getSomeReg other
  865         let
  866             off  = ImmInt 0
  867         return (Amode (AddrRegImm reg off) code)
  868 
  869 
  870 --  The 'CondCode' type:  Condition codes passed up the tree.
  871 data CondCode
  872         = CondCode Bool Cond InstrBlock
  873 
  874 -- Set up a condition code for a conditional branch.
  875 
  876 getCondCode :: CmmExpr -> NatM CondCode
  877 
  878 -- almost the same as everywhere else - but we need to
  879 -- extend small integers to 32 bit or 64 bit first
  880 
  881 getCondCode (CmmMachOp mop [x, y])
  882   = case mop of
  883       MO_F_Eq W32 -> condFltCode EQQ x y
  884       MO_F_Ne W32 -> condFltCode NE  x y
  885       MO_F_Gt W32 -> condFltCode GTT x y
  886       MO_F_Ge W32 -> condFltCode GE  x y
  887       MO_F_Lt W32 -> condFltCode LTT x y
  888       MO_F_Le W32 -> condFltCode LE  x y
  889 
  890       MO_F_Eq W64 -> condFltCode EQQ x y
  891       MO_F_Ne W64 -> condFltCode NE  x y
  892       MO_F_Gt W64 -> condFltCode GTT x y
  893       MO_F_Ge W64 -> condFltCode GE  x y
  894       MO_F_Lt W64 -> condFltCode LTT x y
  895       MO_F_Le W64 -> condFltCode LE  x y
  896 
  897       MO_Eq rep -> condIntCode EQQ rep x y
  898       MO_Ne rep -> condIntCode NE  rep x y
  899 
  900       MO_S_Gt rep -> condIntCode GTT rep x y
  901       MO_S_Ge rep -> condIntCode GE  rep x y
  902       MO_S_Lt rep -> condIntCode LTT rep x y
  903       MO_S_Le rep -> condIntCode LE  rep x y
  904 
  905       MO_U_Gt rep -> condIntCode GU  rep x y
  906       MO_U_Ge rep -> condIntCode GEU rep x y
  907       MO_U_Lt rep -> condIntCode LU  rep x y
  908       MO_U_Le rep -> condIntCode LEU rep x y
  909 
  910       _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
  911 
  912 getCondCode _ = panic "getCondCode(2)(powerpc)"
  913 
  914 
  915 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
  916 -- passed back up the tree.
  917 
  918 condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
  919 condIntCode cond width x y = do
  920   platform <- getPlatform
  921   condIntCode' (target32Bit platform) cond width x y
  922 
  923 condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
  924 
  925 -- simple code for 64-bit on 32-bit platforms
  926 condIntCode' True cond W64 x y
  927   | condUnsigned cond
  928   = do
  929       ChildCode64 code_x x_lo <- iselExpr64 x
  930       ChildCode64 code_y y_lo <- iselExpr64 y
  931       let x_hi = getHiVRegFromLo x_lo
  932           y_hi = getHiVRegFromLo y_lo
  933       end_lbl <- getBlockIdNat
  934       let code = code_x `appOL` code_y `appOL` toOL
  935                  [ CMPL II32 x_hi (RIReg y_hi)
  936                  , BCC NE end_lbl Nothing
  937                  , CMPL II32 x_lo (RIReg y_lo)
  938                  , BCC ALWAYS end_lbl Nothing
  939 
  940                  , NEWBLOCK end_lbl
  941                  ]
  942       return (CondCode False cond code)
  943   | otherwise
  944   = do
  945       ChildCode64 code_x x_lo <- iselExpr64 x
  946       ChildCode64 code_y y_lo <- iselExpr64 y
  947       let x_hi = getHiVRegFromLo x_lo
  948           y_hi = getHiVRegFromLo y_lo
  949       end_lbl <- getBlockIdNat
  950       cmp_lo  <- getBlockIdNat
  951       let code = code_x `appOL` code_y `appOL` toOL
  952                  [ CMP II32 x_hi (RIReg y_hi)
  953                  , BCC NE end_lbl Nothing
  954                  , CMP II32 x_hi (RIImm (ImmInt 0))
  955                  , BCC LE cmp_lo Nothing
  956                  , CMPL II32 x_lo (RIReg y_lo)
  957                  , BCC ALWAYS end_lbl Nothing
  958                  , NEWBLOCK cmp_lo
  959                  , CMPL II32 y_lo (RIReg x_lo)
  960                  , BCC ALWAYS end_lbl Nothing
  961 
  962                  , NEWBLOCK end_lbl
  963                  ]
  964       return (CondCode False cond code)
  965 
  966 -- optimize pointer tag checks. Operation andi. sets condition register
  967 -- so cmpi ..., 0 is redundant.
  968 condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
  969                  (CmmLit (CmmInt 0 _))
  970   | not $ condUnsigned cond,
  971     Just src2 <- makeImmediate rep False imm
  972   = do
  973       (src1, code) <- getSomeReg x
  974       let code' = code `snocOL` AND r0 src1 (RIImm src2)
  975       return (CondCode False cond code')
  976 
  977 condIntCode' _ cond width x (CmmLit (CmmInt y rep))
  978   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
  979   = do
  980       let op_len = max W32 width
  981       let extend = if condUnsigned cond then extendUExpr width op_len
  982                    else extendSExpr width op_len
  983       (src1, code) <- getSomeReg (extend x)
  984       let format = intFormat op_len
  985           code' = code `snocOL`
  986             (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
  987       return (CondCode False cond code')
  988 
  989 condIntCode' _ cond width x y = do
  990   let op_len = max W32 width
  991   let extend = if condUnsigned cond then extendUExpr width op_len
  992                else extendSExpr width op_len
  993   (src1, code1) <- getSomeReg (extend x)
  994   (src2, code2) <- getSomeReg (extend y)
  995   let format = intFormat op_len
  996       code' = code1 `appOL` code2 `snocOL`
  997         (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
  998   return (CondCode False cond code')
  999 
 1000 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
 1001 condFltCode cond x y = do
 1002     (src1, code1) <- getSomeReg x
 1003     (src2, code2) <- getSomeReg y
 1004     let
 1005         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
 1006         code'' = case cond of -- twiddle CR to handle unordered case
 1007                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
 1008                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
 1009                     _ -> code'
 1010                  where
 1011                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
 1012     return (CondCode True cond code'')
 1013 
 1014 
 1015 
 1016 -- -----------------------------------------------------------------------------
 1017 -- Generating assignments
 1018 
 1019 -- Assignments are really at the heart of the whole code generation
 1020 -- business.  Almost all top-level nodes of any real importance are
 1021 -- assignments, which correspond to loads, stores, or register
 1022 -- transfers.  If we're really lucky, some of the register transfers
 1023 -- will go away, because we can use the destination register to
 1024 -- complete the code generation for the right hand side.  This only
 1025 -- fails when the right hand side is forced into a fixed register
 1026 -- (e.g. the result of a call).
 1027 
 1028 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
 1029 assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
 1030 
 1031 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
 1032 assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
 1033 
 1034 assignMem_IntCode pk addr src = do
 1035     (srcReg, code) <- getSomeReg src
 1036     Amode dstAddr addr_code <- case pk of
 1037                                 II64 -> getAmode DS addr
 1038                                 _    -> getAmode D  addr
 1039     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
 1040 
 1041 -- dst is a reg, but src could be anything
 1042 assignReg_IntCode _ reg src
 1043     = do
 1044         platform <- getPlatform
 1045         let dst = getRegisterReg platform reg
 1046         r <- getRegister src
 1047         return $ case r of
 1048             Any _ code         -> code dst
 1049             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
 1050 
 1051 
 1052 
 1053 -- Easy, isn't it?
 1054 assignMem_FltCode = assignMem_IntCode
 1055 assignReg_FltCode = assignReg_IntCode
 1056 
 1057 
 1058 
 1059 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
 1060 
 1061 genJump (CmmLit (CmmLabel lbl)) regs
 1062   = return (unitOL $ JMP lbl regs)
 1063 
 1064 genJump tree gregs
 1065   = do
 1066         platform <- getPlatform
 1067         genJump' tree (platformToGCP platform) gregs
 1068 
 1069 genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
 1070 
 1071 genJump' tree (GCP64ELF 1) regs
 1072   = do
 1073         (target,code) <- getSomeReg tree
 1074         return (code
 1075                `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
 1076                `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
 1077                `snocOL` MTCTR r11
 1078                `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
 1079                `snocOL` BCTR [] Nothing regs)
 1080 
 1081 genJump' tree (GCP64ELF 2) regs
 1082   = do
 1083         (target,code) <- getSomeReg tree
 1084         return (code
 1085                `snocOL` MR r12 target
 1086                `snocOL` MTCTR r12
 1087                `snocOL` BCTR [] Nothing regs)
 1088 
 1089 genJump' tree _ regs
 1090   = do
 1091         (target,code) <- getSomeReg tree
 1092         return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
 1093 
 1094 -- -----------------------------------------------------------------------------
 1095 --  Unconditional branches
 1096 genBranch :: BlockId -> NatM InstrBlock
 1097 genBranch = return . toOL . mkJumpInstr
 1098 
 1099 
 1100 -- -----------------------------------------------------------------------------
 1101 --  Conditional jumps
 1102 
 1103 {-
 1104 Conditional jumps are always to local labels, so we can use branch
 1105 instructions.  We peek at the arguments to decide what kind of
 1106 comparison to do.
 1107 -}
 1108 
 1109 
 1110 genCondJump
 1111     :: BlockId      -- the branch target
 1112     -> CmmExpr      -- the condition on which to branch
 1113     -> Maybe Bool
 1114     -> NatM InstrBlock
 1115 
 1116 genCondJump id bool prediction = do
 1117   CondCode _ cond code <- getCondCode bool
 1118   return (code `snocOL` BCC cond id prediction)
 1119 
 1120 
 1121 
 1122 -- -----------------------------------------------------------------------------
 1123 --  Generating C calls
 1124 
 1125 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 1126 -- @get_arg@, which moves the arguments to the correct registers/stack
 1127 -- locations.  Apart from that, the code is easy.
 1128 
 1129 genCCall :: ForeignTarget      -- function to call
 1130          -> [CmmFormal]        -- where to put the result
 1131          -> [CmmActual]        -- arguments (of mixed type)
 1132          -> NatM InstrBlock
 1133 genCCall (PrimTarget MO_ReadBarrier) _ _
 1134  = return $ unitOL LWSYNC
 1135 genCCall (PrimTarget MO_WriteBarrier) _ _
 1136  = return $ unitOL LWSYNC
 1137 
 1138 genCCall (PrimTarget MO_Touch) _ _
 1139  = return $ nilOL
 1140 
 1141 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
 1142  = return $ nilOL
 1143 
 1144 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
 1145  = do platform <- getPlatform
 1146       let fmt      = intFormat width
 1147           reg_dst  = getRegisterReg platform (CmmLocal dst)
 1148       (instr, n_code) <- case amop of
 1149             AMO_Add  -> getSomeRegOrImm ADD True reg_dst
 1150             AMO_Sub  -> case n of
 1151                 CmmLit (CmmInt i _)
 1152                   | Just imm <- makeImmediate width True (-i)
 1153                    -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
 1154                 _
 1155                    -> do
 1156                          (n_reg, n_code) <- getSomeReg n
 1157                          return  (SUBF reg_dst n_reg reg_dst, n_code)
 1158             AMO_And  -> getSomeRegOrImm AND False reg_dst
 1159             AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
 1160                            return (NAND reg_dst reg_dst n_reg, n_code)
 1161             AMO_Or   -> getSomeRegOrImm OR False reg_dst
 1162             AMO_Xor  -> getSomeRegOrImm XOR False reg_dst
 1163       Amode addr_reg addr_code <- getAmodeIndex addr
 1164       lbl_retry <- getBlockIdNat
 1165       return $ n_code `appOL` addr_code
 1166         `appOL` toOL [ HWSYNC
 1167                      , BCC ALWAYS lbl_retry Nothing
 1168 
 1169                      , NEWBLOCK lbl_retry
 1170                      , LDR fmt reg_dst addr_reg
 1171                      , instr
 1172                      , STC fmt reg_dst addr_reg
 1173                      , BCC NE lbl_retry (Just False)
 1174                      , ISYNC
 1175                      ]
 1176          where
 1177            getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
 1178              = do
 1179                  (regX, codeX) <- getSomeReg x
 1180                  (regY, codeY) <- getSomeReg y
 1181                  return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
 1182            getAmodeIndex other
 1183              = do
 1184                  (reg, code) <- getSomeReg other
 1185                  return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
 1186            getSomeRegOrImm op sign dst
 1187              = case n of
 1188                  CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
 1189                     -> return (op dst dst (RIImm imm), nilOL)
 1190                  _
 1191                     -> do
 1192                           (n_reg, n_code) <- getSomeReg n
 1193                           return  (op dst dst (RIReg n_reg), n_code)
 1194 
 1195 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
 1196  = do platform <- getPlatform
 1197       let fmt      = intFormat width
 1198           reg_dst  = getRegisterReg platform (CmmLocal dst)
 1199           form     = if widthInBits width == 64 then DS else D
 1200       Amode addr_reg addr_code <- getAmode form addr
 1201       lbl_end <- getBlockIdNat
 1202       return $ addr_code `appOL` toOL [ HWSYNC
 1203                                       , LD fmt reg_dst addr_reg
 1204                                       , CMP fmt reg_dst (RIReg reg_dst)
 1205                                       , BCC NE lbl_end (Just False)
 1206                                       , BCC ALWAYS lbl_end Nothing
 1207                             -- See Note [Seemingly useless cmp and bne]
 1208                                       , NEWBLOCK lbl_end
 1209                                       , ISYNC
 1210                                       ]
 1211 
 1212 -- Note [Seemingly useless cmp and bne]
 1213 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
 1214 -- the second paragraph says that isync may complete before storage accesses
 1215 -- "associated" with a preceding instruction have been performed. The cmp
 1216 -- operation and the following bne introduce a data and control dependency
 1217 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
 1218 -- Fetch).
 1219 -- This is also what gcc does.
 1220 
 1221 
 1222 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
 1223     code <- assignMem_IntCode (intFormat width) addr val
 1224     return $ unitOL HWSYNC `appOL` code
 1225 
 1226 genCCall (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new]
 1227   | width == W32 || width == W64
 1228   = do
 1229       platform <- getPlatform
 1230       (old_reg, old_code) <- getSomeReg old
 1231       (new_reg, new_code) <- getSomeReg new
 1232       (addr_reg, addr_code) <- getSomeReg addr
 1233       lbl_retry <- getBlockIdNat
 1234       lbl_eq    <- getBlockIdNat
 1235       lbl_end   <- getBlockIdNat
 1236       let reg_dst   = getRegisterReg platform (CmmLocal dst)
 1237           code      = toOL
 1238                       [ HWSYNC
 1239                       , BCC ALWAYS lbl_retry Nothing
 1240                       , NEWBLOCK lbl_retry
 1241                       , LDR format reg_dst (AddrRegReg r0 addr_reg)
 1242                       , CMP format reg_dst (RIReg old_reg)
 1243                       , BCC NE lbl_end Nothing
 1244                       , BCC ALWAYS lbl_eq Nothing
 1245                       , NEWBLOCK lbl_eq
 1246                       , STC format new_reg (AddrRegReg r0 addr_reg)
 1247                       , BCC NE lbl_retry Nothing
 1248                       , BCC ALWAYS lbl_end Nothing
 1249                       , NEWBLOCK lbl_end
 1250                       , ISYNC
 1251                       ]
 1252       return $ addr_code `appOL` new_code `appOL` old_code `appOL` code
 1253   where
 1254     format = intFormat width
 1255 
 1256 
 1257 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
 1258  = do platform <- getPlatform
 1259       let reg_dst = getRegisterReg platform (CmmLocal dst)
 1260       if target32Bit platform && width == W64
 1261         then do
 1262           ChildCode64 code vr_lo <- iselExpr64 src
 1263           lbl1 <- getBlockIdNat
 1264           lbl2 <- getBlockIdNat
 1265           lbl3 <- getBlockIdNat
 1266           let vr_hi = getHiVRegFromLo vr_lo
 1267               cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
 1268                            , BCC NE lbl2 Nothing
 1269                            , BCC ALWAYS lbl1 Nothing
 1270 
 1271                            , NEWBLOCK lbl1
 1272                            , CNTLZ II32 reg_dst vr_lo
 1273                            , ADD reg_dst reg_dst (RIImm (ImmInt 32))
 1274                            , BCC ALWAYS lbl3 Nothing
 1275 
 1276                            , NEWBLOCK lbl2
 1277                            , CNTLZ II32 reg_dst vr_hi
 1278                            , BCC ALWAYS lbl3 Nothing
 1279 
 1280                            , NEWBLOCK lbl3
 1281                            ]
 1282           return $ code `appOL` cntlz
 1283         else do
 1284           let format = if width == W64 then II64 else II32
 1285           (s_reg, s_code) <- getSomeReg src
 1286           (pre, reg , post) <-
 1287             case width of
 1288               W64 -> return (nilOL, s_reg, nilOL)
 1289               W32 -> return (nilOL, s_reg, nilOL)
 1290               W16 -> do
 1291                 reg_tmp <- getNewRegNat format
 1292                 return
 1293                   ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
 1294                   , reg_tmp
 1295                   , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
 1296                   )
 1297               W8  -> do
 1298                 reg_tmp <- getNewRegNat format
 1299                 return
 1300                   ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
 1301                   , reg_tmp
 1302                   , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
 1303                   )
 1304               _   -> panic "genCall: Clz wrong format"
 1305           let cntlz = unitOL (CNTLZ format reg_dst reg)
 1306           return $ s_code `appOL` pre `appOL` cntlz `appOL` post
 1307 
 1308 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
 1309  = do platform <- getPlatform
 1310       let reg_dst = getRegisterReg platform (CmmLocal dst)
 1311       if target32Bit platform && width == W64
 1312         then do
 1313           let format = II32
 1314           ChildCode64 code vr_lo <- iselExpr64 src
 1315           lbl1 <- getBlockIdNat
 1316           lbl2 <- getBlockIdNat
 1317           lbl3 <- getBlockIdNat
 1318           x' <- getNewRegNat format
 1319           x'' <- getNewRegNat format
 1320           r' <- getNewRegNat format
 1321           cnttzlo <- cnttz format reg_dst vr_lo
 1322           let vr_hi = getHiVRegFromLo vr_lo
 1323               cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
 1324                              , BCC NE lbl2 Nothing
 1325                              , BCC ALWAYS lbl1 Nothing
 1326 
 1327                              , NEWBLOCK lbl1
 1328                              , ADD x' vr_hi (RIImm (ImmInt (-1)))
 1329                              , ANDC x'' x' vr_hi
 1330                              , CNTLZ format r' x''
 1331                                -- 32 + (32 - clz(x''))
 1332                              , SUBFC reg_dst r' (RIImm (ImmInt 64))
 1333                              , BCC ALWAYS lbl3 Nothing
 1334 
 1335                              , NEWBLOCK lbl2
 1336                              ]
 1337                         `appOL` cnttzlo `appOL`
 1338                         toOL [ BCC ALWAYS lbl3 Nothing
 1339 
 1340                              , NEWBLOCK lbl3
 1341                              ]
 1342           return $ code `appOL` cnttz64
 1343         else do
 1344           let format = if width == W64 then II64 else II32
 1345           (s_reg, s_code) <- getSomeReg src
 1346           (reg_ctz, pre_code) <-
 1347             case width of
 1348               W64 -> return (s_reg, nilOL)
 1349               W32 -> return (s_reg, nilOL)
 1350               W16 -> do
 1351                 reg_tmp <- getNewRegNat format
 1352                 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
 1353               W8  -> do
 1354                 reg_tmp <- getNewRegNat format
 1355                 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
 1356               _   -> panic "genCall: Ctz wrong format"
 1357           ctz_code <- cnttz format reg_dst reg_ctz
 1358           return $ s_code `appOL` pre_code `appOL` ctz_code
 1359         where
 1360           -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
 1361           -- see Henry S. Warren, Hacker's Delight, p 107
 1362           cnttz format dst src = do
 1363             let format_bits = 8 * formatInBytes format
 1364             x' <- getNewRegNat format
 1365             x'' <- getNewRegNat format
 1366             r' <- getNewRegNat format
 1367             return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
 1368                           , ANDC x'' x' src
 1369                           , CNTLZ format r' x''
 1370                           , SUBFC dst r' (RIImm (ImmInt (format_bits)))
 1371                           ]
 1372 
 1373 genCCall target dest_regs argsAndHints
 1374  = do platform <- getPlatform
 1375       case target of
 1376         PrimTarget (MO_S_QuotRem  width) -> divOp1 platform True  width
 1377                                                    dest_regs argsAndHints
 1378         PrimTarget (MO_U_QuotRem  width) -> divOp1 platform False width
 1379                                                    dest_regs argsAndHints
 1380         PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
 1381                                                    argsAndHints
 1382         PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
 1383                                                 argsAndHints
 1384         PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
 1385         PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
 1386         PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
 1387         PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
 1388                                                    dest_regs argsAndHints
 1389         PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
 1390                                                    dest_regs argsAndHints
 1391         PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
 1392         PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
 1393         _ -> do config <- getConfig
 1394                 genCCall' config (platformToGCP platform)
 1395                        target dest_regs argsAndHints
 1396         where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
 1397                 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
 1398                          reg_r = getRegisterReg platform (CmmLocal res_r)
 1399                      remainderCode width signed reg_q arg_x arg_y
 1400                        <*> pure reg_r
 1401 
 1402               divOp1 _ _ _ _ _
 1403                 = panic "genCCall: Wrong number of arguments for divOp1"
 1404               divOp2 platform width [res_q, res_r]
 1405                                     [arg_x_high, arg_x_low, arg_y]
 1406                 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
 1407                          reg_r = getRegisterReg platform (CmmLocal res_r)
 1408                          fmt   = intFormat width
 1409                          half  = 4 * (formatInBytes fmt)
 1410                      (xh_reg, xh_code) <- getSomeReg arg_x_high
 1411                      (xl_reg, xl_code) <- getSomeReg arg_x_low
 1412                      (y_reg, y_code) <- getSomeReg arg_y
 1413                      s <- getNewRegNat fmt
 1414                      b <- getNewRegNat fmt
 1415                      v <- getNewRegNat fmt
 1416                      vn1 <- getNewRegNat fmt
 1417                      vn0 <- getNewRegNat fmt
 1418                      un32 <- getNewRegNat fmt
 1419                      tmp  <- getNewRegNat fmt
 1420                      un10 <- getNewRegNat fmt
 1421                      un1 <- getNewRegNat fmt
 1422                      un0 <- getNewRegNat fmt
 1423                      q1 <- getNewRegNat fmt
 1424                      rhat <- getNewRegNat fmt
 1425                      tmp1 <- getNewRegNat fmt
 1426                      q0 <- getNewRegNat fmt
 1427                      un21 <- getNewRegNat fmt
 1428                      again1 <- getBlockIdNat
 1429                      no1 <- getBlockIdNat
 1430                      then1 <- getBlockIdNat
 1431                      endif1 <- getBlockIdNat
 1432                      again2 <- getBlockIdNat
 1433                      no2 <- getBlockIdNat
 1434                      then2 <- getBlockIdNat
 1435                      endif2 <- getBlockIdNat
 1436                      return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
 1437                               -- see Hacker's Delight p 196 Figure 9-3
 1438                               toOL [ -- b = 2 ^ (bits_in_word / 2)
 1439                                      LI b (ImmInt 1)
 1440                                    , SL fmt b b (RIImm (ImmInt half))
 1441                                      -- s = clz(y)
 1442                                    , CNTLZ fmt s y_reg
 1443                                      -- v = y << s
 1444                                    , SL fmt v y_reg (RIReg s)
 1445                                      -- vn1 = upper half of v
 1446                                    , SR fmt vn1 v (RIImm (ImmInt half))
 1447                                      -- vn0 = lower half of v
 1448                                    , CLRLI fmt vn0 v half
 1449                                      -- un32 = (u1 << s)
 1450                                      --      | (u0 >> (bits_in_word - s))
 1451                                    , SL fmt un32 xh_reg (RIReg s)
 1452                                    , SUBFC tmp s
 1453                                         (RIImm (ImmInt (8 * formatInBytes fmt)))
 1454                                    , SR fmt tmp xl_reg (RIReg tmp)
 1455                                    , OR un32 un32 (RIReg tmp)
 1456                                      -- un10 = u0 << s
 1457                                    , SL fmt un10 xl_reg (RIReg s)
 1458                                      -- un1 = upper half of un10
 1459                                    , SR fmt un1 un10 (RIImm (ImmInt half))
 1460                                      -- un0 = lower half of un10
 1461                                    , CLRLI fmt un0 un10 half
 1462                                      -- q1 = un32/vn1
 1463                                    , DIV fmt False q1 un32 vn1
 1464                                      -- rhat = un32 - q1*vn1
 1465                                    , MULL fmt tmp q1 (RIReg vn1)
 1466                                    , SUBF rhat tmp un32
 1467                                    , BCC ALWAYS again1 Nothing
 1468 
 1469                                    , NEWBLOCK again1
 1470                                      -- if (q1 >= b || q1*vn0 > b*rhat + un1)
 1471                                    , CMPL fmt q1 (RIReg b)
 1472                                    , BCC GEU then1 Nothing
 1473                                    , BCC ALWAYS no1 Nothing
 1474 
 1475                                    , NEWBLOCK no1
 1476                                    , MULL fmt tmp q1 (RIReg vn0)
 1477                                    , SL fmt tmp1 rhat (RIImm (ImmInt half))
 1478                                    , ADD tmp1 tmp1 (RIReg un1)
 1479                                    , CMPL fmt tmp (RIReg tmp1)
 1480                                    , BCC LEU endif1 Nothing
 1481                                    , BCC ALWAYS then1 Nothing
 1482 
 1483                                    , NEWBLOCK then1
 1484                                      -- q1 = q1 - 1
 1485                                    , ADD q1 q1 (RIImm (ImmInt (-1)))
 1486                                      -- rhat = rhat + vn1
 1487                                    , ADD rhat rhat (RIReg vn1)
 1488                                      -- if (rhat < b) goto again1
 1489                                    , CMPL fmt rhat (RIReg b)
 1490                                    , BCC LTT again1 Nothing
 1491                                    , BCC ALWAYS endif1 Nothing
 1492 
 1493                                    , NEWBLOCK endif1
 1494                                      -- un21 = un32*b + un1 - q1*v
 1495                                    , SL fmt un21 un32 (RIImm (ImmInt half))
 1496                                    , ADD un21 un21 (RIReg un1)
 1497                                    , MULL fmt tmp q1 (RIReg v)
 1498                                    , SUBF un21 tmp un21
 1499                                      -- compute second quotient digit
 1500                                      -- q0 = un21/vn1
 1501                                    , DIV fmt False q0 un21 vn1
 1502                                      -- rhat = un21- q0*vn1
 1503                                    , MULL fmt tmp q0 (RIReg vn1)
 1504                                    , SUBF rhat tmp un21
 1505                                    , BCC ALWAYS again2 Nothing
 1506 
 1507                                    , NEWBLOCK again2
 1508                                      -- if (q0>b || q0*vn0 > b*rhat + un0)
 1509                                    , CMPL fmt q0 (RIReg b)
 1510                                    , BCC GEU then2 Nothing
 1511                                    , BCC ALWAYS no2 Nothing
 1512 
 1513                                    , NEWBLOCK no2
 1514                                    , MULL fmt tmp q0 (RIReg vn0)
 1515                                    , SL fmt tmp1 rhat (RIImm (ImmInt half))
 1516                                    , ADD tmp1 tmp1 (RIReg un0)
 1517                                    , CMPL fmt tmp (RIReg tmp1)
 1518                                    , BCC LEU endif2 Nothing
 1519                                    , BCC ALWAYS then2 Nothing
 1520 
 1521                                    , NEWBLOCK then2
 1522                                      -- q0 = q0 - 1
 1523                                    , ADD q0 q0 (RIImm (ImmInt (-1)))
 1524                                      -- rhat = rhat + vn1
 1525                                    , ADD rhat rhat (RIReg vn1)
 1526                                      -- if (rhat<b) goto again2
 1527                                    , CMPL fmt rhat (RIReg b)
 1528                                    , BCC LTT again2 Nothing
 1529                                    , BCC ALWAYS endif2 Nothing
 1530 
 1531                                    , NEWBLOCK endif2
 1532                                      -- compute remainder
 1533                                      -- r = (un21*b + un0 - q0*v) >> s
 1534                                    , SL fmt reg_r un21 (RIImm (ImmInt half))
 1535                                    , ADD reg_r reg_r (RIReg un0)
 1536                                    , MULL fmt tmp q0 (RIReg v)
 1537                                    , SUBF reg_r tmp reg_r
 1538                                    , SR fmt reg_r reg_r (RIReg s)
 1539                                      -- compute quotient
 1540                                      -- q = q1*b + q0
 1541                                    , SL fmt reg_q q1 (RIImm (ImmInt half))
 1542                                    , ADD reg_q reg_q (RIReg q0)
 1543                                    ]
 1544               divOp2 _ _ _ _
 1545                 = panic "genCCall: Wrong number of arguments for divOp2"
 1546               multOp2 platform width [res_h, res_l] [arg_x, arg_y]
 1547                 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
 1548                          reg_l = getRegisterReg platform (CmmLocal res_l)
 1549                          fmt = intFormat width
 1550                      (x_reg, x_code) <- getSomeReg arg_x
 1551                      (y_reg, y_code) <- getSomeReg arg_y
 1552                      return $ y_code `appOL` x_code
 1553                             `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
 1554                                          , MULHU fmt reg_h x_reg y_reg
 1555                                          ]
 1556               multOp2 _ _ _ _
 1557                 = panic "genCall: Wrong number of arguments for multOp2"
 1558               add2Op platform [res_h, res_l] [arg_x, arg_y]
 1559                 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
 1560                          reg_l = getRegisterReg platform (CmmLocal res_l)
 1561                      (x_reg, x_code) <- getSomeReg arg_x
 1562                      (y_reg, y_code) <- getSomeReg arg_y
 1563                      return $ y_code `appOL` x_code
 1564                             `appOL` toOL [ LI reg_h (ImmInt 0)
 1565                                          , ADDC reg_l x_reg y_reg
 1566                                          , ADDZE reg_h reg_h
 1567                                          ]
 1568               add2Op _ _ _
 1569                 = panic "genCCall: Wrong number of arguments/results for add2"
 1570 
 1571               addcOp platform [res_r, res_c] [arg_x, arg_y]
 1572                 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
 1573               addcOp _ _ _
 1574                 = panic "genCCall: Wrong number of arguments/results for addc"
 1575 
 1576               -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
 1577               -- which is 0 for borrow and 1 otherwise. We need 1 and 0
 1578               -- so xor with 1.
 1579               subcOp platform [res_r, res_c] [arg_x, arg_y]
 1580                 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
 1581                          reg_c = getRegisterReg platform (CmmLocal res_c)
 1582                      (x_reg, x_code) <- getSomeReg arg_x
 1583                      (y_reg, y_code) <- getSomeReg arg_y
 1584                      return $ y_code `appOL` x_code
 1585                             `appOL` toOL [ LI reg_c (ImmInt 0)
 1586                                          , SUBFC reg_r y_reg (RIReg x_reg)
 1587                                          , ADDZE reg_c reg_c
 1588                                          , XOR reg_c reg_c (RIImm (ImmInt 1))
 1589                                          ]
 1590               subcOp _ _ _
 1591                 = panic "genCCall: Wrong number of arguments/results for subc"
 1592               addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
 1593                 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
 1594                          reg_c = getRegisterReg platform (CmmLocal res_c)
 1595                      (x_reg, x_code) <- getSomeReg arg_x
 1596                      (y_reg, y_code) <- getSomeReg arg_y
 1597                      return $ y_code `appOL` x_code
 1598                             `appOL` toOL [ instr reg_r y_reg x_reg,
 1599                                            -- SUBFO argument order reversed!
 1600                                            MFOV (intFormat width) reg_c
 1601                                          ]
 1602               addSubCOp _ _ _ _ _
 1603                 = panic "genCall: Wrong number of arguments/results for addC"
 1604               fabs platform [res] [arg]
 1605                 = do let res_r = getRegisterReg platform (CmmLocal res)
 1606                      (arg_reg, arg_code) <- getSomeReg arg
 1607                      return $ arg_code `snocOL` FABS res_r arg_reg
 1608               fabs _ _ _
 1609                 = panic "genCall: Wrong number of arguments/results for fabs"
 1610 
 1611 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
 1612 data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
 1613 
 1614 platformToGCP :: Platform -> GenCCallPlatform
 1615 platformToGCP platform
 1616   = case platformOS platform of
 1617       OSAIX    -> GCPAIX
 1618       _ -> case platformArch platform of
 1619              ArchPPC           -> GCP32ELF
 1620              ArchPPC_64 ELF_V1 -> GCP64ELF 1
 1621              ArchPPC_64 ELF_V2 -> GCP64ELF 2
 1622              _ -> panic "platformToGCP: Not PowerPC"
 1623 
 1624 
 1625 genCCall'
 1626     :: NCGConfig
 1627     -> GenCCallPlatform
 1628     -> ForeignTarget            -- function to call
 1629     -> [CmmFormal]        -- where to put the result
 1630     -> [CmmActual]        -- arguments (of mixed type)
 1631     -> NatM InstrBlock
 1632 
 1633 {-
 1634     PowerPC Linux uses the System V Release 4 Calling Convention
 1635     for PowerPC. It is described in the
 1636     "System V Application Binary Interface PowerPC Processor Supplement".
 1637 
 1638     PowerPC 64 Linux uses the System V Release 4 Calling Convention for
 1639     64-bit PowerPC. It is specified in
 1640     "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
 1641     (PPC64 ELF v1.9).
 1642 
 1643     PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
 1644     ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
 1645     (PPC64 ELF v2).
 1646 
 1647     AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
 1648     32-Bit Hardware Implementation"
 1649 
 1650     All four conventions are similar:
 1651     Parameters may be passed in general-purpose registers starting at r3, in
 1652     floating point registers starting at f1, or on the stack.
 1653 
 1654     But there are substantial differences:
 1655     * The number of registers used for parameter passing and the exact set of
 1656       nonvolatile registers differs (see MachRegs.hs).
 1657     * On AIX and 64-bit ELF, stack space is always reserved for parameters,
 1658       even if they are passed in registers. The called routine may choose to
 1659       save parameters from registers to the corresponding space on the stack.
 1660     * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
 1661       a floating point parameter is passed in an FPR.
 1662     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
 1663       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
 1664       AIX just treats an I64 likt two separate I32s (high word first).
 1665     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
 1666       4-byte aligned like everything else on AIX.
 1667     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
 1668       PowerPC Linux does not agree, so neither do we.
 1669 
 1670     According to all conventions, the parameter area should be part of the
 1671     caller's stack frame, allocated in the caller's prologue code (large enough
 1672     to hold the parameter lists for all called routines). The NCG already
 1673     uses the stack for register spilling, leaving 64 bytes free at the top.
 1674     If we need a larger parameter area than that, we increase the size
 1675     of the stack frame just before ccalling.
 1676 -}
 1677 
 1678 
 1679 genCCall' config gcp target dest_regs args
 1680   = do
 1681         (finalStack,passArgumentsCode,usedRegs) <- passArguments
 1682                                                    (zip3 args argReps argHints)
 1683                                                    allArgRegs
 1684                                                    (allFPArgRegs platform)
 1685                                                    initialStackOffset
 1686                                                    nilOL []
 1687 
 1688         (labelOrExpr, reduceToFF32) <- case target of
 1689             ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
 1690                 uses_pic_base_implicitly
 1691                 return (Left lbl, False)
 1692             ForeignTarget expr _ -> do
 1693                 uses_pic_base_implicitly
 1694                 return (Right expr, False)
 1695             PrimTarget mop -> outOfLineMachOp mop
 1696 
 1697         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
 1698             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
 1699 
 1700         case labelOrExpr of
 1701             Left lbl -> -- the linker does all the work for us
 1702                 return (         codeBefore
 1703                         `snocOL` BL lbl usedRegs
 1704                         `appOL`  maybeNOP -- some ABI require a NOP after BL
 1705                         `appOL`  codeAfter)
 1706             Right dyn -> do -- implement call through function pointer
 1707                 (dynReg, dynCode) <- getSomeReg dyn
 1708                 case gcp of
 1709                      GCP64ELF 1      -> return ( dynCode
 1710                        `appOL`  codeBefore
 1711                        `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
 1712                        `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
 1713                        `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
 1714                        `snocOL` MTCTR r11
 1715                        `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
 1716                        `snocOL` BCTRL usedRegs
 1717                        `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
 1718                        `appOL`  codeAfter)
 1719                      GCP64ELF 2      -> return ( dynCode
 1720                        `appOL`  codeBefore
 1721                        `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
 1722                        `snocOL` MR r12 dynReg
 1723                        `snocOL` MTCTR r12
 1724                        `snocOL` BCTRL usedRegs
 1725                        `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
 1726                        `appOL`  codeAfter)
 1727                      GCPAIX          -> return ( dynCode
 1728                        -- AIX/XCOFF follows the PowerOPEN ABI
 1729                        -- which is quite similar to LinuxPPC64/ELFv1
 1730                        `appOL`  codeBefore
 1731                        `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
 1732                        `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
 1733                        `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
 1734                        `snocOL` MTCTR r11
 1735                        `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
 1736                        `snocOL` BCTRL usedRegs
 1737                        `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
 1738                        `appOL`  codeAfter)
 1739                      _               -> return ( dynCode
 1740                        `snocOL` MTCTR dynReg
 1741                        `appOL`  codeBefore
 1742                        `snocOL` BCTRL usedRegs
 1743                        `appOL`  codeAfter)
 1744     where
 1745         platform = ncgPlatform config
 1746 
 1747         uses_pic_base_implicitly =
 1748             -- See Note [implicit register in PPC PIC code]
 1749             -- on why we claim to use PIC register here
 1750             when (ncgPIC config && target32Bit platform) $ do
 1751                 _ <- getPicBaseNat $ archWordFormat True
 1752                 return ()
 1753 
 1754         initialStackOffset = case gcp of
 1755                              GCPAIX     -> 24
 1756                              GCP32ELF   -> 8
 1757                              GCP64ELF 1 -> 48
 1758                              GCP64ELF 2 -> 32
 1759                              _ -> panic "genCall': unknown calling convention"
 1760             -- size of linkage area + size of arguments, in bytes
 1761         stackDelta finalStack = case gcp of
 1762                                 GCPAIX ->
 1763                                     roundTo 16 $ (24 +) $ max 32 $ sum $
 1764                                     map (widthInBytes . typeWidth) argReps
 1765                                 GCP32ELF -> roundTo 16 finalStack
 1766                                 GCP64ELF 1 ->
 1767                                     roundTo 16 $ (48 +) $ max 64 $ sum $
 1768                                     map (roundTo 8 . widthInBytes . typeWidth)
 1769                                         argReps
 1770                                 GCP64ELF 2 ->
 1771                                     roundTo 16 $ (32 +) $ max 64 $ sum $
 1772                                     map (roundTo 8 . widthInBytes . typeWidth)
 1773                                         argReps
 1774                                 _ -> panic "genCall': unknown calling conv."
 1775 
 1776         argReps = map (cmmExprType platform) args
 1777         (argHints, _) = foreignTargetHints target
 1778 
 1779         roundTo a x | x `mod` a == 0 = x
 1780                     | otherwise = x + a - (x `mod` a)
 1781 
 1782         spFormat = if target32Bit platform then II32 else II64
 1783 
 1784         -- TODO: Do not create a new stack frame if delta is too large.
 1785         move_sp_down finalStack
 1786                | delta > stackFrameHeaderSize platform =
 1787                         toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
 1788                               DELTA (-delta)]
 1789                | otherwise = nilOL
 1790                where delta = stackDelta finalStack
 1791         move_sp_up finalStack
 1792                | delta > stackFrameHeaderSize platform =
 1793                         toOL [ADD sp sp (RIImm (ImmInt delta)),
 1794                               DELTA 0]
 1795                | otherwise = nilOL
 1796                where delta = stackDelta finalStack
 1797 
 1798         -- A NOP instruction is required after a call (bl instruction)
 1799         -- on AIX and 64-Bit Linux.
 1800         -- If the call is to a function with a different TOC (r2) the
 1801         -- link editor replaces the NOP instruction with a load of the TOC
 1802         -- from the stack to restore the TOC.
 1803         maybeNOP = case gcp of
 1804            GCP32ELF        -> nilOL
 1805            -- See Section 3.9.4 of OpenPower ABI
 1806            GCPAIX          -> unitOL NOP
 1807            -- See Section 3.5.11 of PPC64 ELF v1.9
 1808            GCP64ELF 1      -> unitOL NOP
 1809            -- See Section 2.3.6 of PPC64 ELF v2
 1810            GCP64ELF 2      -> unitOL NOP
 1811            _               -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
 1812 
 1813         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
 1814         passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
 1815                accumCode accumUsed | isWord64 arg_ty
 1816                                      && target32Bit (ncgPlatform config) =
 1817             do
 1818                 ChildCode64 code vr_lo <- iselExpr64 arg
 1819                 let vr_hi = getHiVRegFromLo vr_lo
 1820 
 1821                 case gcp of
 1822                     GCPAIX ->
 1823                         do let storeWord vr (gpr:_) _ = MR gpr vr
 1824                                storeWord vr [] offset
 1825                                    = ST II32 vr (AddrRegImm sp (ImmInt offset))
 1826                            passArguments args
 1827                                          (drop 2 gprs)
 1828                                          fprs
 1829                                          (stackOffset+8)
 1830                                          (accumCode `appOL` code
 1831                                                `snocOL` storeWord vr_hi gprs stackOffset
 1832                                                `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
 1833                                          ((take 2 gprs) ++ accumUsed)
 1834                     GCP32ELF ->
 1835                         do let stackOffset' = roundTo 8 stackOffset
 1836                                stackCode = accumCode `appOL` code
 1837                                    `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
 1838                                    `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
 1839                                regCode hireg loreg =
 1840                                    accumCode `appOL` code
 1841                                        `snocOL` MR hireg vr_hi
 1842                                        `snocOL` MR loreg vr_lo
 1843 
 1844                            case gprs of
 1845                                hireg : loreg : regs | even (length gprs) ->
 1846                                    passArguments args regs fprs stackOffset
 1847                                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
 1848                                _skipped : hireg : loreg : regs ->
 1849                                    passArguments args regs fprs stackOffset
 1850                                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
 1851                                _ -> -- only one or no regs left
 1852                                    passArguments args [] fprs (stackOffset'+8)
 1853                                                  stackCode accumUsed
 1854                     GCP64ELF _ -> panic "passArguments: 32 bit code"
 1855 
 1856         passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
 1857             | reg : _ <- regs = do
 1858                 register <- getRegister arg_pro
 1859                 let code = case register of
 1860                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
 1861                             Any _ acode -> acode reg
 1862                     stackOffsetRes = case gcp of
 1863                                      -- The PowerOpen ABI requires that we
 1864                                      -- reserve stack slots for register
 1865                                      -- parameters
 1866                                      GCPAIX    -> stackOffset + stackBytes
 1867                                      -- ... the SysV ABI 32-bit doesn't.
 1868                                      GCP32ELF -> stackOffset
 1869                                      -- ... but SysV ABI 64-bit does.
 1870                                      GCP64ELF _ -> stackOffset + stackBytes
 1871                 passArguments args
 1872                               (drop nGprs gprs)
 1873                               (drop nFprs fprs)
 1874                               stackOffsetRes
 1875                               (accumCode `appOL` code)
 1876                               (reg : accumUsed)
 1877             | otherwise = do
 1878                 (vr, code) <- getSomeReg arg_pro
 1879                 passArguments args
 1880                               (drop nGprs gprs)
 1881                               (drop nFprs fprs)
 1882                               (stackOffset' + stackBytes)
 1883                               (accumCode `appOL` code
 1884                                          `snocOL` ST format_pro vr stackSlot)
 1885                               accumUsed
 1886             where
 1887                 arg_pro
 1888                    | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth platform)) [arg]
 1889                    | otherwise      = arg
 1890                 format_pro
 1891                    | isBitsType rep = intFormat (wordWidth platform)
 1892                    | otherwise      = cmmTypeFormat rep
 1893                 conv_op = case hint of
 1894                             SignedHint -> MO_SS_Conv
 1895                             _          -> MO_UU_Conv
 1896 
 1897                 stackOffset' = case gcp of
 1898                                GCPAIX ->
 1899                                    -- The 32bit PowerOPEN ABI is happy with
 1900                                    -- 32bit-alignment ...
 1901                                    stackOffset
 1902                                GCP32ELF
 1903                                    -- ... the SysV ABI requires 8-byte
 1904                                    -- alignment for doubles.
 1905                                 | isFloatType rep && typeWidth rep == W64 ->
 1906                                    roundTo 8 stackOffset
 1907                                 | otherwise ->
 1908                                    stackOffset
 1909                                GCP64ELF _ ->
 1910                                    -- Everything on the stack is mapped to
 1911                                    -- 8-byte aligned doublewords
 1912                                    stackOffset
 1913                 stackOffset''
 1914                      | isFloatType rep && typeWidth rep == W32 =
 1915                          case gcp of
 1916                          -- The ELF v1 ABI Section 3.2.3 requires:
 1917                          -- "Single precision floating point values
 1918                          -- are mapped to the second word in a single
 1919                          -- doubleword"
 1920                          GCP64ELF 1      -> stackOffset' + 4
 1921                          _               -> stackOffset'
 1922                      | otherwise = stackOffset'
 1923 
 1924                 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
 1925                 (nGprs, nFprs, stackBytes, regs)
 1926                     = case gcp of
 1927                       GCPAIX ->
 1928                           case cmmTypeFormat rep of
 1929                           II8  -> (1, 0, 4, gprs)
 1930                           II16 -> (1, 0, 4, gprs)
 1931                           II32 -> (1, 0, 4, gprs)
 1932                           -- The PowerOpen ABI requires that we skip a
 1933                           -- corresponding number of GPRs when we use
 1934                           -- the FPRs.
 1935                           --
 1936                           -- E.g. for a `double` two GPRs are skipped,
 1937                           -- whereas for a `float` one GPR is skipped
 1938                           -- when parameters are assigned to
 1939                           -- registers.
 1940                           --
 1941                           -- The PowerOpen ABI specification can be found at
 1942                           -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
 1943                           FF32 -> (1, 1, 4, fprs)
 1944                           FF64 -> (2, 1, 8, fprs)
 1945                           II64 -> panic "genCCall' passArguments II64"
 1946 
 1947                       GCP32ELF ->
 1948                           case cmmTypeFormat rep of
 1949                           II8  -> (1, 0, 4, gprs)
 1950                           II16 -> (1, 0, 4, gprs)
 1951                           II32 -> (1, 0, 4, gprs)
 1952                           -- ... the SysV ABI doesn't.
 1953                           FF32 -> (0, 1, 4, fprs)
 1954                           FF64 -> (0, 1, 8, fprs)
 1955                           II64 -> panic "genCCall' passArguments II64"
 1956                       GCP64ELF _ ->
 1957                           case cmmTypeFormat rep of
 1958                           II8  -> (1, 0, 8, gprs)
 1959                           II16 -> (1, 0, 8, gprs)
 1960                           II32 -> (1, 0, 8, gprs)
 1961                           II64 -> (1, 0, 8, gprs)
 1962                           -- The ELFv1 ABI requires that we skip a
 1963                           -- corresponding number of GPRs when we use
 1964                           -- the FPRs.
 1965                           FF32 -> (1, 1, 8, fprs)
 1966                           FF64 -> (1, 1, 8, fprs)
 1967 
 1968         moveResult reduceToFF32 =
 1969             case dest_regs of
 1970                 [] -> nilOL
 1971                 [dest]
 1972                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
 1973                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
 1974                     | isWord64 rep && target32Bit platform
 1975                        -> toOL [MR (getHiVRegFromLo r_dest) r3,
 1976                                 MR r_dest r4]
 1977                     | otherwise -> unitOL (MR r_dest r3)
 1978                     where rep = cmmRegType platform (CmmLocal dest)
 1979                           r_dest = getRegisterReg platform (CmmLocal dest)
 1980                 _ -> panic "genCCall' moveResult: Bad dest_regs"
 1981 
 1982         outOfLineMachOp mop =
 1983             do
 1984                 mopExpr <- cmmMakeDynamicReference config CallReference $
 1985                               mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
 1986                 let mopLabelOrExpr = case mopExpr of
 1987                         CmmLit (CmmLabel lbl) -> Left lbl
 1988                         _ -> Right mopExpr
 1989                 return (mopLabelOrExpr, reduce)
 1990             where
 1991                 (functionName, reduce) = case mop of
 1992                     MO_F32_Exp   -> (fsLit "exp", True)
 1993                     MO_F32_ExpM1 -> (fsLit "expm1", True)
 1994                     MO_F32_Log   -> (fsLit "log", True)
 1995                     MO_F32_Log1P -> (fsLit "log1p", True)
 1996                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
 1997                     MO_F32_Fabs  -> unsupported
 1998 
 1999                     MO_F32_Sin   -> (fsLit "sin", True)
 2000                     MO_F32_Cos   -> (fsLit "cos", True)
 2001                     MO_F32_Tan   -> (fsLit "tan", True)
 2002 
 2003                     MO_F32_Asin  -> (fsLit "asin", True)
 2004                     MO_F32_Acos  -> (fsLit "acos", True)
 2005                     MO_F32_Atan  -> (fsLit "atan", True)
 2006 
 2007                     MO_F32_Sinh  -> (fsLit "sinh", True)
 2008                     MO_F32_Cosh  -> (fsLit "cosh", True)
 2009                     MO_F32_Tanh  -> (fsLit "tanh", True)
 2010                     MO_F32_Pwr   -> (fsLit "pow", True)
 2011 
 2012                     MO_F32_Asinh -> (fsLit "asinh", True)
 2013                     MO_F32_Acosh -> (fsLit "acosh", True)
 2014                     MO_F32_Atanh -> (fsLit "atanh", True)
 2015 
 2016                     MO_F64_Exp   -> (fsLit "exp", False)
 2017                     MO_F64_ExpM1 -> (fsLit "expm1", False)
 2018                     MO_F64_Log   -> (fsLit "log", False)
 2019                     MO_F64_Log1P -> (fsLit "log1p", False)
 2020                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
 2021                     MO_F64_Fabs  -> unsupported
 2022 
 2023                     MO_F64_Sin   -> (fsLit "sin", False)
 2024                     MO_F64_Cos   -> (fsLit "cos", False)
 2025                     MO_F64_Tan   -> (fsLit "tan", False)
 2026 
 2027                     MO_F64_Asin  -> (fsLit "asin", False)
 2028                     MO_F64_Acos  -> (fsLit "acos", False)
 2029                     MO_F64_Atan  -> (fsLit "atan", False)
 2030 
 2031                     MO_F64_Sinh  -> (fsLit "sinh", False)
 2032                     MO_F64_Cosh  -> (fsLit "cosh", False)
 2033                     MO_F64_Tanh  -> (fsLit "tanh", False)
 2034                     MO_F64_Pwr   -> (fsLit "pow", False)
 2035 
 2036                     MO_F64_Asinh -> (fsLit "asinh", False)
 2037                     MO_F64_Acosh -> (fsLit "acosh", False)
 2038                     MO_F64_Atanh -> (fsLit "atanh", False)
 2039 
 2040                     MO_I64_ToI   -> (fsLit "hs_int64ToInt", False)
 2041                     MO_I64_FromI -> (fsLit "hs_intToInt64", False)
 2042                     MO_W64_ToW   -> (fsLit "hs_word64ToWord", False)
 2043                     MO_W64_FromW -> (fsLit "hs_wordToWord64", False)
 2044 
 2045                     MO_x64_Neg   -> (fsLit "hs_neg64", False)
 2046                     MO_x64_Add   -> (fsLit "hs_add64", False)
 2047                     MO_x64_Sub   -> (fsLit "hs_sub64", False)
 2048                     MO_x64_Mul   -> (fsLit "hs_mul64", False)
 2049                     MO_I64_Quot  -> (fsLit "hs_quotInt64", False)
 2050                     MO_I64_Rem   -> (fsLit "hs_remInt64", False)
 2051                     MO_W64_Quot  -> (fsLit "hs_quotWord64", False)
 2052                     MO_W64_Rem   -> (fsLit "hs_remWord64", False)
 2053 
 2054                     MO_x64_And   -> (fsLit "hs_and64", False)
 2055                     MO_x64_Or    -> (fsLit "hs_or64", False)
 2056                     MO_x64_Xor   -> (fsLit "hs_xor64", False)
 2057                     MO_x64_Not   -> (fsLit "hs_not64", False)
 2058                     MO_x64_Shl   -> (fsLit "hs_uncheckedShiftL64", False)
 2059                     MO_I64_Shr   -> (fsLit "hs_uncheckedIShiftRA64", False)
 2060                     MO_W64_Shr   -> (fsLit "hs_uncheckedShiftRL64", False)
 2061 
 2062                     MO_x64_Eq    -> (fsLit "hs_eq64", False)
 2063                     MO_x64_Ne    -> (fsLit "hs_ne64", False)
 2064                     MO_I64_Ge    -> (fsLit "hs_geInt64", False)
 2065                     MO_I64_Gt    -> (fsLit "hs_gtInt64", False)
 2066                     MO_I64_Le    -> (fsLit "hs_leInt64", False)
 2067                     MO_I64_Lt    -> (fsLit "hs_ltInt64", False)
 2068                     MO_W64_Ge    -> (fsLit "hs_geWord64", False)
 2069                     MO_W64_Gt    -> (fsLit "hs_gtWord64", False)
 2070                     MO_W64_Le    -> (fsLit "hs_leWord64", False)
 2071                     MO_W64_Lt    -> (fsLit "hs_ltWord64", False)
 2072 
 2073                     MO_UF_Conv w -> (word2FloatLabel w, False)
 2074 
 2075                     MO_Memcpy _  -> (fsLit "memcpy", False)
 2076                     MO_Memset _  -> (fsLit "memset", False)
 2077                     MO_Memmove _ -> (fsLit "memmove", False)
 2078                     MO_Memcmp _  -> (fsLit "memcmp", False)
 2079 
 2080                     MO_SuspendThread -> (fsLit "suspendThread", False)
 2081                     MO_ResumeThread  -> (fsLit "resumeThread", False)
 2082 
 2083                     MO_BSwap w   -> (bSwapLabel w, False)
 2084                     MO_BRev w    -> (bRevLabel w, False)
 2085                     MO_PopCnt w  -> (popCntLabel w, False)
 2086                     MO_Pdep w    -> (pdepLabel w, False)
 2087                     MO_Pext w    -> (pextLabel w, False)
 2088                     MO_Clz _     -> unsupported
 2089                     MO_Ctz _     -> unsupported
 2090                     MO_AtomicRMW {} -> unsupported
 2091                     MO_Cmpxchg w -> (cmpxchgLabel w, False)
 2092                     MO_Xchg w    -> (xchgLabel w, False)
 2093                     MO_AtomicRead _  -> unsupported
 2094                     MO_AtomicWrite _ -> unsupported
 2095 
 2096                     MO_S_Mul2    {}  -> unsupported
 2097                     MO_S_QuotRem {}  -> unsupported
 2098                     MO_U_QuotRem {}  -> unsupported
 2099                     MO_U_QuotRem2 {} -> unsupported
 2100                     MO_Add2 {}       -> unsupported
 2101                     MO_AddWordC {}   -> unsupported
 2102                     MO_SubWordC {}   -> unsupported
 2103                     MO_AddIntC {}    -> unsupported
 2104                     MO_SubIntC {}    -> unsupported
 2105                     MO_U_Mul2 {}     -> unsupported
 2106                     MO_ReadBarrier   -> unsupported
 2107                     MO_WriteBarrier  -> unsupported
 2108                     MO_Touch         -> unsupported
 2109                     MO_Prefetch_Data _ -> unsupported
 2110                 unsupported = panic ("outOfLineCmmOp: " ++ show mop
 2111                                   ++ " not supported")
 2112 
 2113 -- -----------------------------------------------------------------------------
 2114 -- Generating a table-branch
 2115 
 2116 genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock
 2117 genSwitch config expr targets
 2118   | OSAIX <- platformOS platform
 2119   = do
 2120         (reg,e_code) <- getSomeReg indexExpr
 2121         let fmt = archWordFormat $ target32Bit platform
 2122             sha = if target32Bit platform then 2 else 3
 2123         tmp <- getNewRegNat fmt
 2124         lbl <- getNewLabelNat
 2125         dynRef <- cmmMakeDynamicReference config DataReference lbl
 2126         (tableReg,t_code) <- getSomeReg $ dynRef
 2127         let code = e_code `appOL` t_code `appOL` toOL [
 2128                             SL fmt tmp reg (RIImm (ImmInt sha)),
 2129                             LD fmt tmp (AddrRegReg tableReg tmp),
 2130                             MTCTR tmp,
 2131                             BCTR ids (Just lbl) []
 2132                     ]
 2133         return code
 2134 
 2135   | (ncgPIC config) || (not $ target32Bit platform)
 2136   = do
 2137         (reg,e_code) <- getSomeReg indexExpr
 2138         let fmt = archWordFormat $ target32Bit platform
 2139             sha = if target32Bit platform then 2 else 3
 2140         tmp <- getNewRegNat fmt
 2141         lbl <- getNewLabelNat
 2142         dynRef <- cmmMakeDynamicReference config DataReference lbl
 2143         (tableReg,t_code) <- getSomeReg $ dynRef
 2144         let code = e_code `appOL` t_code `appOL` toOL [
 2145                             SL fmt tmp reg (RIImm (ImmInt sha)),
 2146                             LD fmt tmp (AddrRegReg tableReg tmp),
 2147                             ADD tmp tmp (RIReg tableReg),
 2148                             MTCTR tmp,
 2149                             BCTR ids (Just lbl) []
 2150                     ]
 2151         return code
 2152   | otherwise
 2153   = do
 2154         (reg,e_code) <- getSomeReg indexExpr
 2155         let fmt = archWordFormat $ target32Bit platform
 2156             sha = if target32Bit platform then 2 else 3
 2157         tmp <- getNewRegNat fmt
 2158         lbl <- getNewLabelNat
 2159         let code = e_code `appOL` toOL [
 2160                             SL fmt tmp reg (RIImm (ImmInt sha)),
 2161                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
 2162                             LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
 2163                             MTCTR tmp,
 2164                             BCTR ids (Just lbl) []
 2165                     ]
 2166         return code
 2167   where
 2168     indexExpr = cmmOffset platform exprWidened offset
 2169     -- We widen to a native-width register to santize the high bits
 2170     exprWidened = CmmMachOp
 2171       (MO_UU_Conv (cmmExprWidth platform expr)
 2172                   (platformWordWidth platform))
 2173       [expr]
 2174     (offset, ids) = switchTargetsToTable targets
 2175     platform      = ncgPlatform config
 2176 
 2177 generateJumpTableForInstr :: NCGConfig -> Instr
 2178                           -> Maybe (NatCmmDecl RawCmmStatics Instr)
 2179 generateJumpTableForInstr config (BCTR ids (Just lbl) _) =
 2180     let jumpTable
 2181             | (ncgPIC config) || (not $ target32Bit $ ncgPlatform config)
 2182             = map jumpTableEntryRel ids
 2183             | otherwise = map (jumpTableEntry config) ids
 2184                 where jumpTableEntryRel Nothing
 2185                         = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
 2186                       jumpTableEntryRel (Just blockid)
 2187                         = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
 2188                                          (ncgWordWidth config))
 2189                             where blockLabel = blockLbl blockid
 2190     in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable))
 2191 generateJumpTableForInstr _ _ = Nothing
 2192 
 2193 -- -----------------------------------------------------------------------------
 2194 -- 'condIntReg' and 'condFltReg': condition codes into registers
 2195 
 2196 -- Turn those condition codes into integers now (when they appear on
 2197 -- the right hand side of an assignment).
 2198 
 2199 
 2200 
 2201 condReg :: NatM CondCode -> NatM Register
 2202 condReg getCond = do
 2203     CondCode _ cond cond_code <- getCond
 2204     platform <- getPlatform
 2205     let
 2206         code dst = cond_code
 2207             `appOL` negate_code
 2208             `appOL` toOL [
 2209                 MFCR dst,
 2210                 RLWINM dst dst (bit + 1) 31 31
 2211             ]
 2212 
 2213         negate_code | do_negate = unitOL (CRNOR bit bit bit)
 2214                     | otherwise = nilOL
 2215 
 2216         (bit, do_negate) = case cond of
 2217             LTT -> (0, False)
 2218             LE  -> (1, True)
 2219             EQQ -> (2, False)
 2220             GE  -> (0, True)
 2221             GTT -> (1, False)
 2222 
 2223             NE  -> (2, True)
 2224 
 2225             LU  -> (0, False)
 2226             LEU -> (1, True)
 2227             GEU -> (0, True)
 2228             GU  -> (1, False)
 2229             _   -> panic "PPC.CodeGen.codeReg: no match"
 2230 
 2231         format = archWordFormat $ target32Bit platform
 2232     return (Any format code)
 2233 
 2234 condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
 2235 condIntReg cond width x y = condReg (condIntCode cond width x y)
 2236 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
 2237 condFltReg cond x y = condReg (condFltCode cond x y)
 2238 
 2239 
 2240 
 2241 -- -----------------------------------------------------------------------------
 2242 -- 'trivial*Code': deal with trivial instructions
 2243 
 2244 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
 2245 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
 2246 -- Only look for constants on the right hand side, because that's
 2247 -- where the generic optimizer will have put them.
 2248 
 2249 -- Similarly, for unary instructions, we don't have to worry about
 2250 -- matching an StInt as the argument, because genericOpt will already
 2251 -- have handled the constant-folding.
 2252 
 2253 
 2254 
 2255 {-
 2256 Wolfgang's PowerPC version of The Rules:
 2257 
 2258 A slightly modified version of The Rules to take advantage of the fact
 2259 that PowerPC instructions work on all registers and don't implicitly
 2260 clobber any fixed registers.
 2261 
 2262 * The only expression for which getRegister returns Fixed is (CmmReg reg).
 2263 
 2264 * If getRegister returns Any, then the code it generates may modify only:
 2265         (a) fresh temporaries
 2266         (b) the destination register
 2267   It may *not* modify global registers, unless the global
 2268   register happens to be the destination register.
 2269   It may not clobber any other registers. In fact, only ccalls clobber any
 2270   fixed registers.
 2271   Also, it may not modify the counter register (used by genCCall).
 2272 
 2273   Corollary: If a getRegister for a subexpression returns Fixed, you need
 2274   not move it to a fresh temporary before evaluating the next subexpression.
 2275   The Fixed register won't be modified.
 2276   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
 2277 
 2278 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
 2279   the value of the destination register.
 2280 -}
 2281 
 2282 trivialCode
 2283         :: Width
 2284         -> Bool
 2285         -> (Reg -> Reg -> RI -> Instr)
 2286         -> CmmExpr
 2287         -> CmmExpr
 2288         -> NatM Register
 2289 
 2290 trivialCode rep signed instr x (CmmLit (CmmInt y _))
 2291     | Just imm <- makeImmediate rep signed y
 2292     = do
 2293         (src1, code1) <- getSomeReg x
 2294         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
 2295         return (Any (intFormat rep) code)
 2296 
 2297 trivialCode rep _ instr x y = do
 2298     (src1, code1) <- getSomeReg x
 2299     (src2, code2) <- getSomeReg y
 2300     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
 2301     return (Any (intFormat rep) code)
 2302 
 2303 shiftMulCode
 2304         :: Width
 2305         -> Bool
 2306         -> (Format-> Reg -> Reg -> RI -> Instr)
 2307         -> CmmExpr
 2308         -> CmmExpr
 2309         -> NatM Register
 2310 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
 2311     | Just imm <- makeImmediate width sign y
 2312     = do
 2313         (src1, code1) <- getSomeReg x
 2314         let format = intFormat width
 2315         let ins_fmt = intFormat (max W32 width)
 2316         let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
 2317         return (Any format code)
 2318 
 2319 shiftMulCode width _ instr x y = do
 2320     (src1, code1) <- getSomeReg x
 2321     (src2, code2) <- getSomeReg y
 2322     let format = intFormat width
 2323     let ins_fmt = intFormat (max W32 width)
 2324     let code dst = code1 `appOL` code2
 2325                    `snocOL` instr ins_fmt dst src1 (RIReg src2)
 2326     return (Any format code)
 2327 
 2328 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
 2329                  -> CmmExpr -> CmmExpr -> NatM Register
 2330 trivialCodeNoImm' format instr x y = do
 2331     (src1, code1) <- getSomeReg x
 2332     (src2, code2) <- getSomeReg y
 2333     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
 2334     return (Any format code)
 2335 
 2336 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
 2337                  -> CmmExpr -> CmmExpr -> NatM Register
 2338 trivialCodeNoImm format instr x y
 2339   = trivialCodeNoImm' format (instr format) x y
 2340 
 2341 srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
 2342        -> CmmExpr -> CmmExpr -> NatM Register
 2343 srCode width sgn instr x (CmmLit (CmmInt y _))
 2344     | Just imm <- makeImmediate width sgn y
 2345     = do
 2346         let op_len = max W32 width
 2347             extend = if sgn then extendSExpr else extendUExpr
 2348         (src1, code1) <- getSomeReg (extend width op_len x)
 2349         let code dst = code1 `snocOL`
 2350                        instr (intFormat op_len) dst src1 (RIImm imm)
 2351         return (Any (intFormat width) code)
 2352 
 2353 srCode width sgn instr x y = do
 2354   let op_len = max W32 width
 2355       extend = if sgn then extendSExpr else extendUExpr
 2356   (src1, code1) <- getSomeReg (extend width op_len x)
 2357   (src2, code2) <- getSomeReg (extendUExpr width op_len y)
 2358   -- Note: Shift amount `y` is unsigned
 2359   let code dst = code1 `appOL` code2 `snocOL`
 2360                  instr (intFormat op_len) dst src1 (RIReg src2)
 2361   return (Any (intFormat width) code)
 2362 
 2363 divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
 2364 divCode width sgn x y = do
 2365   let op_len = max W32 width
 2366       extend = if sgn then extendSExpr else extendUExpr
 2367   (src1, code1) <- getSomeReg (extend width op_len x)
 2368   (src2, code2) <- getSomeReg (extend width op_len y)
 2369   let code dst = code1 `appOL` code2 `snocOL`
 2370                  DIV (intFormat op_len) sgn dst src1 src2
 2371   return (Any (intFormat width) code)
 2372 
 2373 
 2374 trivialUCode :: Format
 2375              -> (Reg -> Reg -> Instr)
 2376              -> CmmExpr
 2377              -> NatM Register
 2378 trivialUCode rep instr x = do
 2379     (src, code) <- getSomeReg x
 2380     let code' dst = code `snocOL` instr dst src
 2381     return (Any rep code')
 2382 
 2383 -- There is no "remainder" instruction on the PPC, so we have to do
 2384 -- it the hard way.
 2385 -- The "sgn" parameter is the signedness for the division instruction
 2386 
 2387 remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
 2388                -> NatM (Reg -> InstrBlock)
 2389 remainderCode rep sgn reg_q arg_x arg_y = do
 2390   let op_len = max W32 rep
 2391       fmt    = intFormat op_len
 2392       extend = if sgn then extendSExpr else extendUExpr
 2393   (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
 2394   (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
 2395   return $ \reg_r -> y_code `appOL` x_code
 2396                      `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
 2397                                   , MULL fmt reg_r reg_q (RIReg y_reg)
 2398                                   , SUBF reg_r reg_r x_reg
 2399                                   ]
 2400 
 2401 
 2402 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
 2403 coerceInt2FP fromRep toRep x = do
 2404     platform <- getPlatform
 2405     let arch = platformArch platform
 2406     coerceInt2FP' arch fromRep toRep x
 2407 
 2408 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
 2409 coerceInt2FP' ArchPPC fromRep toRep x = do
 2410     (src, code) <- getSomeReg x
 2411     lbl <- getNewLabelNat
 2412     itmp <- getNewRegNat II32
 2413     ftmp <- getNewRegNat FF64
 2414     config <- getConfig
 2415     platform <- getPlatform
 2416     dynRef <- cmmMakeDynamicReference config DataReference lbl
 2417     Amode addr addr_code <- getAmode D dynRef
 2418     let
 2419         code' dst = code `appOL` maybe_exts `appOL` toOL [
 2420                 LDATA (Section ReadOnlyData lbl) $ CmmStaticsRaw lbl
 2421                                  [CmmStaticLit (CmmInt 0x43300000 W32),
 2422                                   CmmStaticLit (CmmInt 0x80000000 W32)],
 2423                 XORIS itmp src (ImmInt 0x8000),
 2424                 ST II32 itmp (spRel platform 3),
 2425                 LIS itmp (ImmInt 0x4330),
 2426                 ST II32 itmp (spRel platform 2),
 2427                 LD FF64 ftmp (spRel platform 2)
 2428             ] `appOL` addr_code `appOL` toOL [
 2429                 LD FF64 dst addr,
 2430                 FSUB FF64 dst ftmp dst
 2431             ] `appOL` maybe_frsp dst
 2432 
 2433         maybe_exts = case fromRep of
 2434                         W8 ->  unitOL $ EXTS II8 src src
 2435                         W16 -> unitOL $ EXTS II16 src src
 2436                         W32 -> nilOL
 2437                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
 2438 
 2439         maybe_frsp dst
 2440                 = case toRep of
 2441                         W32 -> unitOL $ FRSP dst dst
 2442                         W64 -> nilOL
 2443                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
 2444 
 2445     return (Any (floatFormat toRep) code')
 2446 
 2447 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
 2448 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
 2449 -- set right before a call and restored right after return from the call.
 2450 -- So it is fine.
 2451 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
 2452     (src, code) <- getSomeReg x
 2453     platform <- getPlatform
 2454     upper <- getNewRegNat II64
 2455     lower <- getNewRegNat II64
 2456     l1 <- getBlockIdNat
 2457     l2 <- getBlockIdNat
 2458     let
 2459         code' dst = code `appOL` maybe_exts `appOL` toOL [
 2460                 ST II64 src (spRel platform 3),
 2461                 LD FF64 dst (spRel platform 3),
 2462                 FCFID dst dst
 2463             ] `appOL` maybe_frsp dst
 2464 
 2465         maybe_exts
 2466           = case fromRep of
 2467               W8 ->  unitOL $ EXTS II8 src src
 2468               W16 -> unitOL $ EXTS II16 src src
 2469               W32 -> unitOL $ EXTS II32 src src
 2470               W64 -> case toRep of
 2471                         W32 -> toOL [ SRA II64 upper src (RIImm (ImmInt 53))
 2472                                     , CLRLI II64 lower src 53
 2473                                     , ADD upper upper (RIImm (ImmInt 1))
 2474                                     , ADD lower lower (RIImm (ImmInt 2047))
 2475                                     , CMPL II64 upper (RIImm (ImmInt 2))
 2476                                     , OR lower lower (RIReg src)
 2477                                     , CLRRI II64 lower lower 11
 2478                                     , BCC LTT l2 Nothing
 2479                                     , BCC ALWAYS l1 Nothing
 2480                                     , NEWBLOCK l1
 2481                                     , MR src lower
 2482                                     , BCC ALWAYS l2 Nothing
 2483                                     , NEWBLOCK l2
 2484                                     ]
 2485                         _   -> nilOL
 2486               _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
 2487 
 2488         maybe_frsp dst
 2489                 = case toRep of
 2490                         W32 -> unitOL $ FRSP dst dst
 2491                         W64 -> nilOL
 2492                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
 2493 
 2494     return (Any (floatFormat toRep) code')
 2495 
 2496 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
 2497 
 2498 
 2499 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
 2500 coerceFP2Int fromRep toRep x = do
 2501     platform <- getPlatform
 2502     let arch =  platformArch platform
 2503     coerceFP2Int' arch fromRep toRep x
 2504 
 2505 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
 2506 coerceFP2Int' ArchPPC _ toRep x = do
 2507     platform <- getPlatform
 2508     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
 2509     (src, code) <- getSomeReg x
 2510     tmp <- getNewRegNat FF64
 2511     let
 2512         code' dst = code `appOL` toOL [
 2513                 -- convert to int in FP reg
 2514             FCTIWZ tmp src,
 2515                 -- store value (64bit) from FP to stack
 2516             ST FF64 tmp (spRel platform 2),
 2517                 -- read low word of value (high word is undefined)
 2518             LD II32 dst (spRel platform 3)]
 2519     return (Any (intFormat toRep) code')
 2520 
 2521 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
 2522     platform <- getPlatform
 2523     -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
 2524     (src, code) <- getSomeReg x
 2525     tmp <- getNewRegNat FF64
 2526     let
 2527         code' dst = code `appOL` toOL [
 2528                 -- convert to int in FP reg
 2529             FCTIDZ tmp src,
 2530                 -- store value (64bit) from FP to compiler word on stack
 2531             ST FF64 tmp (spRel platform 3),
 2532             LD II64 dst (spRel platform 3)]
 2533     return (Any (intFormat toRep) code')
 2534 
 2535 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
 2536 
 2537 -- Note [.LCTOC1 in PPC PIC code]
 2538 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
 2539 -- to make the most of the PPC's 16-bit displacements.
 2540 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
 2541 -- first element will have '-32768' offset against .LCTOC1.
 2542 
 2543 -- Note [implicit register in PPC PIC code]
 2544 -- PPC generates calls by labels in assembly
 2545 -- in form of:
 2546 --     bl puts+32768@plt
 2547 -- in this form it's not seen directly (by GHC NCG)
 2548 -- that r30 (PicBaseReg) is used,
 2549 -- but r30 is a required part of PLT code setup:
 2550 --   puts+32768@plt:
 2551 --       lwz     r11,-30484(r30) ; offset in .LCTOC1
 2552 --       mtctr   r11
 2553 --       bctr