never executed always true always false
    1 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
    2 
    3 -----------------------------------------------------------------------------
    4 --
    5 -- Machine-dependent assembly language
    6 --
    7 -- (c) The University of Glasgow 1993-2004
    8 --
    9 -----------------------------------------------------------------------------
   10 
   11 module GHC.CmmToAsm.PPC.Instr
   12    ( Instr(..)
   13    , RI(..)
   14    , archWordFormat
   15    , stackFrameHeaderSize
   16    , maxSpillSlots
   17    , allocMoreStack
   18    , makeFarBranches
   19    , mkJumpInstr
   20    , mkLoadInstr
   21    , mkSpillInstr
   22    , patchJumpInstr
   23    , patchRegsOfInstr
   24    , jumpDestsOfInstr
   25    , takeRegRegMoveInstr
   26    , takeDeltaInstr
   27    , mkRegRegMoveInstr
   28    , mkStackAllocInstr
   29    , mkStackDeallocInstr
   30    , regUsageOfInstr
   31    , isJumpishInstr
   32    , isMetaInstr
   33    )
   34 where
   35 
   36 import GHC.Prelude
   37 
   38 import GHC.CmmToAsm.PPC.Regs
   39 import GHC.CmmToAsm.PPC.Cond
   40 import GHC.CmmToAsm.Types
   41 import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
   42 import GHC.CmmToAsm.Format
   43 import GHC.CmmToAsm.Reg.Target
   44 import GHC.CmmToAsm.Config
   45 import GHC.Platform.Reg.Class
   46 import GHC.Platform.Reg
   47 
   48 import GHC.Platform.Regs
   49 import GHC.Cmm.BlockId
   50 import GHC.Cmm.Dataflow.Collections
   51 import GHC.Cmm.Dataflow.Label
   52 import GHC.Cmm
   53 import GHC.Cmm.Info
   54 import GHC.Cmm.CLabel
   55 import GHC.Utils.Outputable
   56 import GHC.Utils.Panic
   57 import GHC.Platform
   58 import GHC.Types.Unique.FM (listToUFM, lookupUFM)
   59 import GHC.Types.Unique.Supply
   60 
   61 import Control.Monad (replicateM)
   62 import Data.Maybe (fromMaybe)
   63 
   64 
   65 --------------------------------------------------------------------------------
   66 -- Format of a PPC memory address.
   67 --
   68 archWordFormat :: Bool -> Format
   69 archWordFormat is32Bit
   70  | is32Bit   = II32
   71  | otherwise = II64
   72 
   73 
   74 mkStackAllocInstr :: Platform -> Int -> [Instr]
   75 mkStackAllocInstr platform amount
   76   = mkStackAllocInstr' platform (-amount)
   77 
   78 mkStackDeallocInstr :: Platform -> Int -> [Instr]
   79 mkStackDeallocInstr platform amount
   80   = mkStackAllocInstr' platform amount
   81 
   82 mkStackAllocInstr' :: Platform -> Int -> [Instr]
   83 mkStackAllocInstr' platform amount
   84   | fits16Bits amount
   85   = [ LD fmt r0 (AddrRegImm sp zero)
   86     , STU fmt r0 (AddrRegImm sp immAmount)
   87     ]
   88   | otherwise
   89   = [ LD fmt r0 (AddrRegImm sp zero)
   90     , ADDIS tmp sp (HA immAmount)
   91     , ADD tmp tmp (RIImm (LO immAmount))
   92     , STU fmt r0 (AddrRegReg sp tmp)
   93     ]
   94   where
   95     fmt = intFormat $ widthFromBytes (platformWordSizeInBytes platform)
   96     zero = ImmInt 0
   97     tmp = tmpReg platform
   98     immAmount = ImmInt amount
   99 
  100 --
  101 -- See note [extra spill slots] in X86/Instr.hs
  102 --
  103 allocMoreStack
  104   :: Platform
  105   -> Int
  106   -> NatCmmDecl statics GHC.CmmToAsm.PPC.Instr.Instr
  107   -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.PPC.Instr.Instr, [(BlockId,BlockId)])
  108 
  109 allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
  110 allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
  111     let
  112         infos   = mapKeys info
  113         entries = case code of
  114                     [] -> infos
  115                     BasicBlock entry _ : _ -- first block is the entry point
  116                         | entry `elem` infos -> infos
  117                         | otherwise          -> entry : infos
  118 
  119     uniqs <- replicateM (length entries) getUniqueM
  120 
  121     let
  122         delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
  123             where x = slots * spillSlotSize -- sp delta
  124 
  125         alloc   = mkStackAllocInstr   platform delta
  126         dealloc = mkStackDeallocInstr platform delta
  127 
  128         retargetList = (zip entries (map mkBlockId uniqs))
  129 
  130         new_blockmap :: LabelMap BlockId
  131         new_blockmap = mapFromList retargetList
  132 
  133         insert_stack_insns (BasicBlock id insns)
  134             | Just new_blockid <- mapLookup id new_blockmap
  135                 = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
  136                   , BasicBlock new_blockid block'
  137                   ]
  138             | otherwise
  139                 = [ BasicBlock id block' ]
  140             where
  141               block' = foldr insert_dealloc [] insns
  142 
  143         insert_dealloc insn r
  144             -- BCTR might or might not be a non-local jump. For
  145             -- "labeled-goto" we use JMP, and for "computed-goto" we
  146             -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
  147             = case insn of
  148                 JMP _ _           -> dealloc ++ (insn : r)
  149                 BCTR [] Nothing _ -> dealloc ++ (insn : r)
  150                 BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r
  151                 BCCFAR cond b p   -> BCCFAR cond (retarget b) p : r
  152                 BCC    cond b p   -> BCC    cond (retarget b) p : r
  153                 _                 -> insn : r
  154             -- BL and BCTRL are call-like instructions rather than
  155             -- jumps, and are used only for C calls.
  156 
  157         retarget :: BlockId -> BlockId
  158         retarget b
  159             = fromMaybe b (mapLookup b new_blockmap)
  160 
  161         new_code
  162             = concatMap insert_stack_insns code
  163 
  164     -- in
  165     return (CmmProc info lbl live (ListGraph new_code),retargetList)
  166 
  167 
  168 -- -----------------------------------------------------------------------------
  169 -- Machine's assembly language
  170 
  171 -- We have a few common "instructions" (nearly all the pseudo-ops) but
  172 -- mostly all of 'Instr' is machine-specific.
  173 
  174 -- Register or immediate
  175 data RI
  176     = RIReg Reg
  177     | RIImm Imm
  178 
  179 data Instr
  180     -- comment pseudo-op
  181     = COMMENT SDoc
  182 
  183     -- location pseudo-op (file, line, col, name)
  184     | LOCATION Int Int Int String
  185 
  186     -- some static data spat out during code
  187     -- generation.  Will be extracted before
  188     -- pretty-printing.
  189     | LDATA   Section RawCmmStatics
  190 
  191     -- start a new basic block.  Useful during
  192     -- codegen, removed later.  Preceding
  193     -- instruction should be a jump, as per the
  194     -- invariants for a BasicBlock (see Cmm).
  195     | NEWBLOCK BlockId
  196 
  197     -- specify current stack offset for
  198     -- benefit of subsequent passes
  199     | DELTA   Int
  200 
  201     -- Loads and stores.
  202     | LD      Format Reg AddrMode   -- Load format, dst, src
  203     | LDFAR   Format Reg AddrMode   -- Load format, dst, src 32 bit offset
  204     | LDR     Format Reg AddrMode   -- Load and reserve format, dst, src
  205     | LA      Format Reg AddrMode   -- Load arithmetic format, dst, src
  206     | ST      Format Reg AddrMode   -- Store format, src, dst
  207     | STFAR   Format Reg AddrMode   -- Store format, src, dst 32 bit offset
  208     | STU     Format Reg AddrMode   -- Store with Update format, src, dst
  209     | STC     Format Reg AddrMode   -- Store conditional format, src, dst
  210     | LIS     Reg Imm               -- Load Immediate Shifted dst, src
  211     | LI      Reg Imm               -- Load Immediate dst, src
  212     | MR      Reg Reg               -- Move Register dst, src -- also for fmr
  213 
  214     | CMP     Format Reg RI         -- format, src1, src2
  215     | CMPL    Format Reg RI         -- format, src1, src2
  216 
  217     | BCC     Cond BlockId (Maybe Bool) -- cond, block, hint
  218     | BCCFAR  Cond BlockId (Maybe Bool) -- cond, block, hint
  219                                     --   hint:
  220                                     --    Just True:  branch likely taken
  221                                     --    Just False: branch likely not taken
  222                                     --    Nothing:    no hint
  223     | JMP     CLabel [Reg]          -- same as branch,
  224                                     -- but with CLabel instead of block ID
  225                                     -- and live global registers
  226     | MTCTR   Reg
  227     | BCTR    [Maybe BlockId] (Maybe CLabel) [Reg]
  228                                     -- with list of local destinations, and
  229                                     -- jump table location if necessary
  230     | BL      CLabel [Reg]          -- with list of argument regs
  231     | BCTRL   [Reg]
  232 
  233     | ADD     Reg Reg RI            -- dst, src1, src2
  234     | ADDO    Reg Reg Reg           -- add and set overflow
  235     | ADDC    Reg Reg Reg           -- (carrying) dst, src1, src2
  236     | ADDE    Reg Reg Reg           -- (extended) dst, src1, src2
  237     | ADDZE   Reg Reg               -- (to zero extended) dst, src
  238     | ADDIS   Reg Reg Imm           -- Add Immediate Shifted dst, src1, src2
  239     | SUBF    Reg Reg Reg           -- dst, src1, src2 ; dst = src2 - src1
  240     | SUBFO   Reg Reg Reg           -- subtract from and set overflow
  241     | SUBFC   Reg Reg RI            -- (carrying) dst, src1, src2 ;
  242                                     -- dst = src2 - src1
  243     | SUBFE   Reg Reg Reg           -- (extended) dst, src1, src2 ;
  244                                     -- dst = src2 - src1
  245     | MULL    Format Reg Reg RI
  246     | MULLO   Format Reg Reg Reg    -- multiply and set overflow
  247     | MFOV    Format Reg            -- move overflow bit (1|33) to register
  248                                     -- pseudo-instruction; pretty printed as
  249                                     -- mfxer dst
  250                                     -- extr[w|d]i dst, dst, 1, [1|33]
  251     | MULHU   Format Reg Reg Reg
  252     | DIV     Format Bool Reg Reg Reg
  253     | AND     Reg Reg RI            -- dst, src1, src2
  254     | ANDC    Reg Reg Reg           -- AND with complement, dst = src1 & ~ src2
  255     | NAND    Reg Reg Reg           -- dst, src1, src2
  256     | OR      Reg Reg RI            -- dst, src1, src2
  257     | ORIS    Reg Reg Imm           -- OR Immediate Shifted dst, src1, src2
  258     | XOR     Reg Reg RI            -- dst, src1, src2
  259     | XORIS   Reg Reg Imm           -- XOR Immediate Shifted dst, src1, src2
  260 
  261     | EXTS    Format Reg Reg
  262     | CNTLZ   Format Reg Reg
  263 
  264     | NEG     Reg Reg
  265     | NOT     Reg Reg
  266 
  267     | SL      Format Reg Reg RI            -- shift left
  268     | SR      Format Reg Reg RI            -- shift right
  269     | SRA     Format Reg Reg RI            -- shift right arithmetic
  270 
  271     | RLWINM  Reg Reg Int Int Int   -- Rotate Left Word Immediate then AND with Mask
  272     | CLRLI   Format Reg Reg Int    -- clear left immediate (extended mnemonic)
  273     | CLRRI   Format Reg Reg Int    -- clear right immediate (extended mnemonic)
  274 
  275     | FADD    Format Reg Reg Reg
  276     | FSUB    Format Reg Reg Reg
  277     | FMUL    Format Reg Reg Reg
  278     | FDIV    Format Reg Reg Reg
  279     | FABS    Reg Reg               -- abs is the same for single and double
  280     | FNEG    Reg Reg               -- negate is the same for single and double prec.
  281 
  282     | FCMP    Reg Reg
  283 
  284     | FCTIWZ  Reg Reg           -- convert to integer word
  285     | FCTIDZ  Reg Reg           -- convert to integer double word
  286     | FCFID   Reg Reg           -- convert from integer double word
  287     | FRSP    Reg Reg           -- reduce to single precision
  288                                 -- (but destination is a FP register)
  289 
  290     | CRNOR   Int Int Int       -- condition register nor
  291     | MFCR    Reg               -- move from condition register
  292 
  293     | MFLR    Reg               -- move from link register
  294     | FETCHPC Reg               -- pseudo-instruction:
  295                                 -- bcl to next insn, mflr reg
  296     | HWSYNC                    -- heavy weight sync
  297     | ISYNC                     -- instruction synchronize
  298     | LWSYNC                    -- memory barrier
  299     | NOP                       -- no operation, PowerPC 64 bit
  300                                 -- needs this as place holder to
  301                                 -- reload TOC pointer
  302 
  303 -- | Get the registers that are being used by this instruction.
  304 -- regUsage doesn't need to do any trickery for jumps and such.
  305 -- Just state precisely the regs read and written by that insn.
  306 -- The consequences of control flow transfers, as far as register
  307 -- allocation goes, are taken care of by the register allocator.
  308 --
  309 regUsageOfInstr :: Platform -> Instr -> RegUsage
  310 regUsageOfInstr platform instr
  311  = case instr of
  312     LD      _ reg addr       -> usage (regAddr addr, [reg])
  313     LDFAR   _ reg addr       -> usage (regAddr addr, [reg])
  314     LDR     _ reg addr       -> usage (regAddr addr, [reg])
  315     LA      _ reg addr       -> usage (regAddr addr, [reg])
  316     ST      _ reg addr       -> usage (reg : regAddr addr, [])
  317     STFAR   _ reg addr       -> usage (reg : regAddr addr, [])
  318     STU     _ reg addr       -> usage (reg : regAddr addr, [])
  319     STC     _ reg addr       -> usage (reg : regAddr addr, [])
  320     LIS     reg _            -> usage ([], [reg])
  321     LI      reg _            -> usage ([], [reg])
  322     MR      reg1 reg2        -> usage ([reg2], [reg1])
  323     CMP     _ reg ri         -> usage (reg : regRI ri,[])
  324     CMPL    _ reg ri         -> usage (reg : regRI ri,[])
  325     BCC     _ _ _            -> noUsage
  326     BCCFAR  _ _ _            -> noUsage
  327     JMP     _ regs           -> usage (regs, [])
  328     MTCTR   reg              -> usage ([reg],[])
  329     BCTR    _ _ regs         -> usage (regs, [])
  330     BL      _ params         -> usage (params, callClobberedRegs platform)
  331     BCTRL   params           -> usage (params, callClobberedRegs platform)
  332 
  333     ADD     reg1 reg2 ri     -> usage (reg2 : regRI ri, [reg1])
  334     ADDO    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  335     ADDC    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  336     ADDE    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  337     ADDZE   reg1 reg2        -> usage ([reg2], [reg1])
  338     ADDIS   reg1 reg2 _      -> usage ([reg2], [reg1])
  339     SUBF    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  340     SUBFO   reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  341     SUBFC   reg1 reg2 ri     -> usage (reg2 : regRI ri, [reg1])
  342     SUBFE   reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
  343     MULL    _ reg1 reg2 ri   -> usage (reg2 : regRI ri, [reg1])
  344     MULLO   _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
  345     MFOV    _ reg            -> usage ([], [reg])
  346     MULHU   _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
  347     DIV     _ _ reg1 reg2 reg3
  348                              -> usage ([reg2,reg3], [reg1])
  349 
  350     AND     reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
  351     ANDC    reg1 reg2 reg3  -> usage ([reg2,reg3], [reg1])
  352     NAND    reg1 reg2 reg3  -> usage ([reg2,reg3], [reg1])
  353     OR      reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
  354     ORIS    reg1 reg2 _     -> usage ([reg2], [reg1])
  355     XOR     reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
  356     XORIS   reg1 reg2 _     -> usage ([reg2], [reg1])
  357     EXTS    _  reg1 reg2    -> usage ([reg2], [reg1])
  358     CNTLZ   _  reg1 reg2    -> usage ([reg2], [reg1])
  359     NEG     reg1 reg2       -> usage ([reg2], [reg1])
  360     NOT     reg1 reg2       -> usage ([reg2], [reg1])
  361     SL      _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
  362     SR      _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
  363     SRA     _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
  364     RLWINM  reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
  365     CLRLI   _ reg1 reg2 _   -> usage ([reg2], [reg1])
  366     CLRRI   _ reg1 reg2 _   -> usage ([reg2], [reg1])
  367 
  368     FADD    _ r1 r2 r3      -> usage ([r2,r3], [r1])
  369     FSUB    _ r1 r2 r3      -> usage ([r2,r3], [r1])
  370     FMUL    _ r1 r2 r3      -> usage ([r2,r3], [r1])
  371     FDIV    _ r1 r2 r3      -> usage ([r2,r3], [r1])
  372     FABS    r1 r2           -> usage ([r2], [r1])
  373     FNEG    r1 r2           -> usage ([r2], [r1])
  374     FCMP    r1 r2           -> usage ([r1,r2], [])
  375     FCTIWZ  r1 r2           -> usage ([r2], [r1])
  376     FCTIDZ  r1 r2           -> usage ([r2], [r1])
  377     FCFID   r1 r2           -> usage ([r2], [r1])
  378     FRSP    r1 r2           -> usage ([r2], [r1])
  379     MFCR    reg             -> usage ([], [reg])
  380     MFLR    reg             -> usage ([], [reg])
  381     FETCHPC reg             -> usage ([], [reg])
  382     _                       -> noUsage
  383   where
  384     usage (src, dst) = RU (filter (interesting platform) src)
  385                           (filter (interesting platform) dst)
  386     regAddr (AddrRegReg r1 r2) = [r1, r2]
  387     regAddr (AddrRegImm r1 _)  = [r1]
  388 
  389     regRI (RIReg r) = [r]
  390     regRI  _        = []
  391 
  392 interesting :: Platform -> Reg -> Bool
  393 interesting _        (RegVirtual _)              = True
  394 interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
  395 interesting _        (RegReal (RealRegPair{}))
  396     = panic "PPC.Instr.interesting: no reg pairs on this arch"
  397 
  398 
  399 
  400 -- | Apply a given mapping to all the register references in this
  401 -- instruction.
  402 patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
  403 patchRegsOfInstr instr env
  404  = case instr of
  405     LD      fmt reg addr    -> LD fmt (env reg) (fixAddr addr)
  406     LDFAR   fmt reg addr    -> LDFAR fmt (env reg) (fixAddr addr)
  407     LDR     fmt reg addr    -> LDR fmt (env reg) (fixAddr addr)
  408     LA      fmt reg addr    -> LA fmt (env reg) (fixAddr addr)
  409     ST      fmt reg addr    -> ST fmt (env reg) (fixAddr addr)
  410     STFAR   fmt reg addr    -> STFAR fmt (env reg) (fixAddr addr)
  411     STU     fmt reg addr    -> STU fmt (env reg) (fixAddr addr)
  412     STC     fmt reg addr    -> STC fmt (env reg) (fixAddr addr)
  413     LIS     reg imm         -> LIS (env reg) imm
  414     LI      reg imm         -> LI (env reg) imm
  415     MR      reg1 reg2       -> MR (env reg1) (env reg2)
  416     CMP     fmt reg ri      -> CMP fmt (env reg) (fixRI ri)
  417     CMPL    fmt reg ri      -> CMPL fmt (env reg) (fixRI ri)
  418     BCC     cond lbl p      -> BCC cond lbl p
  419     BCCFAR  cond lbl p      -> BCCFAR cond lbl p
  420     JMP     l regs          -> JMP l regs -- global regs will not be remapped
  421     MTCTR   reg             -> MTCTR (env reg)
  422     BCTR    targets lbl rs  -> BCTR targets lbl rs
  423     BL      imm argRegs     -> BL imm argRegs    -- argument regs
  424     BCTRL   argRegs         -> BCTRL argRegs     -- cannot be remapped
  425     ADD     reg1 reg2 ri    -> ADD (env reg1) (env reg2) (fixRI ri)
  426     ADDO    reg1 reg2 reg3  -> ADDO (env reg1) (env reg2) (env reg3)
  427     ADDC    reg1 reg2 reg3  -> ADDC (env reg1) (env reg2) (env reg3)
  428     ADDE    reg1 reg2 reg3  -> ADDE (env reg1) (env reg2) (env reg3)
  429     ADDZE   reg1 reg2       -> ADDZE (env reg1) (env reg2)
  430     ADDIS   reg1 reg2 imm   -> ADDIS (env reg1) (env reg2) imm
  431     SUBF    reg1 reg2 reg3  -> SUBF (env reg1) (env reg2) (env reg3)
  432     SUBFO   reg1 reg2 reg3  -> SUBFO (env reg1) (env reg2) (env reg3)
  433     SUBFC   reg1 reg2 ri    -> SUBFC (env reg1) (env reg2) (fixRI ri)
  434     SUBFE   reg1 reg2 reg3  -> SUBFE (env reg1) (env reg2) (env reg3)
  435     MULL    fmt reg1 reg2 ri
  436                             -> MULL fmt (env reg1) (env reg2) (fixRI ri)
  437     MULLO   fmt reg1 reg2 reg3
  438                             -> MULLO fmt (env reg1) (env reg2) (env reg3)
  439     MFOV    fmt reg         -> MFOV fmt (env reg)
  440     MULHU   fmt reg1 reg2 reg3
  441                             -> MULHU fmt (env reg1) (env reg2) (env reg3)
  442     DIV     fmt sgn reg1 reg2 reg3
  443                             -> DIV fmt sgn (env reg1) (env reg2) (env reg3)
  444 
  445     AND     reg1 reg2 ri    -> AND (env reg1) (env reg2) (fixRI ri)
  446     ANDC    reg1 reg2 reg3  -> ANDC (env reg1) (env reg2) (env reg3)
  447     NAND    reg1 reg2 reg3  -> NAND (env reg1) (env reg2) (env reg3)
  448     OR      reg1 reg2 ri    -> OR  (env reg1) (env reg2) (fixRI ri)
  449     ORIS    reg1 reg2 imm   -> ORIS (env reg1) (env reg2) imm
  450     XOR     reg1 reg2 ri    -> XOR (env reg1) (env reg2) (fixRI ri)
  451     XORIS   reg1 reg2 imm   -> XORIS (env reg1) (env reg2) imm
  452     EXTS    fmt reg1 reg2   -> EXTS fmt (env reg1) (env reg2)
  453     CNTLZ   fmt reg1 reg2   -> CNTLZ fmt (env reg1) (env reg2)
  454     NEG     reg1 reg2       -> NEG (env reg1) (env reg2)
  455     NOT     reg1 reg2       -> NOT (env reg1) (env reg2)
  456     SL      fmt reg1 reg2 ri
  457                             -> SL fmt (env reg1) (env reg2) (fixRI ri)
  458     SR      fmt reg1 reg2 ri
  459                             -> SR fmt (env reg1) (env reg2) (fixRI ri)
  460     SRA     fmt reg1 reg2 ri
  461                             -> SRA fmt (env reg1) (env reg2) (fixRI ri)
  462     RLWINM  reg1 reg2 sh mb me
  463                             -> RLWINM (env reg1) (env reg2) sh mb me
  464     CLRLI   fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n
  465     CLRRI   fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n
  466     FADD    fmt r1 r2 r3    -> FADD fmt (env r1) (env r2) (env r3)
  467     FSUB    fmt r1 r2 r3    -> FSUB fmt (env r1) (env r2) (env r3)
  468     FMUL    fmt r1 r2 r3    -> FMUL fmt (env r1) (env r2) (env r3)
  469     FDIV    fmt r1 r2 r3    -> FDIV fmt (env r1) (env r2) (env r3)
  470     FABS    r1 r2           -> FABS (env r1) (env r2)
  471     FNEG    r1 r2           -> FNEG (env r1) (env r2)
  472     FCMP    r1 r2           -> FCMP (env r1) (env r2)
  473     FCTIWZ  r1 r2           -> FCTIWZ (env r1) (env r2)
  474     FCTIDZ  r1 r2           -> FCTIDZ (env r1) (env r2)
  475     FCFID   r1 r2           -> FCFID (env r1) (env r2)
  476     FRSP    r1 r2           -> FRSP (env r1) (env r2)
  477     MFCR    reg             -> MFCR (env reg)
  478     MFLR    reg             -> MFLR (env reg)
  479     FETCHPC reg             -> FETCHPC (env reg)
  480     _                       -> instr
  481   where
  482     fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
  483     fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
  484 
  485     fixRI (RIReg r) = RIReg (env r)
  486     fixRI other     = other
  487 
  488 
  489 --------------------------------------------------------------------------------
  490 -- | Checks whether this instruction is a jump/branch instruction.
  491 -- One that can change the flow of control in a way that the
  492 -- register allocator needs to worry about.
  493 isJumpishInstr :: Instr -> Bool
  494 isJumpishInstr instr
  495  = case instr of
  496     BCC{}       -> True
  497     BCCFAR{}    -> True
  498     BCTR{}      -> True
  499     BCTRL{}     -> True
  500     BL{}        -> True
  501     JMP{}       -> True
  502     _           -> False
  503 
  504 
  505 -- | Checks whether this instruction is a jump/branch instruction.
  506 -- One that can change the flow of control in a way that the
  507 -- register allocator needs to worry about.
  508 jumpDestsOfInstr :: Instr -> [BlockId]
  509 jumpDestsOfInstr insn
  510   = case insn of
  511         BCC _ id _       -> [id]
  512         BCCFAR _ id _    -> [id]
  513         BCTR targets _ _ -> [id | Just id <- targets]
  514         _                -> []
  515 
  516 
  517 -- | Change the destination of this jump instruction.
  518 -- Used in the linear allocator when adding fixup blocks for join
  519 -- points.
  520 patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
  521 patchJumpInstr insn patchF
  522   = case insn of
  523         BCC cc id p     -> BCC cc (patchF id) p
  524         BCCFAR cc id p  -> BCCFAR cc (patchF id) p
  525         BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs
  526         _               -> insn
  527 
  528 
  529 -- -----------------------------------------------------------------------------
  530 
  531 -- | An instruction to spill a register into a spill slot.
  532 mkSpillInstr
  533    :: NCGConfig
  534    -> Reg       -- register to spill
  535    -> Int       -- current stack delta
  536    -> Int       -- spill slot to use
  537    -> [Instr]
  538 
  539 mkSpillInstr config reg delta slot
  540   = let platform = ncgPlatform config
  541         off      = spillSlotToOffset platform slot
  542         arch     = platformArch platform
  543     in
  544     let fmt = case targetClassOfReg platform reg of
  545                 RcInteger -> case arch of
  546                                 ArchPPC -> II32
  547                                 _       -> II64
  548                 RcDouble  -> FF64
  549                 _         -> panic "PPC.Instr.mkSpillInstr: no match"
  550         instr = case makeImmediate W32 True (off-delta) of
  551                 Just _  -> ST
  552                 Nothing -> STFAR -- pseudo instruction: 32 bit offsets
  553 
  554     in [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))]
  555 
  556 
  557 mkLoadInstr
  558    :: NCGConfig
  559    -> Reg       -- register to load
  560    -> Int       -- current stack delta
  561    -> Int       -- spill slot to use
  562    -> [Instr]
  563 
  564 mkLoadInstr config reg delta slot
  565   = let platform = ncgPlatform config
  566         off      = spillSlotToOffset platform slot
  567         arch     = platformArch platform
  568     in
  569     let fmt = case targetClassOfReg platform reg of
  570                 RcInteger ->  case arch of
  571                                  ArchPPC -> II32
  572                                  _       -> II64
  573                 RcDouble  -> FF64
  574                 _         -> panic "PPC.Instr.mkLoadInstr: no match"
  575         instr = case makeImmediate W32 True (off-delta) of
  576                 Just _  -> LD
  577                 Nothing -> LDFAR -- pseudo instruction: 32 bit offsets
  578 
  579     in [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))]
  580 
  581 
  582 -- | The size of a minimal stackframe header including minimal
  583 -- parameter save area.
  584 stackFrameHeaderSize :: Platform -> Int
  585 stackFrameHeaderSize platform
  586   = case platformOS platform of
  587       OSAIX    -> 24 + 8 * 4
  588       _ -> case platformArch platform of
  589                              -- header + parameter save area
  590              ArchPPC           -> 64 -- TODO: check ABI spec
  591              ArchPPC_64 ELF_V1 -> 48 + 8 * 8
  592              ArchPPC_64 ELF_V2 -> 32 + 8 * 8
  593              _ -> panic "PPC.stackFrameHeaderSize: not defined for this OS"
  594 
  595 -- | The maximum number of bytes required to spill a register. PPC32
  596 -- has 32-bit GPRs and 64-bit FPRs, while PPC64 has 64-bit GPRs and
  597 -- 64-bit FPRs. So the maximum is 8 regardless of platforms unlike
  598 -- x86. Note that AltiVec's vector registers are 128-bit wide so we
  599 -- must not use this to spill them.
  600 spillSlotSize :: Int
  601 spillSlotSize = 8
  602 
  603 -- | The number of spill slots available without allocating more.
  604 maxSpillSlots :: NCGConfig -> Int
  605 maxSpillSlots config
  606 --  = 0 -- useful for testing allocMoreStack
  607     = let platform = ncgPlatform config
  608       in ((ncgSpillPreallocSize config - stackFrameHeaderSize platform)
  609          `div` spillSlotSize) - 1
  610 
  611 -- | The number of bytes that the stack pointer should be aligned
  612 -- to. This is 16 both on PPC32 and PPC64 ELF (see ELF processor
  613 -- specific supplements).
  614 stackAlign :: Int
  615 stackAlign = 16
  616 
  617 -- | Convert a spill slot number to a *byte* offset, with no sign.
  618 spillSlotToOffset :: Platform -> Int -> Int
  619 spillSlotToOffset platform slot
  620    = stackFrameHeaderSize platform + spillSlotSize * slot
  621 
  622 
  623 --------------------------------------------------------------------------------
  624 -- | See if this instruction is telling us the current C stack delta
  625 takeDeltaInstr
  626     :: Instr
  627     -> Maybe Int
  628 
  629 takeDeltaInstr instr
  630  = case instr of
  631      DELTA i  -> Just i
  632      _        -> Nothing
  633 
  634 
  635 isMetaInstr
  636     :: Instr
  637     -> Bool
  638 
  639 isMetaInstr instr
  640  = case instr of
  641     COMMENT{}   -> True
  642     LOCATION{}  -> True
  643     LDATA{}     -> True
  644     NEWBLOCK{}  -> True
  645     DELTA{}     -> True
  646     _           -> False
  647 
  648 
  649 -- | Copy the value in a register to another one.
  650 -- Must work for all register classes.
  651 mkRegRegMoveInstr
  652     :: Reg
  653     -> Reg
  654     -> Instr
  655 
  656 mkRegRegMoveInstr src dst
  657     = MR dst src
  658 
  659 
  660 -- | Make an unconditional jump instruction.
  661 mkJumpInstr
  662     :: BlockId
  663     -> [Instr]
  664 
  665 mkJumpInstr id
  666     = [BCC ALWAYS id Nothing]
  667 
  668 
  669 -- | Take the source and destination from this reg -> reg move instruction
  670 -- or Nothing if it's not one
  671 takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
  672 takeRegRegMoveInstr (MR dst src) = Just (src,dst)
  673 takeRegRegMoveInstr _  = Nothing
  674 
  675 -- -----------------------------------------------------------------------------
  676 -- Making far branches
  677 
  678 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
  679 -- big, we have to work around this limitation.
  680 
  681 makeFarBranches
  682         :: LabelMap RawCmmStatics
  683         -> [NatBasicBlock Instr]
  684         -> [NatBasicBlock Instr]
  685 makeFarBranches info_env blocks
  686     | last blockAddresses < nearLimit = blocks
  687     | otherwise = zipWith handleBlock blockAddresses blocks
  688     where
  689         blockAddresses = scanl (+) 0 $ map blockLen blocks
  690         blockLen (BasicBlock _ instrs) = length instrs
  691 
  692         handleBlock addr (BasicBlock id instrs)
  693                 = BasicBlock id (zipWith makeFar [addr..] instrs)
  694 
  695         makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
  696         makeFar addr (BCC cond tgt p)
  697             | abs (addr - targetAddr) >= nearLimit
  698             = BCCFAR cond tgt p
  699             | otherwise
  700             = BCC cond tgt p
  701             where Just targetAddr = lookupUFM blockAddressMap tgt
  702         makeFar _ other            = other
  703 
  704         -- 8192 instructions are allowed; let's keep some distance, as
  705         -- we have a few pseudo-insns that are pretty-printed as
  706         -- multiple instructions, and it's just not worth the effort
  707         -- to calculate things exactly
  708         nearLimit = 7000 - mapSize info_env * maxRetInfoTableSizeW
  709 
  710         blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses