never executed always true always false
    1 {-# OPTIONS_GHC -fno-warn-orphans #-}
    2 
    3 module GHC.CmmToAsm.AArch64.Instr
    4 
    5 where
    6 
    7 import GHC.Prelude
    8 
    9 import GHC.CmmToAsm.AArch64.Cond
   10 import GHC.CmmToAsm.AArch64.Regs
   11 
   12 import GHC.CmmToAsm.Instr (RegUsage(..))
   13 import GHC.CmmToAsm.Format
   14 import GHC.CmmToAsm.Types
   15 import GHC.CmmToAsm.Utils
   16 import GHC.CmmToAsm.Config
   17 import GHC.Platform.Reg
   18 
   19 import GHC.Platform.Regs
   20 import GHC.Cmm.BlockId
   21 import GHC.Cmm.Dataflow.Collections
   22 import GHC.Cmm.Dataflow.Label
   23 import GHC.Cmm
   24 import GHC.Cmm.CLabel
   25 import GHC.Utils.Outputable
   26 import GHC.Platform
   27 import GHC.Types.Unique.Supply
   28 
   29 import GHC.Utils.Panic
   30 
   31 import Control.Monad (replicateM)
   32 import Data.Maybe (fromMaybe)
   33 
   34 import GHC.Stack
   35 
   36 -- | TODO: verify this!
   37 stackFrameHeaderSize :: Platform -> Int
   38 stackFrameHeaderSize _ = 64
   39 
   40 -- | All registers are 8 byte wide.
   41 spillSlotSize :: Int
   42 spillSlotSize = 8
   43 
   44 -- | The number of bytes that the stack pointer should be aligned
   45 -- to.
   46 stackAlign :: Int
   47 stackAlign = 16
   48 
   49 -- | The number of spill slots available without allocating more.
   50 maxSpillSlots :: NCGConfig -> Int
   51 maxSpillSlots config
   52 --  = 0 -- set to zero, to see when allocMoreStack has to fire.
   53     = let platform = ncgPlatform config
   54       in ((ncgSpillPreallocSize config - stackFrameHeaderSize platform)
   55          `div` spillSlotSize) - 1
   56 
   57 -- | Convert a spill slot number to a *byte* offset, with no sign.
   58 spillSlotToOffset :: NCGConfig -> Int -> Int
   59 spillSlotToOffset config slot
   60    = stackFrameHeaderSize (ncgPlatform config) + spillSlotSize * slot
   61 
   62 -- | Get the registers that are being used by this instruction.
   63 -- regUsage doesn't need to do any trickery for jumps and such.
   64 -- Just state precisely the regs read and written by that insn.
   65 -- The consequences of control flow transfers, as far as register
   66 -- allocation goes, are taken care of by the register allocator.
   67 --
   68 -- RegUsage = RU [<read regs>] [<write regs>]
   69 
   70 instance Outputable RegUsage where
   71     ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')'
   72 
   73 regUsageOfInstr :: Platform -> Instr -> RegUsage
   74 regUsageOfInstr platform instr = case instr of
   75   ANN _ i                  -> regUsageOfInstr platform i
   76   -- 1. Arithmetic Instructions ------------------------------------------------
   77   ADD dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   78   CMN l r                  -> usage (regOp l ++ regOp r, [])
   79   CMP l r                  -> usage (regOp l ++ regOp r, [])
   80   MSUB dst src1 src2 src3  -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
   81   MUL dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   82   NEG dst src              -> usage (regOp src, regOp dst)
   83   SDIV dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
   84   SUB dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   85   UDIV dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
   86 
   87   -- 2. Bit Manipulation Instructions ------------------------------------------
   88   SBFM dst src _ _         -> usage (regOp src, regOp dst)
   89   UBFM dst src _ _         -> usage (regOp src, regOp dst)
   90   SBFX dst src _ _         -> usage (regOp src, regOp dst)
   91   UBFX dst src _ _         -> usage (regOp src, regOp dst)
   92   SXTB dst src             -> usage (regOp src, regOp dst)
   93   UXTB dst src             -> usage (regOp src, regOp dst)
   94   SXTH dst src             -> usage (regOp src, regOp dst)
   95   UXTH dst src             -> usage (regOp src, regOp dst)
   96   -- 3. Logical and Move Instructions ------------------------------------------
   97   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   98   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   99   BIC dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  100   BICS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
  101   EON dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  102   EOR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  103   LSL dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  104   LSR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  105   MOV dst src              -> usage (regOp src, regOp dst)
  106   MOVK dst src             -> usage (regOp src, regOp dst)
  107   MVN dst src              -> usage (regOp src, regOp dst)
  108   ORR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  109   ROR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
  110   TST src1 src2            -> usage (regOp src1 ++ regOp src2, [])
  111   -- 4. Branch Instructions ----------------------------------------------------
  112   J t                      -> usage (regTarget t, [])
  113   B t                      -> usage (regTarget t, [])
  114   BCOND _ t                -> usage (regTarget t, [])
  115   BL t ps _rs              -> usage (regTarget t ++ ps, callerSavedRegisters)
  116 
  117   -- 5. Atomic Instructions ----------------------------------------------------
  118   -- 6. Conditional Instructions -----------------------------------------------
  119   CSET dst _               -> usage ([], regOp dst)
  120   CBZ src _                -> usage (regOp src, [])
  121   CBNZ src _               -> usage (regOp src, [])
  122   -- 7. Load and Store Instructions --------------------------------------------
  123   STR _ src dst            -> usage (regOp src ++ regOp dst, [])
  124   LDR _ dst src            -> usage (regOp src, regOp dst)
  125   -- TODO is this right? see STR, which I'm only partial about being right?
  126   STP _ src1 src2 dst      -> usage (regOp src1 ++ regOp src2 ++ regOp dst, [])
  127   LDP _ dst1 dst2 src      -> usage (regOp src, regOp dst1 ++ regOp dst2)
  128 
  129   -- 8. Synchronization Instructions -------------------------------------------
  130   DMBSY                    -> usage ([], [])
  131 
  132   -- 9. Floating Point Instructions --------------------------------------------
  133   FCVT dst src             -> usage (regOp src, regOp dst)
  134   SCVTF dst src            -> usage (regOp src, regOp dst)
  135   FCVTZS dst src           -> usage (regOp src, regOp dst)
  136   FABS dst src             -> usage (regOp src, regOp dst)
  137 
  138   _ -> panic "regUsageOfInstr"
  139 
  140   where
  141         -- filtering the usage is necessary, otherwise the register
  142         -- allocator will try to allocate pre-defined fixed stg
  143         -- registers as well, as they show up.
  144         usage (src, dst) = RU (filter (interesting platform) src)
  145                               (filter (interesting platform) dst)
  146 
  147         regAddr :: AddrMode -> [Reg]
  148         regAddr (AddrRegReg r1 r2) = [r1, r2]
  149         regAddr (AddrRegImm r1 _)  = [r1]
  150         regAddr (AddrReg r1)       = [r1]
  151         regOp :: Operand -> [Reg]
  152         regOp (OpReg _ r1) = [r1]
  153         regOp (OpRegExt _ r1 _ _) = [r1]
  154         regOp (OpRegShift _ r1 _ _) = [r1]
  155         regOp (OpAddr a) = regAddr a
  156         regOp (OpImm _) = []
  157         regOp (OpImmShift _ _ _) = []
  158         regTarget :: Target -> [Reg]
  159         regTarget (TBlock _) = []
  160         regTarget (TLabel _) = []
  161         regTarget (TReg r1)  = [r1]
  162 
  163         -- Is this register interesting for the register allocator?
  164         interesting :: Platform -> Reg -> Bool
  165         interesting _        (RegVirtual _)                 = True
  166         interesting _        (RegReal (RealRegSingle (-1))) = False
  167         interesting platform (RegReal (RealRegSingle i))    = freeReg platform i
  168         interesting _        (RegReal (RealRegPair{}))
  169             = panic "AArch64.Instr.interesting: no reg pairs on this arch"
  170 
  171 -- Save caller save registers
  172 -- This is x0-x18
  173 --
  174 -- For SIMD/FP Registers:
  175 -- Registers v8-v15 must be preserved by a callee across subroutine calls;
  176 -- the remaining registers (v0-v7, v16-v31) do not need to be preserved (or
  177 -- should be preserved by the caller). Additionally, only the bottom 64 bits
  178 -- of each value stored in v8-v15 need to be preserved [7]; it is the
  179 -- responsibility of the caller to preserve larger values.
  180 --
  181 -- .---------------------------------------------------------------------------------------------------------------------------------------------------------------.
  182 -- |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
  183 -- | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
  184 -- |== General Purpose registers ==================================================================================================================================|
  185 -- | <---- argument passing -------------> | IR | <------- tmp registers --------> | IP0| IP1| PL | <------------------- callee saved ------------> | FP | LR | SP |
  186 -- | <------ free registers --------------------------------------------------------------------> | BR | Sp | Hp | R1 | R2 | R3 | R4 | R5 | R6 | SL | -- | -- | -- |
  187 -- |== SIMD/FP Registers ==========================================================================================================================================|
  188 -- | <---- argument passing -------------> | <-- callee saved (lower 64 bits) ---> | <--------------------------------------- caller saved ----------------------> |
  189 -- | <------ free registers -------------> | F1 | F2 | F3 | F4 | D1 | D2 | D3 | D4 | <------ free registers -----------------------------------------------------> |
  190 -- '---------------------------------------------------------------------------------------------------------------------------------------------------------------'
  191 -- IR: Indirect result location register, IP: Intra-procedure register, PL: Platform register, FP: Frame pointer, LR: Link register, SP: Stack pointer
  192 -- BR: Base, SL: SpLim
  193 callerSavedRegisters :: [Reg]
  194 callerSavedRegisters
  195     = map regSingle [0..18]
  196     ++ map regSingle [32..39]
  197     ++ map regSingle [48..63]
  198 
  199 -- | Apply a given mapping to all the register references in this
  200 -- instruction.
  201 patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
  202 patchRegsOfInstr instr env = case instr of
  203     -- 0. Meta Instructions
  204     ANN d i        -> ANN d (patchRegsOfInstr i env)
  205     -- 1. Arithmetic Instructions ----------------------------------------------
  206     ADD o1 o2 o3   -> ADD (patchOp o1) (patchOp o2) (patchOp o3)
  207     CMN o1 o2      -> CMN (patchOp o1) (patchOp o2)
  208     CMP o1 o2      -> CMP (patchOp o1) (patchOp o2)
  209     MSUB o1 o2 o3 o4 -> MSUB (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
  210     MUL o1 o2 o3   -> MUL (patchOp o1) (patchOp o2) (patchOp o3)
  211     NEG o1 o2      -> NEG (patchOp o1) (patchOp o2)
  212     SDIV o1 o2 o3  -> SDIV (patchOp o1) (patchOp o2) (patchOp o3)
  213     SUB o1 o2 o3   -> SUB  (patchOp o1) (patchOp o2) (patchOp o3)
  214     UDIV o1 o2 o3  -> UDIV (patchOp o1) (patchOp o2) (patchOp o3)
  215 
  216     -- 2. Bit Manipulation Instructions ----------------------------------------
  217     SBFM o1 o2 o3 o4 -> SBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
  218     UBFM o1 o2 o3 o4 -> UBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
  219     SBFX o1 o2 o3 o4 -> SBFX (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
  220     UBFX o1 o2 o3 o4 -> UBFX (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
  221     SXTB o1 o2       -> SXTB (patchOp o1) (patchOp o2)
  222     UXTB o1 o2       -> UXTB (patchOp o1) (patchOp o2)
  223     SXTH o1 o2       -> SXTH (patchOp o1) (patchOp o2)
  224     UXTH o1 o2       -> UXTH (patchOp o1) (patchOp o2)
  225 
  226     -- 3. Logical and Move Instructions ----------------------------------------
  227     AND o1 o2 o3   -> AND  (patchOp o1) (patchOp o2) (patchOp o3)
  228     ANDS o1 o2 o3  -> ANDS (patchOp o1) (patchOp o2) (patchOp o3)
  229     ASR o1 o2 o3   -> ASR  (patchOp o1) (patchOp o2) (patchOp o3)
  230     BIC o1 o2 o3   -> BIC  (patchOp o1) (patchOp o2) (patchOp o3)
  231     BICS o1 o2 o3  -> BICS (patchOp o1) (patchOp o2) (patchOp o3)
  232     EON o1 o2 o3   -> EON  (patchOp o1) (patchOp o2) (patchOp o3)
  233     EOR o1 o2 o3   -> EOR  (patchOp o1) (patchOp o2) (patchOp o3)
  234     LSL o1 o2 o3   -> LSL  (patchOp o1) (patchOp o2) (patchOp o3)
  235     LSR o1 o2 o3   -> LSR  (patchOp o1) (patchOp o2) (patchOp o3)
  236     MOV o1 o2      -> MOV  (patchOp o1) (patchOp o2)
  237     MOVK o1 o2     -> MOVK (patchOp o1) (patchOp o2)
  238     MVN o1 o2      -> MVN  (patchOp o1) (patchOp o2)
  239     ORR o1 o2 o3   -> ORR  (patchOp o1) (patchOp o2) (patchOp o3)
  240     ROR o1 o2 o3   -> ROR  (patchOp o1) (patchOp o2) (patchOp o3)
  241     TST o1 o2      -> TST  (patchOp o1) (patchOp o2)
  242 
  243     -- 4. Branch Instructions --------------------------------------------------
  244     J t            -> J (patchTarget t)
  245     B t            -> B (patchTarget t)
  246     BL t rs ts     -> BL (patchTarget t) rs ts
  247     BCOND c t      -> BCOND c (patchTarget t)
  248 
  249     -- 5. Atomic Instructions --------------------------------------------------
  250     -- 6. Conditional Instructions ---------------------------------------------
  251     CSET o c       -> CSET (patchOp o) c
  252     CBZ o l        -> CBZ (patchOp o) l
  253     CBNZ o l       -> CBNZ (patchOp o) l
  254     -- 7. Load and Store Instructions ------------------------------------------
  255     STR f o1 o2    -> STR f (patchOp o1) (patchOp o2)
  256     LDR f o1 o2    -> LDR f (patchOp o1) (patchOp o2)
  257     STP f o1 o2 o3 -> STP f (patchOp o1) (patchOp o2) (patchOp o3)
  258     LDP f o1 o2 o3 -> LDP f (patchOp o1) (patchOp o2) (patchOp o3)
  259 
  260     -- 8. Synchronization Instructions -----------------------------------------
  261     DMBSY          -> DMBSY
  262 
  263     -- 9. Floating Point Instructions ------------------------------------------
  264     FCVT o1 o2     -> FCVT (patchOp o1) (patchOp o2)
  265     SCVTF o1 o2    -> SCVTF (patchOp o1) (patchOp o2)
  266     FCVTZS o1 o2   -> FCVTZS (patchOp o1) (patchOp o2)
  267     FABS o1 o2     -> FABS (patchOp o1) (patchOp o2)
  268 
  269     _ -> pprPanic "patchRegsOfInstr" (text $ show instr)
  270     where
  271         patchOp :: Operand -> Operand
  272         patchOp (OpReg w r) = OpReg w (env r)
  273         patchOp (OpRegExt w r x s) = OpRegExt w (env r) x s
  274         patchOp (OpRegShift w r m s) = OpRegShift w (env r) m s
  275         patchOp (OpAddr a) = OpAddr (patchAddr a)
  276         patchOp op = op
  277         patchTarget :: Target -> Target
  278         patchTarget (TReg r) = TReg (env r)
  279         patchTarget t = t
  280         patchAddr :: AddrMode -> AddrMode
  281         patchAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
  282         patchAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
  283         patchAddr (AddrReg r) = AddrReg (env r)
  284 --------------------------------------------------------------------------------
  285 -- | Checks whether this instruction is a jump/branch instruction.
  286 -- One that can change the flow of control in a way that the
  287 -- register allocator needs to worry about.
  288 isJumpishInstr :: Instr -> Bool
  289 isJumpishInstr instr = case instr of
  290     ANN _ i -> isJumpishInstr i
  291     CBZ{} -> True
  292     CBNZ{} -> True
  293     J{} -> True
  294     B{} -> True
  295     BL{} -> True
  296     BCOND{} -> True
  297     _ -> False
  298 
  299 -- | Checks whether this instruction is a jump/branch instruction.
  300 -- One that can change the flow of control in a way that the
  301 -- register allocator needs to worry about.
  302 jumpDestsOfInstr :: Instr -> [BlockId]
  303 jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i
  304 jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
  305 jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
  306 jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
  307 jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
  308 jumpDestsOfInstr (BL t _ _) = [ id | TBlock id <- [t]]
  309 jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
  310 jumpDestsOfInstr _ = []
  311 
  312 -- | Change the destination of this jump instruction.
  313 -- Used in the linear allocator when adding fixup blocks for join
  314 -- points.
  315 patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
  316 patchJumpInstr instr patchF
  317     = case instr of
  318         ANN d i -> ANN d (patchJumpInstr i patchF)
  319         CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
  320         CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
  321         J (TBlock bid) -> J (TBlock (patchF bid))
  322         B (TBlock bid) -> B (TBlock (patchF bid))
  323         BL (TBlock bid) ps rs -> BL (TBlock (patchF bid)) ps rs
  324         BCOND c (TBlock bid) -> BCOND c (TBlock (patchF bid))
  325         _ -> pprPanic "patchJumpInstr" (text $ show instr)
  326 
  327 -- -----------------------------------------------------------------------------
  328 -- Note [Spills and Reloads]
  329 -- ~~~~~~~~~~~~~~~~~~~~~~~~~
  330 -- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading
  331 -- registers.  AArch64s maximum displacement for SP relative spills and reloads
  332 -- is essentially [-256,255], or [0, 0xFFF]*8 = [0, 32760] for 64bits.
  333 --
  334 -- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't address any location in a
  335 -- single instruction.  The idea is to use the Inter Procedure 0 (ip0) register
  336 -- to perform the computations for larger offsets.
  337 --
  338 -- Using sp to compute the offset will violate assumptions about the stack pointer
  339 -- pointing to the top of the stack during signal handling.  As we can't force
  340 -- every signal to use its own stack, we have to ensure that the stack poitner
  341 -- always poitns to the top of the stack, and we can't use it for computation.
  342 --
  343 -- | An instruction to spill a register into a spill slot.
  344 mkSpillInstr
  345    :: HasCallStack
  346    => NCGConfig
  347    -> Reg       -- register to spill
  348    -> Int       -- current stack delta
  349    -> Int       -- spill slot to use
  350    -> [Instr]
  351 
  352 mkSpillInstr config reg delta slot =
  353   case (spillSlotToOffset config slot) - delta of
  354     imm | -256 <= imm && imm <= 255                               -> [ mkStrSp imm ]
  355     imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkStrSp imm ]
  356     imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
  357                                                                      , mkStrIp0 (imm .&.  0xfff)
  358                                                                      ]
  359     imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
  360     where
  361         a .&~. b = a .&. (complement b)
  362 
  363         fmt = case reg of
  364             RegReal (RealRegSingle n) | n < 32 -> II64
  365             _                                  -> FF64
  366         mkIp0SpillAddr imm = ANN (text "Spill: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
  367         mkStrSp imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
  368         mkStrIp0 imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
  369 
  370         off = spillSlotToOffset config slot
  371 
  372 mkLoadInstr
  373    :: NCGConfig
  374    -> Reg       -- register to load
  375    -> Int       -- current stack delta
  376    -> Int       -- spill slot to use
  377    -> [Instr]
  378 
  379 mkLoadInstr config reg delta slot =
  380   case (spillSlotToOffset config slot) - delta of
  381     imm | -256 <= imm && imm <= 255                               -> [ mkLdrSp imm ]
  382     imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkLdrSp imm ]
  383     imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
  384                                                                      , mkLdrIp0 (imm .&.  0xfff)
  385                                                                      ]
  386     imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
  387     where
  388         a .&~. b = a .&. (complement b)
  389 
  390         fmt = case reg of
  391             RegReal (RealRegSingle n) | n < 32 -> II64
  392             _                                  -> FF64
  393 
  394         mkIp0SpillAddr imm = ANN (text "Reload: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
  395         mkLdrSp imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
  396         mkLdrIp0 imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
  397 
  398         off = spillSlotToOffset config slot
  399 
  400 --------------------------------------------------------------------------------
  401 -- | See if this instruction is telling us the current C stack delta
  402 takeDeltaInstr :: Instr -> Maybe Int
  403 takeDeltaInstr (ANN _ i) = takeDeltaInstr i
  404 takeDeltaInstr (DELTA i) = Just i
  405 takeDeltaInstr _         = Nothing
  406 
  407 -- Not real instructions.  Just meta data
  408 isMetaInstr :: Instr -> Bool
  409 isMetaInstr instr
  410  = case instr of
  411     ANN _ i     -> isMetaInstr i
  412     COMMENT{}   -> True
  413     MULTILINE_COMMENT{} -> True
  414     LOCATION{}  -> True
  415     LDATA{}     -> True
  416     NEWBLOCK{}  -> True
  417     DELTA{}     -> True
  418     PUSH_STACK_FRAME -> True
  419     POP_STACK_FRAME -> True
  420     _           -> False
  421 
  422 -- | Copy the value in a register to another one.
  423 -- Must work for all register classes.
  424 mkRegRegMoveInstr :: Reg -> Reg -> Instr
  425 mkRegRegMoveInstr src dst = ANN (text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst) $ MOV (OpReg W64 dst) (OpReg W64 src)
  426 
  427 -- | Take the source and destination from this reg -> reg move instruction
  428 -- or Nothing if it's not one
  429 takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
  430 --takeRegRegMoveInstr (MOV (OpReg fmt dst) (OpReg fmt' src)) | fmt == fmt' = Just (src, dst)
  431 takeRegRegMoveInstr _ = Nothing
  432 
  433 -- | Make an unconditional jump instruction.
  434 mkJumpInstr :: BlockId -> [Instr]
  435 mkJumpInstr id = [B (TBlock id)]
  436 
  437 mkStackAllocInstr :: Platform -> Int -> [Instr]
  438 mkStackAllocInstr platform n
  439     | n == 0 = []
  440     | n > 0 && n < 4096 = [ ANN (text "Alloc More Stack") $ SUB sp sp (OpImm (ImmInt n)) ]
  441     | n > 0 =  ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : mkStackAllocInstr platform (n - 4095)
  442 mkStackAllocInstr _platform n = pprPanic "mkStackAllocInstr" (int n)
  443 
  444 mkStackDeallocInstr :: Platform -> Int -> [Instr]
  445 mkStackDeallocInstr platform n
  446     | n == 0 = []
  447     | n > 0 && n < 4096 = [ ANN (text "Dealloc More Stack") $ ADD sp sp (OpImm (ImmInt n)) ]
  448     | n > 0 =  ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : mkStackDeallocInstr platform (n - 4095)
  449 mkStackDeallocInstr _platform n = pprPanic "mkStackDeallocInstr" (int n)
  450 
  451 --
  452 -- See note [extra spill slots] in X86/Instr.hs
  453 --
  454 allocMoreStack
  455   :: Platform
  456   -> Int
  457   -> NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr
  458   -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr, [(BlockId,BlockId)])
  459 
  460 allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
  461 allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
  462     let entries = entryBlocks proc
  463 
  464     uniqs <- replicateM (length entries) getUniqueM
  465 
  466     let
  467       delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
  468         where x = slots * spillSlotSize -- sp delta
  469 
  470       alloc   = mkStackAllocInstr   platform delta
  471       dealloc = mkStackDeallocInstr platform delta
  472 
  473       retargetList = (zip entries (map mkBlockId uniqs))
  474 
  475       new_blockmap :: LabelMap BlockId
  476       new_blockmap = mapFromList retargetList
  477 
  478       insert_stack_insn (BasicBlock id insns)
  479         | Just new_blockid <- mapLookup id new_blockmap
  480         = [ BasicBlock id $ alloc ++ [ B (TBlock new_blockid) ]
  481           , BasicBlock new_blockid block' ]
  482         | otherwise
  483         = [ BasicBlock id block' ]
  484         where
  485           block' = foldr insert_dealloc [] insns
  486 
  487       insert_dealloc insn r = case insn of
  488         J _ -> dealloc ++ (insn : r)
  489         ANN _ (J _) -> dealloc ++ (insn : r)
  490         _other | jumpDestsOfInstr insn /= []
  491             -> patchJumpInstr insn retarget : r
  492         _other -> insn : r
  493 
  494         where retarget b = fromMaybe b (mapLookup b new_blockmap)
  495 
  496       new_code = concatMap insert_stack_insn code
  497     -- in
  498     return (CmmProc info lbl live (ListGraph new_code), retargetList)
  499 -- -----------------------------------------------------------------------------
  500 -- Machine's assembly language
  501 
  502 -- We have a few common "instructions" (nearly all the pseudo-ops) but
  503 -- mostly all of 'Instr' is machine-specific.
  504 
  505 -- Some additional (potential future) instructions are commented out. They are
  506 -- not needed yet for the backend but could be used in the future.
  507 data Instr
  508     -- comment pseudo-op
  509     = COMMENT SDoc
  510     | MULTILINE_COMMENT SDoc
  511 
  512     -- Annotated instruction. Should print <instr> # <doc>
  513     | ANN SDoc Instr
  514 
  515     -- location pseudo-op (file, line, col, name)
  516     | LOCATION Int Int Int String
  517 
  518     -- some static data spat out during code
  519     -- generation.  Will be extracted before
  520     -- pretty-printing.
  521     | LDATA   Section RawCmmStatics
  522 
  523     -- start a new basic block.  Useful during
  524     -- codegen, removed later.  Preceding
  525     -- instruction should be a jump, as per the
  526     -- invariants for a BasicBlock (see Cmm).
  527     | NEWBLOCK BlockId
  528 
  529     -- specify current stack offset for
  530     -- benefit of subsequent passes
  531     | DELTA   Int
  532 
  533     -- 0. Pseudo Instructions --------------------------------------------------
  534     | SXTB Operand Operand
  535     | UXTB Operand Operand
  536     | SXTH Operand Operand
  537     | UXTH Operand Operand
  538     -- | SXTW Operand Operand
  539     -- | SXTX Operand Operand
  540     | PUSH_STACK_FRAME
  541     | POP_STACK_FRAME
  542     -- 1. Arithmetic Instructions ----------------------------------------------
  543     -- | ADC Operand Operand Operang -- rd = rn + rm + C
  544     -- | ADCS ...
  545     | ADD Operand Operand Operand -- rd = rn + rm
  546     -- | ADDS Operand Operand Operand -- rd = rn + rm
  547     -- | ADR ...
  548     -- | ADRP ...
  549     | CMN Operand Operand -- rd + op2
  550     | CMP Operand Operand -- rd - op2
  551     -- | MADD ...
  552     -- | MNEG ...
  553     | MSUB Operand Operand Operand Operand -- rd = ra - rn × rm
  554     | MUL Operand Operand Operand -- rd = rn × rm
  555     | NEG Operand Operand -- rd = -op2
  556     -- | NEGS ...
  557     -- | NGC ...
  558     -- | NGCS ...
  559     -- | SBC ...
  560     -- | SBCS ...
  561     | SDIV Operand Operand Operand -- rd = rn ÷ rm
  562     -- | SMADDL ...
  563     -- | SMNEGL ...
  564     -- | SMSUBL ...
  565     -- | SMULH ...
  566     -- | SMULL ...
  567     | SUB Operand Operand Operand -- rd = rn - op2
  568     -- | SUBS ...
  569     | UDIV Operand Operand Operand -- rd = rn ÷ rm
  570     -- | UMADDL ...  -- Xd = Xa + Wn × Wm
  571     -- | UMNEGL ... -- Xd = - Wn × Wm
  572     -- | UMSUBL ... -- Xd = Xa - Wn × Wm
  573     -- | UMULH ... -- Xd = (Xn × Xm)_127:64
  574     -- | UMULL ... -- Xd = Wn × Wm
  575 
  576     -- 2. Bit Manipulation Instructions ----------------------------------------
  577     | SBFM Operand Operand Operand Operand -- rd = rn[i,j]
  578     -- SXTB = SBFM <Wd>, <Wn>, #0, #7
  579     -- SXTH = SBFM <Wd>, <Wn>, #0, #15
  580     -- SXTW = SBFM <Wd>, <Wn>, #0, #31
  581     | UBFM Operand Operand Operand Operand -- rd = rn[i,j]
  582     -- UXTB = UBFM <Wd>, <Wn>, #0, #7
  583     -- UXTH = UBFM <Wd>, <Wn>, #0, #15
  584     -- Signed/Unsigned bitfield extract
  585     | SBFX Operand Operand Operand Operand -- rd = rn[i,j]
  586     | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
  587 
  588     -- 3. Logical and Move Instructions ----------------------------------------
  589     | AND Operand Operand Operand -- rd = rn & op2
  590     | ANDS Operand Operand Operand -- rd = rn & op2
  591     | ASR Operand Operand Operand -- rd = rn ≫ rm  or  rd = rn ≫ #i, i is 6 bits
  592     | BIC Operand Operand Operand -- rd = rn & ~op2
  593     | BICS Operand Operand Operand -- rd = rn & ~op2
  594     | EON Operand Operand Operand -- rd = rn ⊕ ~op2
  595     | EOR Operand Operand Operand -- rd = rn ⊕ op2
  596     | LSL Operand Operand Operand -- rd = rn ≪ rm  or rd = rn ≪ #i, i is 6 bits
  597     | LSR Operand Operand Operand -- rd = rn ≫ rm  or rd = rn ≫ #i, i is 6 bits
  598     | MOV Operand Operand -- rd = rn  or  rd = #i
  599     | MOVK Operand Operand
  600     -- | MOVN Operand Operand
  601     -- | MOVZ Operand Operand
  602     | MVN Operand Operand -- rd = ~rn
  603     | ORN Operand Operand Operand -- rd = rn | ~op2
  604     | ORR Operand Operand Operand -- rd = rn | op2
  605     | ROR Operand Operand Operand -- rd = rn ≫ rm  or  rd = rn ≫ #i, i is 6 bits
  606     | TST Operand Operand -- rn & op2
  607     -- Load and stores.
  608     -- TODO STR/LDR might want to change to STP/LDP with XZR for the second register.
  609     | STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr
  610     | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr
  611     | STP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn -> *addr, Xm -> *(addr + 8)
  612     | LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8)
  613 
  614     -- Conditional instructions
  615     | CSET Operand Cond   -- if(cond) op <- 1 else op <- 0
  616 
  617     | CBZ Operand Target  -- if op == 0, then branch.
  618     | CBNZ Operand Target -- if op /= 0, then branch.
  619     -- Branching.
  620     | J Target            -- like B, but only generated from genJump. Used to distinguish genJumps from others.
  621     | B Target            -- unconditional branching b/br. (To a blockid, label or register)
  622     | BL Target [Reg] [Reg] -- branch and link (e.g. set x30 to next pc, and branch)
  623     | BCOND Cond Target   -- branch with condition. b.<cond>
  624 
  625     -- 8. Synchronization Instructions -----------------------------------------
  626     | DMBSY
  627     -- 9. Floating Point Instructions
  628     -- Float ConVerT
  629     | FCVT Operand Operand
  630     -- Signed ConVerT Float
  631     | SCVTF Operand Operand
  632     -- Float ConVerT to Zero Signed
  633     | FCVTZS Operand Operand
  634     -- Float ABSolute value
  635     | FABS Operand Operand
  636 
  637 instance Show Instr where
  638     show (LDR _f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2
  639     show (MOV o1 o2) = "MOV " ++ show o1 ++ ", " ++ show o2
  640     show _ = "missing"
  641 
  642 data Target
  643     = TBlock BlockId
  644     | TLabel CLabel
  645     | TReg   Reg
  646 
  647 
  648 -- Extension
  649 -- {Unsigned|Signed}XT{Byte|Half|Word|Doube}
  650 data ExtMode
  651     = EUXTB | EUXTH | EUXTW | EUXTX
  652     | ESXTB | ESXTH | ESXTW | ESXTX
  653     deriving (Eq, Show)
  654 
  655 data ShiftMode
  656     = SLSL | SLSR | SASR | SROR
  657     deriving (Eq, Show)
  658 
  659 
  660 -- We can also add ExtShift to Extension.
  661 -- However at most 3bits.
  662 type ExtShift = Int
  663 -- at most 6bits
  664 type RegShift = Int
  665 
  666 data Operand
  667         = OpReg Width Reg            -- register
  668         | OpRegExt Width Reg ExtMode ExtShift -- rm, <ext>[, <shift left>]
  669         | OpRegShift Width Reg ShiftMode RegShift     -- rm, <shift>, <0-64>
  670         | OpImm Imm            -- immediate value
  671         | OpImmShift Imm ShiftMode RegShift
  672         | OpAddr AddrMode       -- memory reference
  673         deriving (Eq, Show)
  674 
  675 -- Smart constructors
  676 opReg :: Width -> Reg -> Operand
  677 opReg = OpReg
  678 
  679 xzr, wzr, sp, ip0 :: Operand
  680 xzr = OpReg W64 (RegReal (RealRegSingle (-1)))
  681 wzr = OpReg W32 (RegReal (RealRegSingle (-1)))
  682 sp  = OpReg W64 (RegReal (RealRegSingle 31))
  683 ip0 = OpReg W64 (RegReal (RealRegSingle 16))
  684 
  685 _x :: Int -> Operand
  686 _x i = OpReg W64 (RegReal (RealRegSingle i))
  687 x0,  x1,  x2,  x3,  x4,  x5,  x6,  x7  :: Operand
  688 x8,  x9,  x10, x11, x12, x13, x14, x15 :: Operand
  689 x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
  690 x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
  691 x0  = OpReg W64 (RegReal (RealRegSingle  0))
  692 x1  = OpReg W64 (RegReal (RealRegSingle  1))
  693 x2  = OpReg W64 (RegReal (RealRegSingle  2))
  694 x3  = OpReg W64 (RegReal (RealRegSingle  3))
  695 x4  = OpReg W64 (RegReal (RealRegSingle  4))
  696 x5  = OpReg W64 (RegReal (RealRegSingle  5))
  697 x6  = OpReg W64 (RegReal (RealRegSingle  6))
  698 x7  = OpReg W64 (RegReal (RealRegSingle  7))
  699 x8  = OpReg W64 (RegReal (RealRegSingle  8))
  700 x9  = OpReg W64 (RegReal (RealRegSingle  9))
  701 x10 = OpReg W64 (RegReal (RealRegSingle 10))
  702 x11 = OpReg W64 (RegReal (RealRegSingle 11))
  703 x12 = OpReg W64 (RegReal (RealRegSingle 12))
  704 x13 = OpReg W64 (RegReal (RealRegSingle 13))
  705 x14 = OpReg W64 (RegReal (RealRegSingle 14))
  706 x15 = OpReg W64 (RegReal (RealRegSingle 15))
  707 x16 = OpReg W64 (RegReal (RealRegSingle 16))
  708 x17 = OpReg W64 (RegReal (RealRegSingle 17))
  709 x18 = OpReg W64 (RegReal (RealRegSingle 18))
  710 x19 = OpReg W64 (RegReal (RealRegSingle 19))
  711 x20 = OpReg W64 (RegReal (RealRegSingle 20))
  712 x21 = OpReg W64 (RegReal (RealRegSingle 21))
  713 x22 = OpReg W64 (RegReal (RealRegSingle 22))
  714 x23 = OpReg W64 (RegReal (RealRegSingle 23))
  715 x24 = OpReg W64 (RegReal (RealRegSingle 24))
  716 x25 = OpReg W64 (RegReal (RealRegSingle 25))
  717 x26 = OpReg W64 (RegReal (RealRegSingle 26))
  718 x27 = OpReg W64 (RegReal (RealRegSingle 27))
  719 x28 = OpReg W64 (RegReal (RealRegSingle 28))
  720 x29 = OpReg W64 (RegReal (RealRegSingle 29))
  721 x30 = OpReg W64 (RegReal (RealRegSingle 30))
  722 x31 = OpReg W64 (RegReal (RealRegSingle 31))
  723 
  724 _d :: Int -> Operand
  725 _d = OpReg W64 . RegReal . RealRegSingle
  726 d0,  d1,  d2,  d3,  d4,  d5,  d6,  d7  :: Operand
  727 d8,  d9,  d10, d11, d12, d13, d14, d15 :: Operand
  728 d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
  729 d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
  730 d0  = OpReg W64 (RegReal (RealRegSingle 32))
  731 d1  = OpReg W64 (RegReal (RealRegSingle 33))
  732 d2  = OpReg W64 (RegReal (RealRegSingle 34))
  733 d3  = OpReg W64 (RegReal (RealRegSingle 35))
  734 d4  = OpReg W64 (RegReal (RealRegSingle 36))
  735 d5  = OpReg W64 (RegReal (RealRegSingle 37))
  736 d6  = OpReg W64 (RegReal (RealRegSingle 38))
  737 d7  = OpReg W64 (RegReal (RealRegSingle 39))
  738 d8  = OpReg W64 (RegReal (RealRegSingle 40))
  739 d9  = OpReg W64 (RegReal (RealRegSingle 41))
  740 d10 = OpReg W64 (RegReal (RealRegSingle 42))
  741 d11 = OpReg W64 (RegReal (RealRegSingle 43))
  742 d12 = OpReg W64 (RegReal (RealRegSingle 44))
  743 d13 = OpReg W64 (RegReal (RealRegSingle 45))
  744 d14 = OpReg W64 (RegReal (RealRegSingle 46))
  745 d15 = OpReg W64 (RegReal (RealRegSingle 47))
  746 d16 = OpReg W64 (RegReal (RealRegSingle 48))
  747 d17 = OpReg W64 (RegReal (RealRegSingle 49))
  748 d18 = OpReg W64 (RegReal (RealRegSingle 50))
  749 d19 = OpReg W64 (RegReal (RealRegSingle 51))
  750 d20 = OpReg W64 (RegReal (RealRegSingle 52))
  751 d21 = OpReg W64 (RegReal (RealRegSingle 53))
  752 d22 = OpReg W64 (RegReal (RealRegSingle 54))
  753 d23 = OpReg W64 (RegReal (RealRegSingle 55))
  754 d24 = OpReg W64 (RegReal (RealRegSingle 56))
  755 d25 = OpReg W64 (RegReal (RealRegSingle 57))
  756 d26 = OpReg W64 (RegReal (RealRegSingle 58))
  757 d27 = OpReg W64 (RegReal (RealRegSingle 59))
  758 d28 = OpReg W64 (RegReal (RealRegSingle 60))
  759 d29 = OpReg W64 (RegReal (RealRegSingle 61))
  760 d30 = OpReg W64 (RegReal (RealRegSingle 62))
  761 d31 = OpReg W64 (RegReal (RealRegSingle 63))
  762 
  763 opRegUExt :: Width -> Reg -> Operand
  764 opRegUExt W64 r = OpRegExt W64 r EUXTX 0
  765 opRegUExt W32 r = OpRegExt W32 r EUXTW 0
  766 opRegUExt W16 r = OpRegExt W16 r EUXTH 0
  767 opRegUExt W8  r = OpRegExt W8  r EUXTB 0
  768 opRegUExt w  _r = pprPanic "opRegUExt" (text $ show w)
  769 
  770 opRegSExt :: Width -> Reg -> Operand
  771 opRegSExt W64 r = OpRegExt W64 r ESXTX 0
  772 opRegSExt W32 r = OpRegExt W32 r ESXTW 0
  773 opRegSExt W16 r = OpRegExt W16 r ESXTH 0
  774 opRegSExt W8  r = OpRegExt W8  r ESXTB 0
  775 opRegSExt w  _r = pprPanic "opRegSExt" (text $ show w)