never executed always true always false
    1 module GHC.CmmToAsm.Reg.Linear.AArch64 where
    2 
    3 import GHC.Prelude
    4 
    5 import GHC.CmmToAsm.AArch64.Regs
    6 import GHC.Platform.Reg.Class
    7 import GHC.Platform.Reg
    8 
    9 import GHC.Utils.Outputable
   10 import GHC.Utils.Panic
   11 import GHC.Platform
   12 
   13 import Data.Word
   14 
   15 import GHC.Stack
   16 -- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
   17 -- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
   18 -- extension in Armv8-A.
   19 --
   20 -- Armv8-A is a fundamental change to the Arm architecture. It supports the
   21 -- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set
   22 -- “A64”. To provide compatibility with the Armv7-A (32-bit architecture)
   23 -- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of
   24 -- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A.
   25 --
   26 -- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx]
   27 -- where size is 64, 32, 16, 8, ... and the index i allows us
   28 -- to access the given part.
   29 --
   30 -- History of Arm Adv SIMD
   31 -- .---------------------------------------------------------------------------.
   32 -- | Armv6                  | Armv7-A                | Armv8-A AArch64         |
   33 -- | SIMD extension         | NEON                   | NEON                    |
   34 -- |===========================================================================|
   35 -- | - Operates on 32-bit   | - Separate reg. bank,  | - Separate reg. bank,   |
   36 -- |   GP ARM registers     |    32x64-bit NEON regs |   32x128-bit NEON regs  |
   37 -- | - 8-bit/16-bit integer | - 8/16/32/64-bit int   | - 8/16/32/64-bit int    |
   38 -- |                        | - Single percision fp  | - Single percision fp   |
   39 -- |                        |                        | - Double precision fp   |
   40 -- |                        |                        | - Single/Double fp are  |
   41 -- |                        |                        |   IEEE compliant        |
   42 -- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops   | - Up to 16x8-bit ops    |
   43 -- |   per instruction      |   per instruction      |   per instruction       |
   44 -- '---------------------------------------------------------------------------'
   45 
   46 data FreeRegs = FreeRegs !Word32 !Word32
   47 
   48 instance Show FreeRegs where
   49   show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f
   50 
   51 instance Outputable FreeRegs where
   52     ppr (FreeRegs g f) = text "   " <+> foldr (\i x -> pad_int i    <+> x) (text "") [0..31]
   53                       $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31]
   54                       $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31]
   55       where pad_int i | i < 10 = char ' ' <> int i
   56             pad_int i = int i
   57             -- remember bit = 1 means it's available.
   58             show_bit bits bit | testBit bits bit = text "  "
   59             show_bit _    _ = text " x"
   60 
   61 noFreeRegs :: FreeRegs
   62 noFreeRegs = FreeRegs 0 0
   63 
   64 showBits :: Word32 -> String
   65 showBits w = map (\i -> if testBit w i then '1' else '0') [0..31]
   66 
   67 -- FR instance implementation (See Linear.FreeRegs)
   68 allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
   69 allocateReg (RealRegSingle r) (FreeRegs g f)
   70     | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
   71     | r < 32 && testBit g r = FreeRegs (clearBit g r) f
   72     | r > 31 = panic $ "Linear.AArch64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f
   73     | otherwise = pprPanic "Linear.AArch64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
   74 allocateReg _ _ = panic "Linear.AArch64.allocReg: bad reg"
   75 
   76 -- we start from 28 downwards... the logic is similar to the ppc logic.
   77 -- 31 is Stack Pointer
   78 -- 30 is Link Register
   79 -- 29 is Stack Frame (by convention)
   80 -- 19-28 are callee save
   81 -- the lower ones are all caller save
   82 
   83 -- For this reason someone decided to give aarch64 only 6 regs for
   84 -- STG:
   85 -- 19: Base
   86 -- 20: Sp
   87 -- 21: Hp
   88 -- 22-27: R1-R6
   89 -- 28: SpLim
   90 
   91 -- For LLVM code gen interop:
   92 -- See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150119/253722.html
   93 -- and the current ghccc implementation here:
   94 -- https://github.com/llvm/llvm-project/blob/161ae1f39816edf667aaa190bce702a86879c7bd/llvm/lib/Target/AArch64/AArch64CallingConvention.td#L324-L363
   95 -- and https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/generated-code
   96 -- for the STG discussion.
   97 {- For reference the ghcc from the link above:
   98 let Entry = 1 in
   99 def CC_AArch64_GHC : CallingConv<[
  100   CCIfType<[iPTR], CCBitConvertToType<i64>>,
  101 
  102   // Handle all vector types as either f64 or v2f64.
  103   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
  104   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
  105 
  106   CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
  107   CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
  108   CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
  109 
  110   // Promote i8/i16/i32 arguments to i64.
  111   CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
  112 
  113   // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
  114   CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
  115 ]>;
  116 -}
  117 
  118 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
  119 getFreeRegs cls (FreeRegs g f)
  120   | RcFloat   <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
  121   | RcDouble  <- cls = go 32 f 31
  122   | RcInteger <- cls = go  0 g 18
  123     where
  124         go _   _ i | i < 0 = []
  125         go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1)
  126                    | otherwise   = go off x $! i - 1
  127 
  128 initFreeRegs :: Platform -> FreeRegs
  129 initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
  130 
  131 releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
  132 releaseReg (RealRegSingle r) (FreeRegs g f)
  133   | r > 31 && testBit f (r - 32) = pprPanic "Linear.AArch64.releaseReg" (text  "can't release non-allocated reg v" <> int (r - 32))
  134   | r < 32 && testBit g r = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg x" <> int r)
  135   | r > 31 = FreeRegs g (setBit f (r - 32))
  136   | otherwise = FreeRegs (setBit g r) f
  137 releaseReg _ _ = pprPanic "Linear.AArch64.releaseReg" (text "bad reg")