never executed always true always false
1 module GHC.CmmToAsm.Reg.Linear.AArch64 where
2
3 import GHC.Prelude
4
5 import GHC.CmmToAsm.AArch64.Regs
6 import GHC.Platform.Reg.Class
7 import GHC.Platform.Reg
8
9 import GHC.Utils.Outputable
10 import GHC.Utils.Panic
11 import GHC.Platform
12
13 import Data.Word
14
15 import GHC.Stack
16 -- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
17 -- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
18 -- extension in Armv8-A.
19 --
20 -- Armv8-A is a fundamental change to the Arm architecture. It supports the
21 -- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set
22 -- “A64”. To provide compatibility with the Armv7-A (32-bit architecture)
23 -- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of
24 -- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A.
25 --
26 -- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx]
27 -- where size is 64, 32, 16, 8, ... and the index i allows us
28 -- to access the given part.
29 --
30 -- History of Arm Adv SIMD
31 -- .---------------------------------------------------------------------------.
32 -- | Armv6 | Armv7-A | Armv8-A AArch64 |
33 -- | SIMD extension | NEON | NEON |
34 -- |===========================================================================|
35 -- | - Operates on 32-bit | - Separate reg. bank, | - Separate reg. bank, |
36 -- | GP ARM registers | 32x64-bit NEON regs | 32x128-bit NEON regs |
37 -- | - 8-bit/16-bit integer | - 8/16/32/64-bit int | - 8/16/32/64-bit int |
38 -- | | - Single percision fp | - Single percision fp |
39 -- | | | - Double precision fp |
40 -- | | | - Single/Double fp are |
41 -- | | | IEEE compliant |
42 -- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops | - Up to 16x8-bit ops |
43 -- | per instruction | per instruction | per instruction |
44 -- '---------------------------------------------------------------------------'
45
46 data FreeRegs = FreeRegs !Word32 !Word32
47
48 instance Show FreeRegs where
49 show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f
50
51 instance Outputable FreeRegs where
52 ppr (FreeRegs g f) = text " " <+> foldr (\i x -> pad_int i <+> x) (text "") [0..31]
53 $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31]
54 $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31]
55 where pad_int i | i < 10 = char ' ' <> int i
56 pad_int i = int i
57 -- remember bit = 1 means it's available.
58 show_bit bits bit | testBit bits bit = text " "
59 show_bit _ _ = text " x"
60
61 noFreeRegs :: FreeRegs
62 noFreeRegs = FreeRegs 0 0
63
64 showBits :: Word32 -> String
65 showBits w = map (\i -> if testBit w i then '1' else '0') [0..31]
66
67 -- FR instance implementation (See Linear.FreeRegs)
68 allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
69 allocateReg (RealRegSingle r) (FreeRegs g f)
70 | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
71 | r < 32 && testBit g r = FreeRegs (clearBit g r) f
72 | r > 31 = panic $ "Linear.AArch64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f
73 | otherwise = pprPanic "Linear.AArch64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
74 allocateReg _ _ = panic "Linear.AArch64.allocReg: bad reg"
75
76 -- we start from 28 downwards... the logic is similar to the ppc logic.
77 -- 31 is Stack Pointer
78 -- 30 is Link Register
79 -- 29 is Stack Frame (by convention)
80 -- 19-28 are callee save
81 -- the lower ones are all caller save
82
83 -- For this reason someone decided to give aarch64 only 6 regs for
84 -- STG:
85 -- 19: Base
86 -- 20: Sp
87 -- 21: Hp
88 -- 22-27: R1-R6
89 -- 28: SpLim
90
91 -- For LLVM code gen interop:
92 -- See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150119/253722.html
93 -- and the current ghccc implementation here:
94 -- https://github.com/llvm/llvm-project/blob/161ae1f39816edf667aaa190bce702a86879c7bd/llvm/lib/Target/AArch64/AArch64CallingConvention.td#L324-L363
95 -- and https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/generated-code
96 -- for the STG discussion.
97 {- For reference the ghcc from the link above:
98 let Entry = 1 in
99 def CC_AArch64_GHC : CallingConv<[
100 CCIfType<[iPTR], CCBitConvertToType<i64>>,
101
102 // Handle all vector types as either f64 or v2f64.
103 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
104 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
105
106 CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
107 CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
108 CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
109
110 // Promote i8/i16/i32 arguments to i64.
111 CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
112
113 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
114 CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
115 ]>;
116 -}
117
118 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
119 getFreeRegs cls (FreeRegs g f)
120 | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
121 | RcDouble <- cls = go 32 f 31
122 | RcInteger <- cls = go 0 g 18
123 where
124 go _ _ i | i < 0 = []
125 go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1)
126 | otherwise = go off x $! i - 1
127
128 initFreeRegs :: Platform -> FreeRegs
129 initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
130
131 releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
132 releaseReg (RealRegSingle r) (FreeRegs g f)
133 | r > 31 && testBit f (r - 32) = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32))
134 | r < 32 && testBit g r = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg x" <> int r)
135 | r > 31 = FreeRegs g (setBit f (r - 32))
136 | otherwise = FreeRegs (setBit g r) f
137 releaseReg _ _ = pprPanic "Linear.AArch64.releaseReg" (text "bad reg")