{-# LANGUAGE BangPatterns, GADTs #-}

module GHC.Cmm.Graph
  ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
  , (<*>), catAGraphs
  , mkLabel, mkMiddle, mkLast, outOfLine
  , lgraphOfAGraph, labelAGraph

  , stackStubExpr
  , mkNop, mkAssign, mkStore
  , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
  , mkJumpReturnsTo
  , mkJump, mkJumpExtra
  , mkRawJump
  , mkCbranch, mkSwitch
  , mkReturn, mkComment, mkCallEntry, mkBranch
  , mkUnwind
  , copyInOflow, copyOutOflow
  , noExtraStack
  , toCall, Transfer(..)
  )
where

import GHC.Prelude hiding ( (<*>) ) -- avoid importing (<*>)

import GHC.Platform.Profile

import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.CallConv
import GHC.Cmm.Switch (SwitchTargets)

import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Data.FastString
import GHC.Types.ForeignCall
import GHC.Data.OrdList
import GHC.Runtime.Heap.Layout (ByteOff)
import GHC.Types.Unique.Supply
import GHC.Utils.Panic
import GHC.Utils.Constants (debugIsOn)


-----------------------------------------------------------------------------
-- Building Graphs


-- | CmmAGraph is a chunk of code consisting of:
--
--   * ordinary statements (assignments, stores etc.)
--   * jumps
--   * labels
--   * out-of-line labelled blocks
--
-- The semantics is that control falls through labels and out-of-line
-- blocks.  Everything after a jump up to the next label is by
-- definition unreachable code, and will be discarded.
--
-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
-- control flows from the first to the second.
--
-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
-- by providing a label for the entry point and a tick scope; see
-- 'labelAGraph'.
type CmmAGraph = OrdList CgStmt
-- | Unlabeled graph with tick scope
type CmmAGraphScoped = (CmmAGraph, CmmTickScope)
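
-- For illustration only, a minimal hedged sketch of how client code
-- typically assembles an AGraph with the smart constructors defined
-- below; 'entry' and 'target' are assumed placeholder labels:
--
-- > g :: BlockId -> BlockId -> CmmTickScope -> CmmGraph
-- > g entry target tscope =
-- >   labelAGraph entry
-- >     (mkComment (fsLit "example") <*> mkBranch target, tscope)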

data CgStmt
  = CgLabel BlockId CmmTickScope
  | CgStmt (CmmNode O O)
  | CgLast (CmmNode O C)
  | CgFork BlockId CmmAGraph CmmTickScope

flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
flattenCmmAGraph id (stmts_t, tscope) =
    CmmGraph { g_entry = id,
               g_graph = GMany NothingO body NothingO }
  where
  body = foldr addBlock emptyBody $ flatten id stmts_t tscope []

  --
  -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
  --
  -- NB. avoid the quadratic-append trap by passing in the tail of the
  -- list.  This is important for Very Long Functions (e.g. in T783).
  --
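  -- The same pattern in miniature (a hedged, illustrative analogue,
  -- not this module's code): to flatten a tree of lists, pass the
  -- tail along instead of appending,
  --
  -- >  flatTree (Leaf x)   rest = x : rest
  -- >  flatTree (Node l r) rest = flatTree l (flatTree r rest)
  --
  -- which is linear in the output, whereas the naive version built
  -- with (++) can go quadratic on left-nested trees.
  --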
  flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C]
          -> [Block CmmNode C C]
  flatten id g tscope blocks
      = flatten1 (fromOL g) block' blocks
      where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock

  --
  -- flatten0: we are outside a block at this point: any code before
  -- the first label is unreachable, so just drop it.
  --
  flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
  flatten0 [] blocks = blocks

  flatten0 (CgLabel id tscope : stmts) blocks
    = flatten1 stmts block blocks
    where !block = blockJoinHead (CmmEntry id tscope) emptyBlock

  flatten0 (CgFork fork_id stmts_t tscope : rest) blocks
    = flatten fork_id stmts_t tscope $ flatten0 rest blocks

  flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
  flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks

  --
  -- flatten1: we have a partial block, collect statements until the
  -- next last node to make a block, then call flatten0 to get the rest
  -- of the blocks
  --
  flatten1 :: [CgStmt] -> Block CmmNode C O
           -> [Block CmmNode C C] -> [Block CmmNode C C]

  -- The current block falls through to the end of a function or fork:
  -- this code should not be reachable, but it may be referenced by
  -- other code that is not reachable.  We'll remove it later with
  -- dead-code analysis, but for now we have to keep the graph
  -- well-formed, so we terminate the block with a branch to the
  -- beginning of the current block.
  flatten1 [] block blocks
    = blockJoinTail block (CmmBranch (entryLabel block)) : blocks

  flatten1 (CgLast stmt : stmts) block blocks
    = block' : flatten0 stmts blocks
    where !block' = blockJoinTail block stmt

  flatten1 (CgStmt stmt : stmts) block blocks
    = flatten1 stmts block' blocks
    where !block' = blockSnoc block stmt

  flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks
    = flatten fork_id stmts_t tscope $ flatten1 rest block blocks

  -- a label here means that we should start a new block, and the
  -- current block should fall through to the new block.
  flatten1 (CgLabel id tscp : stmts) block blocks
    = blockJoinTail block (CmmBranch id) :
      flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks
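
  -- For intuition, a hedged, schematic example of flattening (labels
  -- and node contents are made up).  Starting from entry label L1,
  -- the stream
  --
  --   [CgStmt s1, CgLast l1, CgStmt dead, CgLabel L2, CgStmt s2, CgLast l2]
  --
  -- becomes the two closed blocks
  --
  --   L1: s1; l1          L2: s2; l2
  --
  -- flatten1 closes the first block at l1, and flatten0 then drops
  -- the unreachable 'dead' statement before the label L2 opens a new
  -- block.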


---------- AGraph manipulation

(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph
(<*>) = appOL

catAGraphs :: [CmmAGraph] -> CmmAGraph
catAGraphs = concatOL

-- | creates a sequence "goto id; id:" as an AGraph
mkLabel :: BlockId -> CmmTickScope -> CmmAGraph
mkLabel bid scp = unitOL (CgLabel bid scp)

-- | creates an open AGraph from a given node
mkMiddle :: CmmNode O O -> CmmAGraph
mkMiddle middle = unitOL (CgStmt middle)

-- | creates a closed AGraph from a given node
mkLast :: CmmNode O C -> CmmAGraph
mkLast last = unitOL (CgLast last)

-- | A labelled code block; should end in a last node
outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph
outOfLine l (c,s) = unitOL (CgFork l c s)

-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph
lgraphOfAGraph g = do
  u <- getUniqueM
  return (labelAGraph (mkBlockId u) g)

-- | use the given BlockId as the label of the entry point
labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
labelAGraph lbl ag = flattenCmmAGraph lbl ag

---------- No-ops
mkNop :: CmmAGraph
mkNop = nilOL

mkComment :: FastString -> CmmAGraph
mkComment fs
  -- SDM: generating all those comments takes time; this saved about 4% for me
  | debugIsOn = mkMiddle $ CmmComment fs
  | otherwise = nilOL

---------- Assignment and store
mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
mkAssign l (CmmReg r) | l == r = mkNop
mkAssign l r = mkMiddle $ CmmAssign l r
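
-- For example (hedged): the first equation means a self-assignment
--
-- > mkAssign (CmmLocal r) (CmmReg (CmmLocal r))
--
-- yields mkNop (the empty OrdList), so no redundant CmmAssign node
-- ever enters the graph.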

mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
mkStore l r = mkMiddle $ CmmStore l r

---------- Control transfer
mkJump :: Profile -> Convention -> CmmExpr
       -> [CmmExpr]
       -> UpdFrameOffset
       -> CmmAGraph
mkJump profile conv e actuals updfr_off =
  lastWithArgs profile Jump Old conv actuals updfr_off $
    toCall e Nothing updfr_off 0

-- | A jump where the caller says what the live GlobalRegs are.  Used
-- for low-level hand-written Cmm.
mkRawJump :: Profile -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
          -> CmmAGraph
mkRawJump profile e updfr_off vols =
  lastWithArgs profile Jump Old NativeNodeCall [] updfr_off $
    \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols


mkJumpExtra :: Profile -> Convention -> CmmExpr -> [CmmExpr]
            -> UpdFrameOffset -> [CmmExpr]
            -> CmmAGraph
mkJumpExtra profile conv e actuals updfr_off extra_stack =
  lastWithArgsAndExtraStack profile Jump Old conv actuals updfr_off extra_stack $
    toCall e Nothing updfr_off 0

mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph
mkCbranch pred ifso ifnot likely =
  mkLast (CmmCondBranch pred ifso ifnot likely)

mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph
mkSwitch e tbl = mkLast $ CmmSwitch e tbl

mkReturn :: Profile -> CmmExpr -> [CmmExpr] -> UpdFrameOffset
         -> CmmAGraph
mkReturn profile e actuals updfr_off =
  lastWithArgs profile Ret Old NativeReturn actuals updfr_off $
    toCall e Nothing updfr_off 0

mkBranch :: BlockId -> CmmAGraph
mkBranch bid = mkLast (CmmBranch bid)

mkFinalCall :: Profile
            -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset
            -> CmmAGraph
mkFinalCall profile f _ actuals updfr_off =
  lastWithArgs profile Call Old NativeDirectCall actuals updfr_off $
    toCall f Nothing updfr_off 0

mkCallReturnsTo :: Profile -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId
                -> ByteOff
                -> UpdFrameOffset
                -> [CmmExpr]
                -> CmmAGraph
mkCallReturnsTo profile f callConv actuals ret_lbl ret_off updfr_off extra_stack =
  lastWithArgsAndExtraStack profile Call (Young ret_lbl) callConv actuals
    updfr_off extra_stack $
      toCall f (Just ret_lbl) updfr_off ret_off

-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be
-- already on the stack).
mkJumpReturnsTo :: Profile -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId
                -> ByteOff
                -> UpdFrameOffset
                -> CmmAGraph
mkJumpReturnsTo profile f callConv actuals ret_lbl ret_off updfr_off =
  lastWithArgs profile JumpRet (Young ret_lbl) callConv actuals updfr_off $
    toCall f (Just ret_lbl) updfr_off ret_off

mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as

-- | Construct a 'CmmUnwind' node for the given register and unwinding
-- expression.
mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph
mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)]

--------------------------------------------------------------------------


-- Why are we inserting extra blocks that simply branch to the successors?
-- Because in addition to the branch instruction, @mkBranch@ will insert
-- a necessary adjustment to the stack pointer.


-- For debugging purposes, we can stub out dead stack slots:
stackStubExpr :: Width -> CmmExpr
stackStubExpr w = CmmLit (CmmInt 0 w)

-- When we copy in parameters, we usually want to put overflow
-- parameters on the stack, but sometimes we want to pass the
-- variables in their spill slots.  Therefore, for copying arguments
-- and results, we provide different functions to pass the arguments
-- in an overflow area and to pass them in spill slots.
copyInOflow :: Profile -> Convention -> Area
            -> [CmmFormal]
            -> [CmmFormal]
            -> (Int, [GlobalReg], CmmAGraph)

copyInOflow profile conv area formals extra_stk
  = (offset, gregs, catAGraphs $ map mkMiddle nodes)
  where (offset, gregs, nodes) = copyIn profile conv area formals extra_stk

-- Return the number of bytes used for copying arguments, as well as the
-- instructions to copy the arguments.
copyIn :: Profile -> Convention -> Area
       -> [CmmFormal]
       -> [CmmFormal]
       -> (ByteOff, [GlobalReg], [CmmNode O O])
copyIn profile conv area formals extra_stk
  = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
  where
    platform = profilePlatform profile

    -- See Note [Width of parameters]
    ci (reg, RegisterParam r@(VanillaReg {})) =
        let local = CmmLocal reg
            global = CmmReg (CmmGlobal r)
            width = cmmRegWidth platform local
            expr
              | width == wordWidth platform = global
              | width < wordWidth platform =
                  CmmMachOp (MO_XX_Conv (wordWidth platform) width) [global]
              | otherwise = panic "Parameter width greater than word width"

        in CmmAssign local expr

    -- Non VanillaRegs
    ci (reg, RegisterParam r) =
        CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))

    ci (reg, StackParam off)
      | isBitsType $ localRegType reg
      , typeWidth (localRegType reg) < wordWidth platform =
        let
          stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth platform))
          local = CmmLocal reg
          width = cmmRegWidth platform local
          expr = CmmMachOp (MO_XX_Conv (wordWidth platform) width) [stack_slot]
        in CmmAssign local expr

      | otherwise =
          CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
      where ty = localRegType reg

    init_offset = widthInBytes (wordWidth platform) -- infotable

    (stk_off, stk_args) = assignStack platform init_offset localRegType extra_stk

    (stk_size, args) = assignArgumentsPos profile stk_off conv
                                          localRegType formals
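
-- A hedged sketch of copyIn's output: for a convention that puts a
-- word-sized formal x in a vanilla register and a formal y in a stack
-- slot, the generated middle nodes are roughly (in schematic Cmm)
--
--   x = R1;
--   y = I64[area + off];
--
-- with an MO_XX_Conv truncation wrapped around the right-hand side
-- whenever the formal is narrower than the native word; see
-- Note [Width of parameters].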

-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:

data Transfer = Call | JumpRet | Jump | Ret deriving Eq

copyOutOflow :: Profile -> Convention -> Transfer -> Area -> [CmmExpr]
             -> UpdFrameOffset
             -> [CmmExpr] -- extra stack args
             -> (Int, [GlobalReg], CmmAGraph)

-- Generate code to move the actual parameters into the locations
-- required by the calling convention.  This includes a store for the
-- return address.
--
-- The argument layout function ignores the pointer to the info table,
-- so we slot that in here.  When copying-out to a young area, we set
-- the info table for return and adjust the offsets of the other
-- parameters.  If this is a call instruction, we adjust the offsets
-- of the other parameters.
copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
  = (stk_size, regs, graph)
  where
    platform = profilePlatform profile
    (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)

    -- See Note [Width of parameters]
    co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
        let width = cmmExprWidth platform v
            value
              | width == wordWidth platform = v
              | width < wordWidth platform =
                  CmmMachOp (MO_XX_Conv width (wordWidth platform)) [v]
              | otherwise = panic "Parameter width greater than word width"

        in (r:rs, mkAssign (CmmGlobal r) value <*> ms)

    -- Non VanillaRegs
    co (v, RegisterParam r) (rs, ms) =
        (r:rs, mkAssign (CmmGlobal r) v <*> ms)

    -- See Note [Width of parameters]
    co (v, StackParam off) (rs, ms)
      = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)

    width v = cmmExprWidth platform v
    value v
      | isBitsType $ cmmExprType platform v
      , width v < wordWidth platform =
          CmmMachOp (MO_XX_Conv (width v) (wordWidth platform)) [v]
      | otherwise = v

    (setRA, init_offset) =
      case area of
        Young id ->  -- Generate a store instruction for
                     -- the return address if making a call
          case transfer of
            Call ->
              ([(CmmLit (CmmBlock id), StackParam init_offset)],
               widthInBytes (wordWidth platform))
            JumpRet ->
              ([],
               widthInBytes (wordWidth platform))
            _other ->
              ([], 0)
        Old -> ([], updfr_off)

    (extra_stack_off, stack_params) =
      assignStack platform init_offset (cmmExprType platform) extra_stack_stuff

    args :: [(CmmExpr, ParamLocation)]   -- The argument and where to put it
    (stk_size, args) = assignArgumentsPos profile extra_stack_off conv
                                          (cmmExprType platform) actuals


-- Note [Width of parameters]
--
-- Consider passing a small (< word width) primitive like Int8# to a function.
-- It's actually non-trivial to do this without extending/narrowing:
-- * Global registers are considered to have native word width (i.e., 64-bits on
--   x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a
--   global register.
-- * The same problem exists with LLVM IR.
-- * Lowering gets harder since on x86-32 not every register exposes its lower
--   8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
--   8-bit register for %edi).  So we would either need to extend/narrow anyway,
--   or complicate the calling convention.
-- * Passing a small integer in a stack slot, which has native word width,
--   requires extending to word width when writing to the stack and narrowing
--   when reading off the stack (see #16258).
-- So instead, we always extend every parameter smaller than native word width
-- in copyOutOflow and then truncate it back to the expected width in copyIn.
-- Note that we do this in Cmm using MO_XX_Conv to avoid requiring
-- zero-/sign-extending - it's up to the backend to handle this in the most
-- efficient way (e.g., a simple register move or a smaller-size store).
-- This convention (of ignoring the upper bits) is different from some C ABIs,
-- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters.
--
-- There was some discussion about this on this PR:
-- https://github.com/ghc-proposals/ghc-proposals/pull/74
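--
-- For example (hedged, schematic Cmm): passing an Int8# value x8 to a
-- function and reading it back in the callee looks like
--
--   caller (copyOutOflow):  R2 = MO_XX_Conv W8 W64 (x8);
--   callee (copyIn):        y8 = MO_XX_Conv W64 W8 (R2);
--
-- The upper 56 bits of R2 are unspecified in between, which is fine:
-- MO_XX_Conv promises nothing about them.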


mkCallEntry :: Profile -> Convention -> [CmmFormal] -> [CmmFormal]
            -> (Int, [GlobalReg], CmmAGraph)
mkCallEntry profile conv formals extra_stk
  = copyInOflow profile conv Old formals extra_stk

lastWithArgs :: Profile -> Transfer -> Area -> Convention -> [CmmExpr]
             -> UpdFrameOffset
             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
             -> CmmAGraph
lastWithArgs profile transfer area conv actuals updfr_off last =
  lastWithArgsAndExtraStack profile transfer area conv actuals
                            updfr_off noExtraStack last

lastWithArgsAndExtraStack :: Profile
                          -> Transfer -> Area -> Convention -> [CmmExpr]
                          -> UpdFrameOffset -> [CmmExpr]
                          -> (ByteOff -> [GlobalReg] -> CmmAGraph)
                          -> CmmAGraph
lastWithArgsAndExtraStack profile transfer area conv actuals updfr_off
                          extra_stack last =
  copies <*> last outArgs regs
  where
    (outArgs, regs, copies) = copyOutOflow profile conv transfer area actuals
                                           updfr_off extra_stack


noExtraStack :: [CmmExpr]
noExtraStack = []

toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff
       -> ByteOff -> [GlobalReg]
       -> CmmAGraph
toCall e cont updfr_off res_space arg_space regs =
  mkLast $ CmmCall e cont regs arg_space res_space updfr_off
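
-- A hedged sketch of how the pieces above compose: mkReturn, for
-- instance, is lastWithArgs applied to a continuation built from
-- toCall, so it elaborates to roughly
--
--   <copy actuals into NativeReturn locations>   <*>
--   CmmCall { target = e, cont = Nothing, ... }
--
-- where the argument space and live registers fed to toCall are
-- whatever copyOutOflow computed for the actual parameters.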