{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE UndecidableInstances #-}

{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

-- CmmNode type for representation using Hoopl graphs.

module GHC.Cmm.Node (
     CmmNode(..), CmmFormal, CmmActual, CmmTickish,
     UpdFrameOffset, Convention(..),
     ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
     CmmReturnInfo(..),
     mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
     mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors,

     -- * Tick scopes
     CmmTickScope(..), isTickSubScope, combineTickScopes,
  ) where

import GHC.Prelude hiding (succ)

import GHC.Platform.Regs
import GHC.Cmm.Expr
import GHC.Cmm.Switch
import GHC.Data.FastString
import GHC.Types.ForeignCall
import GHC.Utils.Outputable
import GHC.Runtime.Heap.Layout
import GHC.Types.Tickish (CmmTickish)
import qualified GHC.Types.Unique as U

import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Label
import Data.Maybe
import Data.List (tails,sortBy)
import GHC.Types.Unique (nonDetCmpUnique)
import GHC.Utils.Misc


------------------------
-- CmmNode
#define ULabel {-# UNPACK #-} !Label

data CmmNode e x where
  CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O

  CmmComment :: FastString -> CmmNode O O

  -- Tick annotation, covering Cmm code in our tick scope. We only
  -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
  -- See Note [CmmTick scoping details]
  CmmTick :: !CmmTickish -> CmmNode O O

  -- Unwind pseudo-instruction, encoding stack unwinding
  -- instructions for a debugger. This describes how to reconstruct
  -- the "old" value of a register if we want to navigate the stack
  -- up one frame. Having unwind information for @Sp@ will allow the
  -- debugger to "walk" the stack.
  --
  -- See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
  CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O

  CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
    -- Assign to register

  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
    -- Assign to memory location. Size is
    -- given by cmmExprType of the rhs.

  CmmUnsafeForeignCall ::       -- An unsafe foreign call;
                                -- see Note [Foreign calls]
                                -- Like a "fat machine instruction"; can occur
                                -- in the middle of a block
      ForeignTarget ->          -- call target
      [CmmFormal] ->            -- zero or more results
      [CmmActual] ->            -- zero or more arguments
      CmmNode O O
      -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
      -- See Note [Unsafe foreign calls clobber caller-save registers]
      --
      -- Invariant: the arguments and the ForeignTarget must not
      -- mention any registers for which GHC.Platform.Regs.callerSaves
      -- is True. See Note [Register parameter passing].

  CmmBranch :: ULabel -> CmmNode O C
                                -- Goto another block in the same procedure

  CmmCondBranch :: {            -- conditional branch
      cml_pred :: CmmExpr,
      cml_true, cml_false :: ULabel,
      cml_likely :: Maybe Bool  -- likely result of the conditional,
                                -- if known
  } -> CmmNode O C

  CmmSwitch
    :: CmmExpr                  -- Scrutinee, of some integral type
    -> SwitchTargets            -- Cases. See Note [SwitchTargets]
    -> CmmNode O C

  CmmCall :: {                  -- A native call or tail call
      cml_target :: CmmExpr,    -- never a CmmPrim to a CallishMachOp!

      cml_cont :: Maybe Label,
          -- Label of continuation (Nothing for return or tail call)
          --
          -- Note [Continuation BlockIds]: these BlockIds are called
          -- Continuation BlockIds, and are the only BlockIds that can
          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
          -- (CmmStackSlot (Young b) _).

      cml_args_regs :: [GlobalReg],
          -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
          -- to the call. This is essential information for the
          -- native code generator's register allocator; without
          -- knowing which GlobalRegs are live it has to assume that
          -- they are all live. This list should only include
          -- GlobalRegs that are mapped to real machine registers on
          -- the target platform.

      cml_args :: ByteOff,
          -- Byte offset, from the *old* end of the Area associated with
          -- the Label (if cml_cont = Nothing, then Old area), of
          -- youngest outgoing arg. Set the stack pointer to this before
          -- transferring control.
          -- (NB: an update frame might also have been stored in the Old
          --  area, but it'll be in an older part than the args.)

      cml_ret_args :: ByteOff,
          -- For calls *only*, the byte offset for youngest returned value
          -- This is really needed at the *return* point rather than here
          -- at the call, but in practice it's convenient to record it here.

      cml_ret_off :: ByteOff
          -- For calls *only*, the byte offset of the base of the frame that
          -- must be described by the info table for the return point.
          -- The older words are an update frame, which has its own
          -- info-table and layout information

          -- From a liveness point of view, the stack words older than
          -- cml_ret_off are treated as live, even if the sequel of
          -- the call goes into a loop.
  } -> CmmNode O C

  CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
                                -- Always the last node of a block
      tgt   :: ForeignTarget,   -- call target and convention
      res   :: [CmmFormal],     -- zero or more results
      args  :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing]
      succ  :: ULabel,          -- Label of continuation
      ret_args :: ByteOff,      -- same as cml_ret_args
      ret_off  :: ByteOff,      -- same as cml_ret_off
      intrbl :: Bool            -- whether or not the call is interruptible
  } -> CmmNode O C

{- Note [Foreign calls]
~~~~~~~~~~~~~~~~~~~~~~~
A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
a CmmForeignCall call is used for *safe* foreign calls.

Unsafe ones are mostly easy: think of them as a "fat machine
instruction". In particular, they do *not* kill all live registers,
just the registers they return to (there was a bit of code in GHC that
conservatively assumed otherwise.) However, see Note [Register parameter passing].

Safe ones are trickier. A safe foreign call
     r = f(x)
ultimately expands to
     push "return address"          -- Never used to return to;
                                    -- just points at an info table
     save registers into TSO
     call suspendThread
     r = f(x)                       -- Make the call
     call resumeThread
     restore registers
     pop "return address"
We cannot "lower" a safe foreign call to this sequence of Cmms, because
after we've saved Sp all the Cmm optimiser's assumptions are broken.

Note that a safe foreign call needs an info table.

So Safe Foreign Calls must remain as last nodes until the stack is
made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above
sequence.
-}

{- Note [Unsafe foreign calls clobber caller-save registers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A foreign call is defined to clobber any GlobalRegs that are mapped to
caller-saves machine registers (according to the prevailing C ABI).
GHC.Platform.Regs.callerSaves tells you which GlobalRegs are caller-saves.

This is a design choice that makes it easier to generate code later.
We could instead choose to say that foreign calls do *not* clobber
caller-saves regs, but then we would have to figure out which regs
were live across the call later and insert some saves/restores.

Furthermore when we generate code we never have any GlobalRegs live
across a call, because they are always copied-in to LocalRegs and
copied-out again before making a call/jump. So all we have to do is
avoid any code motion that would make a caller-saves GlobalReg live
across a foreign call during subsequent optimisations.
-}

{- Note [Register parameter passing]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On certain architectures, some registers are utilized for parameter
passing in the C calling convention. For example, in x86-64 Linux
convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
argument passing. These are registers R3-R6, which our generated
code may also be using; as a result, it's necessary to save these
values before doing a foreign call. This is done during initial
code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils.

However, one result of doing this is that the contents of these registers
may mysteriously change if referenced inside the arguments. This is
dangerous, so you'll need to disable inlining much in the same way as is
done in GHC.Cmm.Sink currently. We should fix this!
-}

---------------------------------------------
-- Eq instance of CmmNode

deriving instance Eq (CmmNode e x)

----------------------------------------------
-- Hoopl instances of CmmNode

instance NonLocal CmmNode where
  entryLabel (CmmEntry l _) = l

  successors (CmmBranch l) = [l]
  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
  successors (CmmSwitch _ ids) = switchTargetsToList ids
  successors (CmmCall {cml_cont=l}) = maybeToList l
  successors (CmmForeignCall {succ=l}) = [l]


--------------------------------------------------
-- Various helper types

type CmmActual = CmmExpr
type CmmFormal = LocalReg

type UpdFrameOffset = ByteOff

-- | A convention maps a list of values (function arguments or return
-- values) to registers or stack locations.
data Convention
  = NativeDirectCall
       -- ^ top-level Haskell functions use @NativeDirectCall@, which
       -- maps arguments to registers starting with R2, according to
       -- how many registers are available on the platform. This
       -- convention ignores R1, because for a top-level function call
       -- the function closure is implicit, and doesn't need to be passed.
  | NativeNodeCall
       -- ^ non-top-level Haskell functions, which pass the address of
       -- the function closure in R1 (regardless of whether R1 is a
       -- real register or not), and the rest of the arguments in
       -- registers or on the stack.
  | NativeReturn
       -- ^ a native return. The convention for returns depends on
       -- how many values are returned: for just one value returned,
       -- the appropriate register is used (R1, F1, etc.), regardless
       -- of whether it is a real register or not. For multiple
       -- values returned, they are mapped to registers or the stack.
  | Slow
       -- ^ Slow entry points: all args pushed on the stack
  | GC
       -- ^ Entry to the garbage collector: uses the node reg!
       -- (TODO: I don't think we need this --SDM)
  deriving( Eq )

data ForeignConvention
  = ForeignConvention
        CCallConv               -- Which foreign-call convention
        [ForeignHint]           -- Extra info about the args
        [ForeignHint]           -- Extra info about the result
        CmmReturnInfo
  deriving Eq

data CmmReturnInfo
  = CmmMayReturn
  | CmmNeverReturns
  deriving ( Eq )

data ForeignTarget        -- The target of a foreign call
  = ForeignTarget         -- A foreign procedure
        CmmExpr                  -- Its address
        ForeignConvention        -- Its calling convention
  | PrimTarget            -- A possibly-side-effecting machine operation
        CallishMachOp            -- Which one
  deriving Eq

foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
foreignTargetHints target
  = ( res_hints ++ repeat NoHint
    , arg_hints ++ repeat NoHint )
  where
    (res_hints, arg_hints) =
       case target of
         PrimTarget op -> callishMachOpHints op
         ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) ->
            (res_hints, arg_hints)

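-- A minimal consumption sketch (illustration only; 'hintedArgs' is a
-- hypothetical helper, not part of this module's API): the returned hint
-- lists are padded with 'repeat NoHint' so they are infinite, and callers
-- are expected to zip them against the actual arguments rather than
-- demanding a particular length.
hintedArgs :: ForeignTarget -> [CmmActual] -> [(CmmActual, ForeignHint)]
hintedArgs tgt as = zip as arg_hints
  where (_res_hints, arg_hints) = foreignTargetHints tgt
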
--------------------------------------------------
-- Instances of register and slot users / definers

instance UserOfRegs LocalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsUsed #-}
  foldRegsUsed platform f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt} -> fold f z tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    where fold :: forall a b. UserOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed platform f z n

instance UserOfRegs GlobalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsUsed #-}
  foldRegsUsed platform f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    where fold :: forall a b. UserOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed platform f z n

instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
  -- The (Ord r) in the context is necessary here
  -- See Note [Recursive superclasses] in GHC.Tc.TyCl.Instance
  {-# INLINEABLE foldRegsUsed #-}
  foldRegsUsed _        _ !z (PrimTarget _)      = z
  foldRegsUsed platform f !z (ForeignTarget e _) = foldRegsUsed platform f z e

instance DefinerOfRegs LocalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsDefd #-}
  foldRegsDefd platform f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall _ fs _ -> fold f z fs
    CmmForeignCall {res=res} -> fold f z res
    _ -> z
    where fold :: forall a b. DefinerOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd platform f z n

instance DefinerOfRegs GlobalReg (CmmNode e x) where
  {-# INLINEABLE foldRegsDefd #-}
  foldRegsDefd platform f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
    CmmCall        {} -> fold f z activeRegs
    CmmForeignCall {} -> fold f z activeRegs
                  -- See Note [Safe foreign calls clobber STG registers]
    _ -> z
    where fold :: forall a b. DefinerOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd platform f z n

          activeRegs = activeStgRegs platform
          activeCallerSavesRegs = filter (callerSaves platform) activeRegs

          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
          foreignTargetRegs _ = activeCallerSavesRegs

-- Note [Safe foreign calls clobber STG registers]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- During the stack layout phase, every safe foreign call is expanded into a
-- block that contains an unsafe foreign call (instead of the safe one) and
-- ends with a normal call (see Note [Foreign calls]). This means that we
-- must treat a safe foreign call as if it were a normal call (because
-- eventually it will be). This is important if we try to run the sinking
-- pass before the stack layout phase. Consider this example of what might
-- go wrong (this is Cmm code from the stablename001 test). Here is the code
-- after common block elimination (before stack layout):
--
--   c1q6:
--       _s1pf::P64 = R1;
--       _c1q8::I64 = performMajorGC;
--       I64[(young<c1q9> + 8)] = c1q9;
--       foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...)
--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- If we run the sinking pass now (still before stack layout) we will get this:
--
--   c1q6:
--       I64[(young<c1q9> + 8)] = c1q9;
--       foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...)
--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       _s1pf::P64 = R1;        <------ _s1pf sunk past safe foreign call
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- Notice that _s1pf was sunk past a foreign call. When we run stack layout,
-- the safe call to performMajorGC will be turned into:
--
--   c1q6:
--       _s1pc::P64 = P64[Sp + 8];
--       I64[Sp - 8] = c1q9;
--       Sp = Sp - 8;
--       I64[I64[CurrentTSO + 24] + 16] = Sp;
--       P64[CurrentNursery + 8] = Hp + 8;
--       (_u1qI::I64) = call "ccall" arg hints: [PtrHint,]
--                           result hints: [PtrHint] suspendThread(BaseReg, 0);
--       call "ccall" arg hints: [] result hints: [] performMajorGC();
--       (_u1qJ::I64) = call "ccall" arg hints: [PtrHint]
--                           result hints: [PtrHint] resumeThread(_u1qI::I64);
--       BaseReg = _u1qJ::I64;
--       _u1qK::P64 = CurrentTSO;
--       _u1qL::P64 = I64[_u1qK::P64 + 24];
--       Sp = I64[_u1qL::P64 + 16];
--       SpLim = _u1qL::P64 + 192;
--       HpAlloc = 0;
--       Hp = I64[CurrentNursery + 8] - 8;
--       HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
--       call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       _s1pf::P64 = R1;        <------ INCORRECT!
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
-- call is clearly incorrect. This is what would happen if we assumed that a
-- safe foreign call has the same semantics as an unsafe one. To prevent
-- this we need to treat a safe foreign call as if it were a normal call.

-----------------------------------
-- mapping Expr in GHC.Cmm.Node

mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c
mapForeignTarget _   m@(PrimTarget _)    = m

wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
-- Take a transformer on expressions and apply it recursively.
-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
--   then uses f to rewrite the resulting expression
wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
wrapRecExp f e                 = f e

mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{})                   = f
mapExp _ m@(CmmComment _)                 = m
mapExp _ m@(CmmTick _)                    = m
mapExp f (CmmUnwind regs)                 = CmmUnwind (map (fmap (fmap f)) regs)
mapExp f (CmmAssign r e)                  = CmmAssign r (f e)
mapExp f (CmmStore addr e)                = CmmStore (f addr) (f e)
mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _)                  = l
mapExp f (CmmCondBranch e ti fi l)        = CmmCondBranch (f e) ti fi l
mapExp f (CmmSwitch e ids)                = CmmSwitch (f e) ids
mapExp f n@CmmCall {cml_target=tgt}       = n{cml_target = f tgt}
mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl

mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep f = mapExp $ wrapRecExp f

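-- A minimal usage sketch (illustration only; 'dropAddZero' is a
-- hypothetical helper, not part of this module's API): a shallow rewrite
-- that folds away additions of a literal zero, applied to every
-- sub-expression of a node by mapExpDeep via wrapRecExp.
dropAddZero :: CmmNode e x -> CmmNode e x
dropAddZero = mapExpDeep simplify
  where
    simplify (CmmMachOp (MO_Add _) [e, CmmLit (CmmInt 0 _)]) = e
    simplify (CmmMachOp (MO_Add _) [CmmLit (CmmInt 0 _), e]) = e
    simplify e                                               = e
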
------------------------------------------------------------------------
-- mapping Expr in GHC.Cmm.Node, but not performing allocation if no changes

mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
mapForeignTargetM _ (PrimTarget _)      = Nothing

wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
--   then gives f a chance to rewrite the resulting expression
wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op)    (mapListM (wrapRecExpM f) es)
wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
wrapRecExpM f e                   = f e

mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{})              = Nothing
mapExpM _ (CmmComment _)            = Nothing
mapExpM _ (CmmTick _)               = Nothing
mapExpM f (CmmUnwind regs)          = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
mapExpM f (CmmAssign r e)           = CmmAssign r `fmap` f e
mapExpM f (CmmStore addr e)         = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _)             = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl)         = (\x -> CmmSwitch x tbl) `fmap` f e
mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
mapExpM f (CmmUnsafeForeignCall tgt fs as)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
        Nothing   -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
        Nothing   -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as

-- share as much as possible
mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
mapListM f xs = let (b, r) = mapListT f xs
                in if b then Just r else Nothing

mapListJ :: (a -> Maybe a) -> [a] -> [a]
mapListJ f xs = snd (mapListT f xs)

mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs))
    where g (_,   y, Nothing) (True, ys)  = (True, y:ys)
          g (_,   _, Just y)  (True, ys)  = (True, y:ys)
          g (ys', _, Nothing) (False, _)  = (False, ys')
          g (_,   _, Just y)  (False, ys) = (True, y:ys)
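-- Illustrative example (not from the original source): with
--   f x = if x == 2 then Just 20 else Nothing
-- we get mapListT f [1,2,3] == (True, [1,20,3]), where the suffix [3] is
-- the original tail obtained from 'tails', shared rather than rebuilt.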

mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpDeepM f = mapExpM $ wrapRecExpM f

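-- A minimal sketch of the Maybe-based interface (illustration only;
-- 'replaceRegUses' is a hypothetical helper, not part of this module's
-- API): a rewrite returns Nothing to signal "unchanged", so the caller
-- can keep the original node and unchanged sub-structure stays shared
-- instead of being reallocated.
replaceRegUses :: CmmReg -> CmmExpr -> CmmNode e x -> CmmNode e x
replaceRegUses old new n = fromMaybe n (mapExpDeepM rw n)
  where
    rw (CmmReg r) | r == old = Just new
    rw _                     = Nothing
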
-----------------------------------
-- folding Expr in GHC.Cmm.Node

foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
foldExpForeignTarget exp (ForeignTarget e _) z = exp e z
foldExpForeignTarget _   (PrimTarget _)      z = z

-- Take a folder on expressions and apply it recursively.
-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad
-- itself, delegating all the other CmmExpr forms to 'f'.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
wrapRecExpf f e                  z = f e z

foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z                         = z
foldExp _ (CmmComment {}) z                       = z
foldExp _ (CmmTick {}) z                          = z
foldExp f (CmmUnwind xs) z                        = foldr (maybe id f) z (map snd xs)
foldExp f (CmmAssign _ e) z                       = f e z
foldExp f (CmmStore addr e) z                     = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z         = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z                         = z
foldExp f (CmmCondBranch e _ _ _) z               = f e z
foldExp f (CmmSwitch e _) z                       = f e z
foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args

foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExpDeep f = foldExp (wrapRecExpf f)

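-- A small illustrative helper (not part of this module's API): use
-- foldExpDeep to count every CmmLoad occurring anywhere inside a node,
-- including loads nested inside other expressions.
countLoads :: CmmNode e x -> Int
countLoads n = foldExpDeep count n 0
  where count (CmmLoad {}) z = z + 1
        count _            z = z
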
-- -----------------------------------------------------------------------------

mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
mapSuccessors f (CmmBranch bid)         = CmmBranch (f bid)
mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l
mapSuccessors f (CmmSwitch e ids)       = CmmSwitch e (mapSwitchTargets f ids)
mapSuccessors _ n = n

mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C
                     -> (CmmNode O C, [a])
mapCollectSuccessors f (CmmBranch bid)
  = let (bid', acc) = f bid in (CmmBranch bid', [acc])
mapCollectSuccessors f (CmmCondBranch p y n l)
  = let (bidt, acct) = f y
        (bidf, accf) = f n
    in (CmmCondBranch p bidt bidf l, [accf, acct])
mapCollectSuccessors f (CmmSwitch e ids)
  = let lbls = switchTargetsToList ids :: [Label]
        lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a)
    in ( CmmSwitch e
          (mapSwitchTargets
            (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids)
       , map snd (mapElems lblMap)
       )
mapCollectSuccessors _ n = (n, [])

-- -----------------------------------------------------------------------------

-- | Tick scope identifier, allowing us to reason about what
-- annotations in a Cmm block should scope over. We especially take
-- care to allow optimisations to reorganise blocks without losing
-- tick association in the process.
data CmmTickScope
  = GlobalScope
    -- ^ The global scope is the "root" of the scope graph. Every
    -- scope is a sub-scope of the global scope. It doesn't make sense
    -- to add ticks to this scope. On the other hand, this means that
    -- setting this scope on a block means no ticks apply to it.

  | SubScope !U.Unique CmmTickScope
    -- ^ Constructs a new sub-scope to an existing scope. This allows
    -- us to translate Core-style scoping rules (see @tickishScoped@)
    -- into the Cmm world. Consider the following code:
    --
    --   tick<1> case ... of
    --             A -> tick<2> ...
    --             B -> tick<3> ...
    --
    -- We want the top-level tick annotation to apply to blocks
    -- generated for the A and B alternatives. We can achieve that by
    -- generating tick<1> into a block with scope a, while the code
    -- for alternatives A and B gets generated into sub-scopes a/b and
    -- a/c respectively.

  | CombinedScope CmmTickScope CmmTickScope
    -- ^ A combined scope scopes over everything that the two given
    -- scopes cover. It is therefore a sub-scope of either scope. This
    -- is required for optimisations. Consider common block elimination:
    --
    --   A -> tick<2> case ... of
    --          C -> [common]
    --   B -> tick<3> case ... of
    --          D -> [common]
    --
    -- We will generate code for the C and D alternatives, and figure
    -- out afterwards that it's actually common code. Scoping rules
    -- dictate that the resulting common block needs to be covered by
    -- both tick<2> and tick<3>, therefore we need to construct a
    -- scope that is a child of *both* scopes. Now we can do that: if
    -- we assign the scopes a/c and b/d to the common-ed up blocks,
    -- the new block could have a combined tick scope a/c+b/d, which
    -- both tick<2> and tick<3> apply to.

-- Note [CmmTick scoping details]:
--
-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
-- same block. Note that as a result of this, optimisations making
-- tick scopes more specific can *reduce* the amount of code a tick
-- scopes over. Fixing this would require a separate @CmmTickScope@
-- field for @CmmTick@. Right now we do not do this simply because I
-- couldn't find an example where it actually mattered -- multiple
-- blocks within the same scope generally jump to each other, which
-- prevents common block elimination from happening in the first
-- place. But this is no strong reason, so if Cmm optimisations become
-- more involved in future this might have to be revisited.

-- | Output all scope paths.
scopeToPaths :: CmmTickScope -> [[U.Unique]]
scopeToPaths GlobalScope           = [[]]
scopeToPaths (SubScope u s)        = map (u:) (scopeToPaths s)
scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2

-- | Returns the head uniques of the scopes. This is based on the
-- assumption that the @Unique@ of @SubScope@ identifies the
-- underlying super-scope. Used for efficient equality and comparison,
-- see below.
scopeUniques :: CmmTickScope -> [U.Unique]
scopeUniques GlobalScope           = []
scopeUniques (SubScope u _)        = [u]
scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2

-- Equality and ordering are based on the head uniques defined above. We
-- take care to short-cut the (extremely) common cases.
instance Eq CmmTickScope where
  GlobalScope    == GlobalScope     = True
  GlobalScope    == _               = False
  _              == GlobalScope     = False
  (SubScope u _) == (SubScope u' _) = u == u'
  (SubScope _ _) == _               = False
  _              == (SubScope _ _)  = False
  scope          == scope'          =
    sortBy nonDetCmpUnique (scopeUniques scope) ==
    sortBy nonDetCmpUnique (scopeUniques scope')
    -- This is still deterministic because
    -- the order is the same for equal lists

-- This is non-deterministic but we do not currently support deterministic
-- code-generation. See Note [Unique Determinism and code generation]
-- See Note [No Ord for Unique]
instance Ord CmmTickScope where
  compare GlobalScope GlobalScope = EQ
  compare GlobalScope _           = LT
  compare _ GlobalScope           = GT
  compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
  compare scope scope' = cmpList nonDetCmpUnique
     (sortBy nonDetCmpUnique $ scopeUniques scope)
     (sortBy nonDetCmpUnique $ scopeUniques scope')

instance Outputable CmmTickScope where
  ppr GlobalScope     = text "global"
  ppr (SubScope us GlobalScope)
                      = ppr us
  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
  ppr combined        = parens $ hcat $ punctuate (char '+') $
                        map (hcat . punctuate (char '/') . map ppr . reverse) $
                        scopeToPaths combined

-- | Checks whether the first tick scope is a sub-scope of the second.
-- True if the two scopes are equal.
isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
isTickSubScope = cmp
  where cmp _ GlobalScope                     = True
        cmp GlobalScope _                     = False
        cmp (CombinedScope s1 s2) s'          = cmp s1 s' && cmp s2 s'
        cmp s (CombinedScope s1' s2')         = cmp s s1' || cmp s s2'
        cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s'

-- | Combine two tick scopes. The new scope should be a sub-scope of
-- both parameters. We simplify automatically if one tick scope is a
-- sub-scope of the other already.
combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
combineTickScopes s1 s2
  | s1 `isTickSubScope` s2 = s1
  | s2 `isTickSubScope` s1 = s2
  | otherwise              = CombinedScope s1 s2
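
-- For example (illustrative scope names, not from the original source):
-- since a/b is already a sub-scope of a, combineTickScopes a a/b is just
-- a/b, while combining the unrelated scopes a/c and b/d yields the
-- combined scope a/c+b/d, which is a sub-scope of both.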