never executed always true always false
1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE BangPatterns #-}
3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
5 module GHC.Cmm.ContFlowOpt
6 ( cmmCfgOpts
7 , cmmCfgOptsProc
8 , removeUnreachableBlocksProc
9 , replaceLabels
10 )
11 where
12
13 import GHC.Prelude hiding (succ, unzip, zip)
14
15 import GHC.Cmm.Dataflow.Block
16 import GHC.Cmm.Dataflow.Collections
17 import GHC.Cmm.Dataflow.Graph
18 import GHC.Cmm.Dataflow.Label
19 import GHC.Cmm.BlockId
20 import GHC.Cmm
21 import GHC.Cmm.Utils
22 import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
23 import GHC.Data.Maybe
24 import GHC.Utils.Panic
25 import GHC.Utils.Misc
26
27 import Control.Monad
28
29
30 -- Note [What is shortcutting]
31 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 --
33 -- Consider this Cmm code:
34 --
35 -- L1: ...
36 -- goto L2;
37 -- L2: goto L3;
38 -- L3: ...
39 --
40 -- Here L2 is an empty block and contains only an unconditional branch
41 -- to L3. In this situation any block that jumps to L2 can jump
42 -- directly to L3:
43 --
44 -- L1: ...
45 -- goto L3;
46 -- L2: goto L3;
47 -- L3: ...
48 --
49 -- In this situation we say that we shortcut L2 to L3. One of
50 -- consequences of shortcutting is that some blocks of code may become
51 -- unreachable (in the example above this is true for L2).
52
53
54 -- Note [Control-flow optimisations]
55 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
56 --
57 -- This optimisation does three things:
58 --
59 -- - If a block finishes in an unconditional branch to another block
60 -- and that is the only jump to that block we concatenate the
61 -- destination block at the end of the current one.
62 --
63 -- - If a block finishes in a call whose continuation block is a
64 -- goto, then we can shortcut the destination, making the
65 -- continuation block the destination of the goto - but see Note
66 -- [Shortcut call returns].
67 --
68 -- - For any block that is not a call we try to shortcut the
69 -- destination(s). Additionally, if a block ends with a
70 -- conditional branch we try to invert the condition.
71 --
72 -- Blocks are processed using postorder DFS traversal. A side effect
73 -- of determining traversal order with a graph search is elimination
74 -- of any blocks that are unreachable.
75 --
76 -- Transformations are improved by working from the end of the graph
77 -- towards the beginning, because we may be able to perform many
78 -- shortcuts in one go.
79
80
81 -- Note [Shortcut call returns]
82 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
83 --
84 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
85 -- we go, and also a mapping from BlockId to BlockId, representing
86 -- continuation labels that we have renamed. This latter mapping is
87 -- important because we might shortcut a CmmCall continuation. For
88 -- example:
89 --
90 -- Sp[0] = L
91 -- call g returns to L
92 -- L: goto M
93 -- M: ...
94 --
95 -- So when we shortcut the L block, we need to replace not only
96 -- the continuation of the call, but also references to L in the
97 -- code (e.g. the assignment Sp[0] = L):
98 --
99 -- Sp[0] = M
100 -- call g returns to M
101 -- M: ...
102 --
103 -- So we keep track of which labels we have renamed and apply the mapping
104 -- at the end with replaceLabels.
105
106
107 -- Note [Shortcut call returns and proc-points]
108 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
109 --
110 -- Consider this code that you might get from a recursive
111 -- let-no-escape:
112 --
113 -- goto L1
114 -- L1:
115 -- if (Hp > HpLim) then L2 else L3
116 -- L2:
117 -- call stg_gc_noregs returns to L4
118 -- L4:
119 -- goto L1
120 -- L3:
121 -- ...
122 -- goto L1
123 --
124 -- Then the control-flow optimiser shortcuts L4. But that turns L1
125 -- into the call-return proc point, and every iteration of the loop
126 -- has to shuffle variables to and from the stack. So we must *not*
127 -- shortcut L4.
128 --
129 -- Moreover not shortcutting call returns is probably fine. If L4 can
130 -- concat with its branch target then it will still do so. And we
131 -- save some compile time because we don't have to traverse all the
132 -- code in replaceLabels.
133 --
134 -- However, we probably do want to do this if we are splitting proc
135 -- points, because L1 will be a proc-point anyway, so merging it with
136 -- L4 reduces the number of proc points. Unfortunately recursive
137 -- let-no-escapes won't generate very good code with proc-point
138 -- splitting on - we should probably compile them to explicitly use
139 -- the native calling convention instead.
140
141 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
142 cmmCfgOpts split g = fst (blockConcat split g)
143
144 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
145 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
146 where (g', env) = blockConcat split g
147 info' = info{ info_tbls = new_info_tbls }
148 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
149
150 -- If we changed any labels, then we have to update the info tables
151 -- too, except for the top-level info table because that might be
152 -- referred to by other procs.
153 upd_info (k,info)
154 | Just k' <- mapLookup k env
155 = (k', if k' == g_entry g'
156 then info
157 else info{ cit_lbl = infoTblLbl k' })
158 | otherwise
159 = (k,info)
160 cmmCfgOptsProc _ top = top
161
162
163 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
164 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
165 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
166 where
167 -- We might be able to shortcut the entry BlockId itself.
168 -- Remember to update the shortcut_map, since we also have to
169 -- update the info_tbls mapping now.
170 (new_entry, shortcut_map')
171 | Just entry_blk <- mapLookup entry_id new_blocks
172 , Just dest <- canShortcut entry_blk
173 = (dest, mapInsert entry_id dest shortcut_map)
174 | otherwise
175 = (entry_id, shortcut_map)
176
177 -- blocks are sorted in reverse postorder, but we want to go from the exit
178 -- towards beginning, so we use foldr below.
179 blocks = revPostorder g
180 blockmap = foldl' (flip addBlock) emptyBody blocks
181
182 -- Accumulator contains three components:
183 -- * map of blocks in a graph
184 -- * map of shortcut labels. See Note [Shortcut call returns]
185 -- * map containing number of predecessors for each block. We discard
186 -- it after we process all blocks.
187 (new_blocks, shortcut_map, _) =
188 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
189
190 -- Map of predecessors for initial graph. We increase number of
191 -- predecessors for entry block by one to denote that it is
192 -- target of a jump, even if no block in the current graph jumps
193 -- to it.
194 initialBackEdges = incPreds entry_id (predMap blocks)
195
196 maybe_concat :: CmmBlock
197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
198 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
199 maybe_concat block (!blocks, !shortcut_map, !backEdges)
200 -- If:
201 -- (1) current block ends with unconditional branch to b' and
202 -- (2) it has exactly one predecessor (namely, current block)
203 --
204 -- Then:
205 -- (1) append b' block at the end of current block
206 -- (2) remove b' from the map of blocks
207 -- (3) remove information about b' from predecessors map
208 --
209 -- Since we know that the block has only one predecessor we call
210 -- mapDelete directly instead of calling decPreds.
211 --
212 -- Note that we always maintain an up-to-date list of predecessors, so
213 -- we can ignore the contents of shortcut_map
214 | CmmBranch b' <- last
215 , hasOnePredecessor b'
216 , Just blk' <- mapLookup b' blocks
217 = let bid' = entryLabel blk'
218 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
219 , shortcut_map
220 , mapDelete b' backEdges )
221
222 -- If:
223 -- (1) we are splitting proc points (see Note
224 -- [Shortcut call returns and proc-points]) and
225 -- (2) current block is a CmmCall or CmmForeignCall with
226 -- continuation b' and
227 -- (3) we can shortcut that continuation to dest
228 -- Then:
229 -- (1) we change continuation to point to b'
230 -- (2) create mapping from b' to dest
231 -- (3) increase number of predecessors of dest by 1
232 -- (4) decrease number of predecessors of b' by 1
233 --
234 -- Later we will use replaceLabels to substitute all occurrences of b'
235 -- with dest.
236 | splitting_procs
237 , Just b' <- callContinuation_maybe last
238 , Just blk' <- mapLookup b' blocks
239 , Just dest <- canShortcut blk'
240 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
241 , mapInsert b' dest shortcut_map
242 , decPreds b' $ incPreds dest backEdges )
243
244 -- If:
245 -- (1) a block does not end with a call
246 -- Then:
247 -- (1) if it ends with a conditional attempt to invert the
248 -- conditional
249 -- (2) attempt to shortcut all destination blocks
250 -- (3) if new successors of a block are different from the old ones
251 -- update the of predecessors accordingly
252 --
253 -- A special case of this is a situation when a block ends with an
254 -- unconditional jump to a block that can be shortcut.
255 | Nothing <- callContinuation_maybe last
256 = let oldSuccs = successors last
257 newSuccs = successors rewrite_last
258 in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
259 , shortcut_map
260 , if oldSuccs == newSuccs
261 then backEdges
262 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
263
264 -- Otherwise don't do anything
265 | otherwise
266 = ( blocks, shortcut_map, backEdges )
267 where
268 (head, last) = blockSplitTail block
269 bid = entryLabel block
270
271 -- Changes continuation of a call to a specified label
272 update_cont dest =
273 case last of
274 CmmCall{} -> last { cml_cont = Just dest }
275 CmmForeignCall{} -> last { succ = dest }
276 _ -> panic "Can't shortcut continuation."
277
278 -- Attempts to shortcut successors of last node
279 shortcut_last = mapSuccessors shortcut last
280 where
281 shortcut l =
282 case mapLookup l blocks of
283 Just b | Just dest <- canShortcut b -> dest
284 _otherwise -> l
285
286 rewrite_last
287 -- Sometimes we can get rid of the conditional completely.
288 | CmmCondBranch _cond t f _l <- shortcut_last
289 , t == f
290 = CmmBranch t
291
292 -- See Note [Invert Cmm conditionals]
293 | CmmCondBranch cond t f l <- shortcut_last
294 , hasOnePredecessor t -- inverting will make t a fallthrough
295 , likelyTrue l || (numPreds f > 1)
296 , Just cond' <- maybeInvertCmmExpr cond
297 = CmmCondBranch cond' f t (invertLikeliness l)
298
299 -- If all jump destinations of a switch go to the
300 -- same target eliminate the switch.
301 | CmmSwitch _expr targets <- shortcut_last
302 , (t:ts) <- switchTargetsToList targets
303 , all (== t) ts
304 = CmmBranch t
305
306 | otherwise
307 = shortcut_last
308
309 likelyTrue (Just True) = True
310 likelyTrue _ = False
311
312 invertLikeliness :: Maybe Bool -> Maybe Bool
313 invertLikeliness = fmap not
314
315 -- Number of predecessors for a block
316 numPreds bid = mapLookup bid backEdges `orElse` 0
317
318 hasOnePredecessor b = numPreds b == 1
319
320 {-
321 Note [Invert Cmm conditionals]
322 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
323 The native code generator always produces jumps to the true branch.
324 Falling through to the false branch is however faster. So we try to
325 arrange for that to happen.
326 This means we invert the condition if:
327 * The likely path will become a fallthrough.
328 * We can't guarantee a fallthrough for the false branch but for the
329 true branch.
330
331 In some cases it's faster to avoid inverting when the false branch is likely.
332 However determining when that is the case is neither easy nor cheap so for
333 now we always invert as this produces smaller binaries and code that is
334 equally fast on average. (On an i7-6700K)
335
336 TODO:
337 There is also the edge case when both branches have multiple predecessors.
338 In this case we could assume that we will end up with a jump for BOTH
339 branches. In this case it might be best to put the likely path in the true
340 branch especially if there are large numbers of predecessors as this saves
341 us the jump thats not taken. However I haven't tested this and as of early
342 2018 we almost never generate cmm where this would apply.
343 -}
344
345 -- Functions for incrementing and decrementing number of predecessors. If
346 -- decrementing would set the predecessor count to 0, we remove entry from the
347 -- map.
348 -- Invariant: if a block has no predecessors it should be dropped from the
349 -- graph because it is unreachable. maybe_concat is constructed to maintain
350 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
351 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
352 -- blocks.
353 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
354 incPreds bid edges = mapInsertWith (+) bid 1 edges
355 decPreds bid edges = case mapLookup bid edges of
356 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
357 Just _ -> mapDelete bid edges
358 _ -> edges
359
360
361 -- Checks if a block consists only of "goto dest". If it does than we return
362 -- "Just dest" label. See Note [What is shortcutting]
363 canShortcut :: CmmBlock -> Maybe BlockId
364 canShortcut block
365 | (_, middle, CmmBranch dest) <- blockSplit block
366 , all dont_care $ blockToList middle
367 = Just dest
368 | otherwise
369 = Nothing
370 where dont_care CmmComment{} = True
371 dont_care CmmTick{} = True
372 dont_care _other = False
373
374 -- Concatenates two blocks. First one is assumed to be open on exit, the second
375 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
376 -- the splice function removes by calling snd on result of blockSplitHead).
377 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
378 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
379 where (CmmEntry lbl sc0, code0) = blockSplitHead head
380 (CmmEntry _ sc1, code1) = blockSplitHead rest
381 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
382
383 -- If node is a call with continuation call return Just label of that
384 -- continuation. Otherwise return Nothing.
385 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
386 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
387 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
388 callContinuation_maybe _ = Nothing
389
390
391 -- Map over the CmmGraph, replacing each label with its mapping in the
392 -- supplied LabelMap.
393 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
394 replaceLabels env g
395 | mapNull env = g
396 | otherwise = replace_eid $ mapGraphNodes1 txnode g
397 where
398 replace_eid g = g {g_entry = lookup (g_entry g)}
399 lookup id = mapLookup id env `orElse` id
400
401 txnode :: CmmNode e x -> CmmNode e x
402 txnode (CmmBranch bid) = CmmBranch (lookup bid)
403 txnode (CmmCondBranch p t f l) =
404 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
405 txnode (CmmSwitch e ids) =
406 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
407 txnode (CmmCall t k rg a res r) =
408 CmmCall (exp t) (liftM lookup k) rg a res r
409 txnode fc@CmmForeignCall{} =
410 fc{ args = map exp (args fc), succ = lookup (succ fc) }
411 txnode other = mapExpDeep exp other
412
413 exp :: CmmExpr -> CmmExpr
414 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
415 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
416 exp e = e
417
418 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
419 mkCmmCondBranch p t f l =
420 if t == f then CmmBranch t else CmmCondBranch p t f l
421
422 -- Build a map from a block to its set of predecessors.
423 predMap :: [CmmBlock] -> LabelMap Int
424 predMap blocks = foldr add_preds mapEmpty blocks
425 where
426 add_preds block env = foldr add env (successors block)
427 where add lbl env = mapInsertWith (+) lbl 1 env
428
429 -- Removing unreachable blocks
430 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
431 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
432 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
433 = CmmProc info' lbl live g'
434 | otherwise
435 = proc
436 where
437 g' = ofBlockList (g_entry g) used_blocks
438 info' = info { info_tbls = keep_used (info_tbls info) }
439 -- Remove any info_tbls for unreachable
440
441 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
442 keep_used bs = mapFoldlWithKey keep mapEmpty bs
443
444 keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
445 keep env l i | l `setMember` used_lbls = mapInsert l i env
446 | otherwise = env
447
448 used_blocks :: [CmmBlock]
449 used_blocks = revPostorder g
450
451 used_lbls :: LabelSet
452 used_lbls = setFromList $ map entryLabel used_blocks