never executed always true always false
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE DataKinds #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE MultiWayIf #-}
5 {-# LANGUAGE ScopedTypeVariables #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 --
9 -- Copyright (c) 2018 Andreas Klebinger
10 --
11
12 module GHC.CmmToAsm.BlockLayout
13 ( sequenceTop, backendMaintainsCfg)
14 where
15
16 import GHC.Prelude
17
18 import GHC.Platform
19
20 import GHC.CmmToAsm.Instr
21 import GHC.CmmToAsm.Monad
22 import GHC.CmmToAsm.CFG
23 import GHC.CmmToAsm.Types
24 import GHC.CmmToAsm.Config
25
26 import GHC.Cmm
27 import GHC.Cmm.BlockId
28 import GHC.Cmm.Dataflow.Collections
29 import GHC.Cmm.Dataflow.Label
30
31 import GHC.Types.Unique.FM
32
33 import GHC.Data.Graph.Directed
34 import GHC.Data.Maybe
35 import GHC.Data.List.SetOps (removeDups)
36 import GHC.Data.OrdList
37
38 import GHC.Utils.Trace
39 import GHC.Utils.Outputable
40 import GHC.Utils.Panic
41 import GHC.Utils.Panic.Plain
42 import GHC.Utils.Misc
43
44 import Data.List (sortOn, sortBy, nub)
45 import Data.Foldable (toList)
46 import qualified Data.Set as Set
47 import Data.STRef
48 import Control.Monad.ST.Strict
49 import Control.Monad (foldM, unless)
50 import GHC.Data.UnionFind
51
52 {-
53 Note [CFG based code layout]
54 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55
56 The major steps in placing blocks are as follow:
57 * Compute a CFG based on the Cmm AST, see getCfgProc.
58 This CFG will have edge weights representing a guess
59 on how important they are.
60 * After we convert Cmm to Asm we run `optimizeCFG` which
61 adds a few more "educated guesses" to the equation.
62 * Then we run loop analysis on the CFG (`loopInfo`) which tells us
63 about loop headers, loop nesting levels and the sort.
64 * Based on the CFG and loop information refine the edge weights
65 in the CFG and normalize them relative to the most often visited
66 node. (See `mkGlobalWeights`)
67 * Feed this CFG into the block layout code (`sequenceTop`) in this
68 module. Which will then produce a code layout based on the input weights.
69
70 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 ~~~ Note [Chain based CFG serialization]
72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
73
74 For additional information also look at
75 https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout
76
77 We have a CFG with edge weights based on which we try to place blocks next to
78 each other.
79
80 Edge weights not only represent likelihood of control transfer between blocks
81 but also how much a block would benefit from being placed sequentially after
82 it's predecessor.
83 For example blocks which are preceded by an info table are more likely to end
84 up in a different cache line than their predecessor and we can't eliminate the jump
85 so there is less benefit to placing them sequentially.
86
87 For example consider this example:
88
89 A: ...
90 jmp cond D (weak successor)
91 jmp B
92 B: ...
93 jmp C
94 C: ...
95 jmp X
96 D: ...
97 jmp B (weak successor)
98
99 We determine a block layout by building up chunks (calling them chains) of
100 possible control flows for which blocks will be placed sequentially.
101
102 Eg for our example we might end up with two chains like:
103 [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
104 However there is no particular order in which chains are placed since
105 (hopefully) the blocks for which sequentiality is important have already
106 been placed in the same chain.
107
108 -----------------------------------------------------------------------------
109 1) First try to create a list of good chains.
110 -----------------------------------------------------------------------------
111
112 Good chains are these which allow us to eliminate jump instructions.
113 Which further eliminate often executed jumps first.
114
115 We do so by:
116
117 *) Ignore edges which represent instructions which can not be replaced
118 by fall through control flow. Primarily calls and edges to blocks which
119 are prefixed by a info table we have to jump across.
120
121 *) Then process remaining edges in order of frequency taken and:
122
123 +) If source and target have not been placed build a new chain from them.
124
125 +) If source and target have been placed, and are ends of differing chains
126 try to merge the two chains.
127
128 +) If one side of the edge is a end/front of a chain, add the other block of
129 to edge to the same chain
130
131 Eg if we look at edge (B -> C) and already have the chain (A -> B)
132 then we extend the chain to (A -> B -> C).
133
134 +) If the edge was used to modify or build a new chain remove the edge from
135 our working list.
136
137 *) If there any blocks not being placed into a chain after these steps we place
138 them into a chain consisting of only this block.
139
140 Ranking edges by their taken frequency, if
141 two edges compete for fall through on the same target block, the one taken
142 more often will automatically win out. Resulting in fewer instructions being
143 executed.
144
145 Creating singleton chains is required for situations where we have code of the
146 form:
147
148 A: goto B:
149 <infoTable>
150 B: goto C:
151 <infoTable>
152 C: ...
153
154 As the code in block B is only connected to the rest of the program via edges
155 which will be ignored in this step we make sure that B still ends up in a chain
156 this way.
157
158 -----------------------------------------------------------------------------
159 2) We also try to fuse chains.
160 -----------------------------------------------------------------------------
161
162 As a result from the above step we still end up with multiple chains which
163 represent sequential control flow chunks. But they are not yet suitable for
164 code layout as we need to place *all* blocks into a single sequence.
165
166 In this step we combine chains result from the above step via these steps:
167
168 *) Look at the ranked list of *all* edges, including calls/jumps across info tables
169 and the like.
170
171 *) Look at each edge and
172
173 +) Given an edge (A -> B) try to find two chains for which
174 * Block A is at the end of one chain
175 * Block B is at the front of the other chain.
176 +) If we find such a chain we "fuse" them into a single chain, remove the
177 edge from working set and continue.
178 +) If we can't find such chains we skip the edge and continue.
179
180 -----------------------------------------------------------------------------
181 3) Place indirect successors (neighbours) after each other
182 -----------------------------------------------------------------------------
183
184 We might have chains [A,B,C,X],[E] in a CFG of the sort:
185
186 A ---> B ---> C --------> X(exit)
187 \- ->E- -/
188
189 While E does not follow X it's still beneficial to place them near each other.
190 This can be advantageous if eg C,X,E will end up in the same cache line.
191
192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193 ~~~ Note [Triangle Control Flow]
194 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195
196 Checking if an argument is already evaluated leads to a somewhat
197 special case which looks like this:
198
199 A:
200 if (R1 & 7 != 0) goto Leval; else goto Lwork;
201 Leval: // global
202 call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
203 Lwork: // global
204 ...
205
206 A
207 |\
208 | Leval
209 |/ - (This edge can be missing because of optimizations)
210 Lwork
211
212 Once we hit the metal the call instruction is just 2-3 bytes large
213 depending on the register used. So we lay out the assembly like this:
214
215 movq %rbx,%rax
216 andl $7,%eax
217 cmpq $1,%rax
218 jne Lwork
219 Leval:
220 jmp *(%rbx) # encoded in 2-3 bytes.
221 <info table>
222 Lwork:
223 ...
224
225 We could explicitly check for this control flow pattern.
226
227 This is advantageous because:
228 * It's optimal if the argument isn't evaluated.
229 * If it's evaluated we only have the extra cost of jumping over
230 the 2-3 bytes for the call.
231 * Guarantees the smaller encoding for the conditional jump.
232
233 However given that Lwork usually has an info table we
234 penalize this edge. So Leval should get placed first
235 either way and things work out for the best.
236
237 Optimizing for the evaluated case instead would penalize
238 the other code path. It adds an jump as we can't fall through
239 to Lwork because of the info table.
240 Assuming that Lwork is large the chance that the "call" ends up
241 in the same cache line is also fairly small.
242
243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
244 ~~~ Note [Layout relevant edge weights]
245 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
246
247 The input to the chain based code layout algorithm is a CFG
248 with edges annotated with their frequency. The frequency
249 of traversal corresponds quite well to the cost of not placing
250 the connected blocks next to each other.
251
252 However even if having the same frequency certain edges are
253 inherently more or less relevant to code layout.
254
255 In particular:
256
257 * Edges which cross an info table are less relevant than others.
258
259 If we place the blocks across this edge next to each other
260 they are still separated by the info table which negates
261 much of the benefit. It makes it less likely both blocks
262 will share a cache line reducing the benefits from locality.
263 But it also prevents us from eliminating jump instructions.
264
265 * Conditional branches and switches are slightly less relevant.
266
267 We can completely remove unconditional jumps by placing them
268 next to each other. This is not true for conditional branch edges.
269 We apply a small modifier to them to ensure edges for which we can
270 eliminate the overhead completely are considered first. See also #18053.
271
272 * Edges constituted by a call are ignored.
273
274 Considering these hardly helped with performance and ignoring
275 them helps quite a bit to improve compiler performance.
276
277 So we perform a preprocessing step where we apply a multiplicator
278 to these kinds of edges.
279
280 -}
281
282
283 -- | Look at X number of blocks in two chains to determine
284 -- if they are "neighbours".
285 neighbourOverlapp :: Int
286 neighbourOverlapp = 2
287
288 -- | Maps blocks near the end of a chain to it's chain AND
289 -- the other blocks near the end.
290 -- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E]))
291 -- where [A,B] are blocks in the end region of a chain.
292 -- This is cheaper then recomputing the ends multiple times.
293 type FrontierMap = LabelMap ([BlockId],BlockChain)
294
295 -- | A non empty ordered sequence of basic blocks.
296 -- It is suitable for serialization in this order.
297 --
298 -- We use OrdList instead of [] to allow fast append on both sides
299 -- when combining chains.
300 newtype BlockChain
301 = BlockChain { chainBlocks :: (OrdList BlockId) }
302
303 -- All chains are constructed the same way so comparison
304 -- including structure is faster.
305 instance Eq BlockChain where
306 BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2
307
308 -- Useful for things like sets and debugging purposes, sorts by blocks
309 -- in the chain.
310 instance Ord (BlockChain) where
311 (BlockChain lbls1) `compare` (BlockChain lbls2)
312 = assert (toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2) $
313 strictlyOrdOL lbls1 lbls2
314
315 instance Outputable (BlockChain) where
316 ppr (BlockChain blks) =
317 parens (text "Chain:" <+> ppr (fromOL $ blks) )
318
319 chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b
320 chainFoldl f z (BlockChain blocks) = foldl' f z blocks
321
322 noDups :: [BlockChain] -> Bool
323 noDups chains =
324 let chainBlocks = concatMap chainToBlocks chains :: [BlockId]
325 (_blocks, dups) = removeDups compare chainBlocks
326 in if null dups then True
327 else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False
328
329 inFront :: BlockId -> BlockChain -> Bool
330 inFront bid (BlockChain seq)
331 = headOL seq == bid
332
333 chainSingleton :: BlockId -> BlockChain
334 chainSingleton lbl
335 = BlockChain (unitOL lbl)
336
337 chainFromList :: [BlockId] -> BlockChain
338 chainFromList = BlockChain . toOL
339
340 chainSnoc :: BlockChain -> BlockId -> BlockChain
341 chainSnoc (BlockChain blks) lbl
342 = BlockChain (blks `snocOL` lbl)
343
344 chainCons :: BlockId -> BlockChain -> BlockChain
345 chainCons lbl (BlockChain blks)
346 = BlockChain (lbl `consOL` blks)
347
348 chainConcat :: BlockChain -> BlockChain -> BlockChain
349 chainConcat (BlockChain blks1) (BlockChain blks2)
350 = BlockChain (blks1 `appOL` blks2)
351
352 chainToBlocks :: BlockChain -> [BlockId]
353 chainToBlocks (BlockChain blks) = fromOL blks
354
355 -- | Given the Chain A -> B -> C -> D and we break at C
356 -- we get the two Chains (A -> B, C -> D) as result.
357 breakChainAt :: BlockId -> BlockChain
358 -> (BlockChain,BlockChain)
359 breakChainAt bid (BlockChain blks)
360 | not (bid == head rblks)
361 = panic "Block not in chain"
362 | otherwise
363 = (BlockChain (toOL lblks),
364 BlockChain (toOL rblks))
365 where
366 (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks)
367
368 takeR :: Int -> BlockChain -> [BlockId]
369 takeR n (BlockChain blks) =
370 take n . fromOLReverse $ blks
371
372 takeL :: Int -> BlockChain -> [BlockId]
373 takeL n (BlockChain blks) =
374 take n . fromOL $ blks
375
376 -- Note [Combining neighborhood chains]
377 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
378
379 -- See also Note [Chain based CFG serialization]
380 -- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
381 --
382 -- While placing the latter after the former doesn't result in sequential
383 -- control flow it is still beneficial. As block C and E might end
384 -- up in the same cache line.
385 --
386 -- So we place these chains next to each other even if we can't fuse them.
387 --
388 -- A -> B -> C -> D
389 -- v
390 -- - -> E -> F ...
391 --
392 -- A simple heuristic to chose which chains we want to combine:
393 -- * Process edges in descending priority.
394 -- * Check if there is a edge near the end of one chain which goes
395 -- to a block near the start of another edge.
396 --
397 -- While we could take into account the space between the two blocks which
398 -- share an edge this blows up compile times quite a bit. It requires
399 -- us to find all edges between two chains, check the distance for all edges,
400 -- rank them based on the distance and only then we can select two chains
401 -- to combine. Which would add a lot of complexity for little gain.
402 --
403 -- So instead we just rank by the strength of the edge and use the first pair we
404 -- find.
405
406 -- | For a given list of chains and edges try to combine chains with strong
407 -- edges between them.
408 combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider
409 -> [BlockChain] -- ^ Current chains of blocks
410 -> ([BlockChain], Set.Set (BlockId,BlockId))
411 -- ^ Resulting list of block chains, and a set of edges which
412 -- were used to fuse chains and as such no longer need to be
413 -- considered.
414 combineNeighbourhood edges chains
415 = -- pprTraceIt "Neighbours" $
416 -- pprTrace "combineNeighbours" (ppr edges) $
417 applyEdges edges endFrontier startFrontier (Set.empty)
418 where
419 --Build maps from chain ends to chains
420 endFrontier, startFrontier :: FrontierMap
421 endFrontier =
422 mapFromList $ concatMap (\chain ->
423 let ends = getEnds chain :: [BlockId]
424 entry = (ends,chain)
425 in map (\x -> (x,entry)) ends ) chains
426 startFrontier =
427 mapFromList $ concatMap (\chain ->
428 let front = getFronts chain
429 entry = (front,chain)
430 in map (\x -> (x,entry)) front) chains
431 applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId)
432 -> ([BlockChain], Set.Set (BlockId,BlockId))
433 applyEdges [] chainEnds _chainFronts combined =
434 (ordNub $ map snd $ mapElems chainEnds, combined)
435 applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined
436 | Just (c1_e,c1) <- mapLookup from chainEnds
437 , Just (c2_f,c2) <- mapLookup to chainFronts
438 , c1 /= c2 -- Avoid trying to concat a chain with itself.
439 = let newChain = chainConcat c1 c2
440 newChainFrontier = getFronts newChain
441 newChainEnds = getEnds newChain
442 newFronts :: FrontierMap
443 newFronts =
444 let withoutOld =
445 foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1)
446 entry =
447 (newChainFrontier,newChain) --let bound to ensure sharing
448 in foldl' (\m x -> mapInsert x entry m)
449 withoutOld newChainFrontier
450
451 newEnds =
452 let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2)
453 entry = (newChainEnds,newChain) --let bound to ensure sharing
454 in foldl' (\m x -> mapInsert x entry m)
455 withoutOld newChainEnds
456 in
457 -- pprTrace "ApplyEdges"
458 -- (text "before" $$
459 -- text "fronts" <+> ppr chainFronts $$
460 -- text "ends" <+> ppr chainEnds $$
461
462 -- text "various" $$
463 -- text "newChain" <+> ppr newChain $$
464 -- text "newChainFrontier" <+> ppr newChainFrontier $$
465 -- text "newChainEnds" <+> ppr newChainEnds $$
466 -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$
467
468 -- text "after" $$
469 -- text "fronts" <+> ppr newFronts $$
470 -- text "ends" <+> ppr newEnds
471 -- )
472 applyEdges edges newEnds newFronts (Set.insert (from,to) combined)
473 | otherwise
474 = applyEdges edges chainEnds chainFronts combined
475
476 getFronts chain = takeL neighbourOverlapp chain
477 getEnds chain = takeR neighbourOverlapp chain
478
479 -- In the last stop we combine all chains into a single one.
480 -- Trying to place chains with strong edges next to each other.
481 mergeChains :: [CfgEdge] -> [BlockChain]
482 -> (BlockChain)
483 mergeChains edges chains
484 = runST $ do
485 let addChain m0 chain = do
486 ref <- fresh chain
487 return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain
488 chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains
489 merge edges chainMap'
490 where
491 -- We keep a map from ALL blocks to their respective chain (sigh)
492 -- This is required since when looking at an edge we need to find
493 -- the associated chains quickly.
494 -- We use a union-find data structure to do this efficiently.
495
496 merge :: forall s. [CfgEdge] -> LabelMap (Point s BlockChain) -> ST s BlockChain
497 merge [] chains = do
498 chains' <- mapM find =<< (nub <$> (mapM repr $ mapElems chains)) :: ST s [BlockChain]
499 return $ foldl' chainConcat (head chains') (tail chains')
500 merge ((CfgEdge from to _):edges) chains
501 -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False
502 -- = undefined
503 = do
504 same <- equivalent cFrom cTo
505 unless same $ do
506 cRight <- find cTo
507 cLeft <- find cFrom
508 new_point <- fresh (chainConcat cLeft cRight)
509 union cTo new_point
510 union cFrom new_point
511 merge edges chains
512 where
513 cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains
514 cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains
515
516
517 -- See Note [Chain based CFG serialization] for the general idea.
518 -- This creates and fuses chains at the same time for performance reasons.
519
520 -- Try to build chains from a list of edges.
521 -- Edges must be sorted **descending** by their priority.
522 -- Returns the constructed chains, along with all edges which
523 -- are irrelevant past this point, this information doesn't need
524 -- to be complete - it's only used to speed up the process.
525 -- An Edge is irrelevant if the ends are part of the same chain.
526 -- We say these edges are already linked
527 buildChains :: [CfgEdge] -> [BlockId]
528 -> ( LabelMap BlockChain -- Resulting chains, indexed by end if chain.
529 , Set.Set (BlockId, BlockId)) --List of fused edges.
530 buildChains edges blocks
531 = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty
532 where
533 -- buildNext builds up chains from edges one at a time.
534
535 -- We keep a map from the ends of chains to the chains.
536 -- This way we can easily check if an block should be appended to an
537 -- existing chain!
538 -- We store them using STRefs so we don't have to rebuild the spine of both
539 -- maps every time we update a chain.
540 buildNext :: forall s. LabelSet
541 -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain.
542 -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain.
543 -> [CfgEdge] -- Edges to check - ordered by decreasing weight
544 -> Set.Set (BlockId, BlockId) -- Used edges
545 -> ST s ( LabelMap BlockChain -- Chains by end
546 , Set.Set (BlockId, BlockId) --List of fused edges
547 )
548 buildNext placed _chainStarts chainEnds [] linked = do
549 ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain)
550 -- Any remaining blocks have to be made to singleton chains.
551 -- They might be combined with other chains later on outside this function.
552 let unplaced = filter (\x -> not (setMember x placed)) blocks
553 singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)]
554 return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked)
555 buildNext placed chainStarts chainEnds (edge:todo) linked
556 | from == to
557 -- We skip self edges
558 = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked)
559 | not (alreadyPlaced from) &&
560 not (alreadyPlaced to)
561 = do
562 --pprTraceM "Edge-Chain:" (ppr edge)
563 chain' <- newSTRef $ chainFromList [from,to]
564 buildNext
565 (setInsert to (setInsert from placed))
566 (mapInsert from chain' chainStarts)
567 (mapInsert to chain' chainEnds)
568 todo
569 (Set.insert (from,to) linked)
570
571 | (alreadyPlaced from) &&
572 (alreadyPlaced to)
573 , Just predChain <- mapLookup from chainEnds
574 , Just succChain <- mapLookup to chainStarts
575 , predChain /= succChain -- Otherwise we try to create a cycle.
576 = fuseChain predChain succChain
577
578 | (alreadyPlaced from) &&
579 (alreadyPlaced to)
580 = buildNext placed chainStarts chainEnds todo linked
581
582 | otherwise
583 = findChain
584 where
585 from = edgeFrom edge
586 to = edgeTo edge
587 alreadyPlaced blkId = (setMember blkId placed)
588
589 -- Combine two chains into a single one.
590 fuseChain :: STRef s BlockChain -> STRef s BlockChain
591 -> ST s ( LabelMap BlockChain -- Chains by end
592 , Set.Set (BlockId, BlockId) --List of fused edges
593 )
594 fuseChain fromRef toRef = do
595 fromChain <- readSTRef fromRef
596 toChain <- readSTRef toRef
597 let newChain = chainConcat fromChain toChain
598 ref <- newSTRef newChain
599 let start = head $ takeL 1 newChain
600 let end = head $ takeR 1 newChain
601 -- chains <- sequence $ mapMap readSTRef chainStarts
602 -- pprTraceM "pre-fuse chains:" $ ppr chains
603 buildNext
604 placed
605 (mapInsert start ref $ mapDelete to $ chainStarts)
606 (mapInsert end ref $ mapDelete from $ chainEnds)
607 todo
608 (Set.insert (from,to) linked)
609
610
611 --Add the block to a existing chain or creates a new chain
612 findChain :: ST s ( LabelMap BlockChain -- Chains by end
613 , Set.Set (BlockId, BlockId) --List of fused edges
614 )
615 findChain
616 -- We can attach the block to the end of a chain
617 | alreadyPlaced from
618 , Just predChain <- mapLookup from chainEnds
619 = do
620 chain <- readSTRef predChain
621 let newChain = chainSnoc chain to
622 writeSTRef predChain newChain
623 let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds
624 -- chains <- sequence $ mapMap readSTRef chainStarts
625 -- pprTraceM "from chains:" $ ppr chains
626 buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked)
627 -- We can attack it to the front of a chain
628 | alreadyPlaced to
629 , Just succChain <- mapLookup to chainStarts
630 = do
631 chain <- readSTRef succChain
632 let newChain = from `chainCons` chain
633 writeSTRef succChain newChain
634 let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts
635 -- chains <- sequence $ mapMap readSTRef chainStarts'
636 -- pprTraceM "to chains:" $ ppr chains
637 buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked)
638 -- The placed end of the edge is part of a chain already and not an end.
639 | otherwise
640 = do
641 let block = if alreadyPlaced to then from else to
642 --pprTraceM "Singleton" $ ppr block
643 let newChain = chainSingleton block
644 ref <- newSTRef newChain
645 buildNext (setInsert block placed) (mapInsert block ref chainStarts)
646 (mapInsert block ref chainEnds) todo (linked)
647 where
648 alreadyPlaced blkId = (setMember blkId placed)
649
650 -- | Place basic blocks based on the given CFG.
651 -- See Note [Chain based CFG serialization]
652 sequenceChain :: forall a i. Instruction i
653 => LabelMap a -- ^ Keys indicate an info table on the block.
654 -> CFG -- ^ Control flow graph and some meta data.
655 -> [GenBasicBlock i] -- ^ List of basic blocks to be placed.
656 -> [GenBasicBlock i] -- ^ Blocks placed in sequence.
657 sequenceChain _info _weights [] = []
658 sequenceChain _info _weights [x] = [x]
659 sequenceChain info weights blocks@((BasicBlock entry _):_) =
660 let directEdges :: [CfgEdge]
661 directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
662 where
663 -- Apply modifiers to turn edge frequencies into useable weights
664 -- for computing code layout.
665 -- See also Note [Layout relevant edge weights]
666 relevantWeight :: CfgEdge -> Maybe CfgEdge
667 relevantWeight edge@(CfgEdge from to edgeInfo)
668 | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo
669 -- Ignore edges across calls.
670 = Nothing
671 | mapMember to info
672 , w <- edgeWeight edgeInfo
673 -- The payoff is quite small if we jump over an info table
674 = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 })
675 | (EdgeInfo CmmSource { trans_cmmNode = exitNode } _) <- edgeInfo
676 , cantEliminate exitNode
677 , w <- edgeWeight edgeInfo
678 -- A small penalty to edge types which
679 -- we can't optimize away by layout.
680 -- w * 0.96875 == w - w/32
681 = Just (CfgEdge from to edgeInfo { edgeWeight = w * 0.96875 })
682 | otherwise
683 = Just edge
684 where
685 cantEliminate CmmCondBranch {} = True
686 cantEliminate CmmSwitch {} = True
687 cantEliminate _ = False
688
689 blockMap :: LabelMap (GenBasicBlock i)
690 blockMap
691 = foldl' (\m blk@(BasicBlock lbl _ins) ->
692 mapInsert lbl blk m)
693 mapEmpty blocks
694
695 (builtChains, builtEdges)
696 = {-# SCC "buildChains" #-}
697 --pprTraceIt "generatedChains" $
698 --pprTrace "blocks" (ppr (mapKeys blockMap)) $
699 buildChains directEdges (mapKeys blockMap)
700
701 rankedEdges :: [CfgEdge]
702 -- Sort descending by weight, remove fused edges
703 rankedEdges =
704 filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $
705 directEdges
706
707 (neighbourChains, combined)
708 = assert (noDups $ mapElems builtChains) $
709 {-# SCC "groupNeighbourChains" #-}
710 -- pprTraceIt "NeighbourChains" $
711 combineNeighbourhood rankedEdges (mapElems builtChains)
712
713
714 allEdges :: [CfgEdge]
715 allEdges = {-# SCC allEdges #-}
716 sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights)
717 where
718 deadEdge :: CfgEdge -> Bool
719 deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges
720 relevantWeight :: CfgEdge -> EdgeWeight
721 relevantWeight (CfgEdge _ _ edgeInfo)
722 | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo
723 -- Penalize edges across calls
724 = weight/(64.0)
725 | otherwise
726 = weight
727 where
728 -- negate to sort descending
729 weight = negate (edgeWeight edgeInfo)
730
731 masterChain =
732 {-# SCC "mergeChains" #-}
733 -- pprTraceIt "MergedChains" $
734 mergeChains allEdges neighbourChains
735
736 --Make sure the first block stays first
737 prepedChains
738 | inFront entry masterChain
739 = [masterChain]
740 | (rest,entry) <- breakChainAt entry masterChain
741 = [entry,rest]
742 #if __GLASGOW_HASKELL__ <= 810
743 | otherwise = pprPanic "Entry point eliminated" $
744 ppr masterChain
745 #endif
746
747 blockList
748 = assert (noDups [masterChain])
749 (concatMap fromOL $ map chainBlocks prepedChains)
750
751 --chainPlaced = setFromList $ map blockId blockList :: LabelSet
752 chainPlaced = setFromList $ blockList :: LabelSet
753 unplaced =
754 let blocks = mapKeys blockMap
755 isPlaced b = setMember (b) chainPlaced
756 in filter (\block -> not (isPlaced block)) blocks
757
758 placedBlocks =
759 -- We want debug builds to catch this as it's a good indicator for
760 -- issues with CFG invariants. But we don't want to blow up production
761 -- builds if something slips through.
762 assert (null unplaced) $
763 --pprTraceIt "placedBlocks" $
764 -- ++ [] is stil kinda expensive
765 if null unplaced then blockList else blockList ++ unplaced
766 getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap
767 in
768 --Assert we placed all blocks given as input
769 assert (all (\bid -> mapMember bid blockMap) placedBlocks) $
770 dropJumps info $ map getBlock placedBlocks
771
772 {-# SCC dropJumps #-}
773 -- | Remove redundant jumps between blocks when we can rely on
774 -- fall through.
775 dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i]
776 -> [GenBasicBlock i]
777 dropJumps _ [] = []
778 dropJumps info ((BasicBlock lbl ins):todo)
779 | not . null $ ins --This can happen because of shortcutting
780 , [dest] <- jumpDestsOfInstr (last ins)
781 , ((BasicBlock nextLbl _) : _) <- todo
782 , not (mapMember dest info)
783 , nextLbl == dest
784 = BasicBlock lbl (init ins) : dropJumps info todo
785 | otherwise
786 = BasicBlock lbl ins : dropJumps info todo
787
788
789 -- -----------------------------------------------------------------------------
790 -- Sequencing the basic blocks
791
792 -- Cmm BasicBlocks are self-contained entities: they always end in a
793 -- jump, either non-local or to another basic block in the same proc.
794 -- In this phase, we attempt to place the basic blocks in a sequence
795 -- such that as many of the local jumps as possible turn into
796 -- fallthroughs.
797
798 sequenceTop
799 :: Instruction instr
800 => NcgImpl statics instr jumpDest
801 -> Maybe CFG -- ^ CFG if we have one.
802 -> NatCmmDecl statics instr -- ^ Function to serialize
803 -> NatCmmDecl statics instr
804
805 sequenceTop _ _ top@(CmmData _ _) = top
806 sequenceTop ncgImpl edgeWeights (CmmProc info lbl live (ListGraph blocks))
807 = let
808 config = ncgConfig ncgImpl
809 platform = ncgPlatform config
810
811 in CmmProc info lbl live $ ListGraph $ ncgMakeFarBranches ncgImpl info $
812 if -- Chain based algorithm
813 | ncgCfgBlockLayout config
814 , backendMaintainsCfg platform
815 , Just cfg <- edgeWeights
816 -> {-# SCC layoutBlocks #-} sequenceChain info cfg blocks
817
818 -- Old algorithm without edge weights
819 | ncgCfgWeightlessLayout config
820 || not (backendMaintainsCfg platform)
821 -> {-# SCC layoutBlocks #-} sequenceBlocks Nothing info blocks
822
823 -- Old algorithm with edge weights (if any)
824 | otherwise
825 -> {-# SCC layoutBlocks #-} sequenceBlocks edgeWeights info blocks
826
827 -- The old algorithm:
828 -- It is very simple (and stupid): We make a graph out of
829 -- the blocks where there is an edge from one block to another iff the
830 -- first block ends by jumping to the second. Then we topologically
831 -- sort this graph. Then traverse the list: for each block, we first
832 -- output the block, then if it has an out edge, we move the
833 -- destination of the out edge to the front of the list, and continue.
834
835 -- FYI, the classic layout for basic blocks uses postorder DFS; this
836 -- algorithm is implemented in Hoopl.
837
838 sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a
839 -> [GenBasicBlock inst] -> [GenBasicBlock inst]
840 sequenceBlocks _edgeWeight _ [] = []
841 sequenceBlocks edgeWeights infos (entry:blocks) =
842 let entryNode = mkNode edgeWeights entry
843 bodyNodes = reverse
844 (flattenSCCs (sccBlocks edgeWeights blocks))
845 in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes)
846 -- the first block is the entry point ==> it must remain at the start.
847
848 sccBlocks
849 :: Instruction instr
850 => Maybe CFG -> [NatBasicBlock instr]
851 -> [SCC (Node BlockId (NatBasicBlock instr))]
852 sccBlocks edgeWeights blocks =
853 stronglyConnCompFromEdgedVerticesUniqR
854 (map (mkNode edgeWeights) blocks)
855
856 mkNode :: (Instruction t)
857 => Maybe CFG -> GenBasicBlock t
858 -> Node BlockId (GenBasicBlock t)
859 mkNode edgeWeights block@(BasicBlock id instrs) =
860 DigraphNode block id outEdges
861 where
862 outEdges :: [BlockId]
863 outEdges
864 --Select the heaviest successor, ignore weights <= zero
865 = successor
866 where
867 successor
868 | Just successors <- fmap (`getSuccEdgesSorted` id)
869 edgeWeights -- :: Maybe [(Label, EdgeInfo)]
870 = case successors of
871 [] -> []
872 ((target,info):_)
873 | length successors > 2 || edgeWeight info <= 0 -> []
874 | otherwise -> [target]
875 | otherwise
876 = case jumpDestsOfInstr (last instrs) of
877 [one] -> [one]
878 _many -> []
879
880
881 seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
882 -> [GenBasicBlock t1]
883 seqBlocks infos blocks = placeNext pullable0 todo0
884 where
885 -- pullable: Blocks that are not yet placed
886 -- todo: Original order of blocks, to be followed if we have no good
887 -- reason not to;
888 -- may include blocks that have already been placed, but then
889 -- these are not in pullable
890 pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
891 todo0 = map node_key blocks
892
893 placeNext _ [] = []
894 placeNext pullable (i:rest)
895 | Just (block, pullable') <- lookupDeleteUFM pullable i
896 = place pullable' rest block
897 | otherwise
898 -- We already placed this block, so ignore
899 = placeNext pullable rest
900
901 place pullable todo (block,[])
902 = block : placeNext pullable todo
903 place pullable todo (block@(BasicBlock id instrs),[next])
904 | mapMember next infos
905 = block : placeNext pullable todo
906 | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
907 = BasicBlock id instrs : place pullable' todo nextBlock
908 | otherwise
909 = block : placeNext pullable todo
910 place _ _ (_,tooManyNextNodes)
911 = pprPanic "seqBlocks" (ppr tooManyNextNodes)
912
913
914 lookupDeleteUFM :: UniqFM BlockId elt -> BlockId
915 -> Maybe (elt, UniqFM BlockId elt)
916 lookupDeleteUFM m k = do -- Maybe monad
917 v <- lookupUFM m k
918 return (v, delFromUFM m k)
919
920 backendMaintainsCfg :: Platform -> Bool
921 backendMaintainsCfg platform = case platformArch platform of
922 -- ArchX86 -- Should work but not tested so disabled currently.
923 ArchX86_64 -> True
924 _otherwise -> False
925