1 {-# LANGUAGE DataKinds #-}
2 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE FlexibleContexts #-}
7 {-# LANGUAGE TupleSections #-}
8 --
9 -- Copyright (c) 2018 Andreas Klebinger
10 --
11
12 module GHC.CmmToAsm.CFG
13 ( CFG, CfgEdge(..), EdgeInfo(..), EdgeWeight(..)
14 , TransitionSource(..)
15
16 --Modify the CFG
17 , addWeightEdge, addEdge
18 , delEdge
19 , addNodesBetween, shortcutWeightMap
20 , reverseEdges, filterEdges
21 , addImmediateSuccessor
22 , mkWeightInfo, adjustEdgeWeight, setEdgeWeight
23
24 --Query the CFG
25 , infoEdgeList, edgeList
26 , getSuccessorEdges, getSuccessors
27 , getSuccEdgesSorted
28 , getEdgeInfo
29 , getCfgNodes, hasNode
30
31 -- Loop Information
32 , loopMembers, loopLevels, loopInfo
33
34 --Construction/Misc
35 , getCfg, getCfgProc, pprEdgeWeights, sanityCheckCfg
36
37 --Find backedges and update their weight
38 , optimizeCFG
39 , mkGlobalWeights
40
41 )
42 where
43
44 import GHC.Prelude
45 import GHC.Platform
46
47 import GHC.Cmm.BlockId
48 import GHC.Cmm as Cmm
49
50 import GHC.Cmm.Utils
51 import GHC.Cmm.Switch
52 import GHC.Cmm.Dataflow.Collections
53 import GHC.Cmm.Dataflow.Label
54 import GHC.Cmm.Dataflow.Block
55 import qualified GHC.Cmm.Dataflow.Graph as G
56
57 import GHC.Utils.Misc
58 import GHC.Data.Graph.Directed
59 import GHC.Data.Maybe
60
61 import GHC.Types.Unique
62 import qualified GHC.CmmToAsm.CFG.Dominators as Dom
63 import GHC.CmmToAsm.CFG.Weight
64 import Data.IntMap.Strict (IntMap)
65 import Data.IntSet (IntSet)
66
67 import qualified Data.IntMap.Strict as IM
68 import qualified Data.Map as M
69 import qualified Data.IntSet as IS
70 import qualified Data.Set as S
71 import Data.Tree
72 import Data.Bifunctor
73
74 import GHC.Utils.Outputable
75 import GHC.Utils.Panic
76 import GHC.Utils.Panic.Plain
77 -- DEBUGGING ONLY
78 --import GHC.Cmm.DebugBlock
79 --import GHC.Data.OrdList
80 --import GHC.Cmm.DebugBlock.Trace
81 import GHC.Cmm.Ppr () -- For Outputable instances
82
83 import Data.List (sort, nub, partition)
84 import Data.STRef.Strict
85 import Control.Monad.ST
86
87 import Data.Array.MArray
88 import Data.Array.ST
89 import Data.Array.IArray
90 import Data.Array.Unsafe (unsafeFreeze)
91 import Data.Array.Base (unsafeRead, unsafeWrite)
92
93 import Control.Monad
94 import GHC.Data.UnionFind
95
96 type Prob = Double
97
98 type Edge = (BlockId, BlockId)
99 type Edges = [Edge]
100
101 newtype EdgeWeight
102 = EdgeWeight { weightToDouble :: Double }
103 deriving (Eq,Ord,Enum,Num,Real,Fractional)
104
105 instance Outputable EdgeWeight where
106 ppr (EdgeWeight w) = doublePrec 5 w
107
108 type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo)
109
-- | A control flow graph where edges have been annotated with a weight.
-- Implemented as IntMap (IntMap \<edgeData>).
-- We must uphold the invariant that for each edge A -> B we have:
--   * an entry for B in the outer map, and
--   * an entry for B in the map we get when looking up A.
-- Maintaining this invariant is useful as any failed lookup now indicates
-- an actual error in code which might otherwise go unnoticed for a while.
118 type CFG = EdgeInfoMap EdgeInfo
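-- As an illustrative sketch (not used anywhere; @exampleCfg@, @blockA@ and
-- @blockB@ are hypothetical names, the blocks assumed to be 'BlockId's
-- already in scope), a graph with the single edge blockA -> blockB can be
-- built with the helpers below:
--
-- @
--   exampleCfg :: BlockId -> BlockId -> CFG
--   exampleCfg blockA blockB =
--     addEdge blockA blockB (mkWeightInfo 20000) mapEmpty
-- @
--
-- 'addEdge' also inserts an (empty) entry for @blockB@ into the outer map,
-- which is exactly the invariant described above.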
119
120 data CfgEdge
121 = CfgEdge
122 { edgeFrom :: !BlockId
123 , edgeTo :: !BlockId
124 , edgeInfo :: !EdgeInfo
125 }
126
127 -- | Careful! Since we assume there is at most one edge from A to B
128 -- the Eq instance does not consider weight.
129 instance Eq CfgEdge where
130 (==) (CfgEdge from1 to1 _) (CfgEdge from2 to2 _)
131 = from1 == from2 && to1 == to2
132
-- | Edges are compared lexicographically (ascending): first by weight, then by source, then by destination.
134 instance Ord CfgEdge where
135 compare (CfgEdge from1 to1 (EdgeInfo {edgeWeight = weight1}))
136 (CfgEdge from2 to2 (EdgeInfo {edgeWeight = weight2}))
137 | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
138 weight1 == weight2 && from1 == from2 && to1 < to2
139 = LT
140 | from1 == from2 && to1 == to2 && weight1 == weight2
141 = EQ
142 | otherwise
143 = GT
144
145 instance Outputable CfgEdge where
146 ppr (CfgEdge from1 to1 edgeInfo)
147 = parens (ppr from1 <+> text "-(" <> ppr edgeInfo <> text ")->" <+> ppr to1)
148
-- | Can we trace back an edge to a specific Cmm Node
150 -- or has it been introduced during assembly codegen. We use this to maintain
151 -- some information which would otherwise be lost during the
152 -- Cmm \<-> asm transition.
153 -- See also Note [Inverting Conditional Branches]
154 data TransitionSource
155 = CmmSource { trans_cmmNode :: (CmmNode O C)
156 , trans_info :: BranchInfo }
157 | AsmCodeGen
158 deriving (Eq)
159
160 data BranchInfo = NoInfo -- ^ Unknown, but not heap or stack check.
161 | HeapStackCheck -- ^ Heap or stack check
162 deriving Eq
163
164 instance Outputable BranchInfo where
165 ppr NoInfo = text "regular"
166 ppr HeapStackCheck = text "heap/stack"
167
168 isHeapOrStackCheck :: TransitionSource -> Bool
169 isHeapOrStackCheck (CmmSource { trans_info = HeapStackCheck}) = True
170 isHeapOrStackCheck _ = False
171
172 -- | Information about edges
173 data EdgeInfo
174 = EdgeInfo
175 { transitionSource :: !TransitionSource
176 , edgeWeight :: !EdgeWeight
177 } deriving (Eq)
178
179 instance Outputable EdgeInfo where
180 ppr edgeInfo = text "weight:" <+> ppr (edgeWeight edgeInfo)
181
-- | Convenience function; generate edge info with the given
-- weight for an edge not originating from Cmm.
184 mkWeightInfo :: EdgeWeight -> EdgeInfo
185 mkWeightInfo = EdgeInfo AsmCodeGen
186
187 -- | Adjust the weight between the blocks using the given function.
188 -- If there is no such edge returns the original map.
189 adjustEdgeWeight :: CFG -> (EdgeWeight -> EdgeWeight)
190 -> BlockId -> BlockId -> CFG
191 adjustEdgeWeight cfg f from to
192 | Just info <- getEdgeInfo from to cfg
193 , !weight <- edgeWeight info
194 , !newWeight <- f weight
195 = addEdge from to (info { edgeWeight = newWeight}) cfg
196 | otherwise = cfg
197
198 -- | Set the weight between the blocks to the given weight.
199 -- If there is no such edge returns the original map.
200 setEdgeWeight :: CFG -> EdgeWeight
201 -> BlockId -> BlockId -> CFG
202 setEdgeWeight cfg !weight from to
203 | Just info <- getEdgeInfo from to cfg
204 = addEdge from to (info { edgeWeight = weight}) cfg
205 | otherwise = cfg
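
-- A small usage sketch (assuming @from@ and @to@ are blocks of an existing
-- CFG): doubling the weight of the edge from -> to, a no-op if the edge
-- does not exist:
--
-- @
--   adjustEdgeWeight cfg (* 2) from to
-- @
--
-- 'setEdgeWeight' instead overwrites the weight unconditionally (but again
-- only if the edge is already present).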
206
207
208 getCfgNodes :: CFG -> [BlockId]
209 getCfgNodes m =
210 mapKeys m
211
212 -- | Is this block part of this graph?
213 hasNode :: CFG -> BlockId -> Bool
214 hasNode m node =
-- Check the invariant: a node that is missing from the outer map must not appear in any inner map either.
216 assert (found || not (any (mapMember node) m))
217 found
218 where
219 found = mapMember node m
220
221
222
-- | Check if the nodes in the cfg and the set of blocks are the same.
-- In case of a mismatch we panic and show the difference.
225 sanityCheckCfg :: CFG -> LabelSet -> SDoc -> Bool
226 sanityCheckCfg m blockSet msg
227 | blockSet == cfgNodes
228 = True
229 | otherwise =
230 pprPanic "Block list and cfg nodes don't match" (
231 text "difference:" <+> ppr diff $$
232 text "blocks:" <+> ppr blockSet $$
233 text "cfg:" <+> pprEdgeWeights m $$
234 msg )
235 False
236 where
237 cfgNodes = setFromList $ getCfgNodes m :: LabelSet
238 diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet
239
-- | Filter the CFG with a custom function f.
-- Parameters are `f from to edgeInfo`.
242 filterEdges :: (BlockId -> BlockId -> EdgeInfo -> Bool) -> CFG -> CFG
243 filterEdges f cfg =
244 mapMapWithKey filterSources cfg
245 where
246 filterSources from m =
247 mapFilterWithKey (\to w -> f from to w) m
248
249
250 {- Note [Updating the CFG during shortcutting]
251
252 See Note [What is shortcutting] in the control flow optimization
253 code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting.
254
In the native backend we shortcut jumps at the assembly level ("GHC.CmmToAsm").
This means we remove blocks containing only one jump from the code
and instead redirect all jumps targeting this block to the deleted
block's jump target.
259
260 However we want to have an accurate representation of control
261 flow in the CFG. So we add/remove edges accordingly to account
262 for the eliminated blocks and new edges.
263
264 If we shortcut A -> B -> C to A -> C:
265 * We delete edges A -> B and B -> C
266 * Replacing them with the edge A -> C
267
268 We also try to preserve jump weights while doing so.
269
270 Note that:
271 * The edge B -> C can't have interesting weights since
272 the block B consists of a single unconditional jump without branching.
273 * We delete the edge A -> B and add the edge A -> C.
274 * The edge A -> B can be one of many edges originating from A so likely
275 has edge weights we want to preserve.
276
277 For this reason we simply store the edge info from the original A -> B
278 edge and apply this information to the new edge A -> C.
279
Sometimes we have a scenario where jump target C is not represented by a
BlockId but by an immediate value. I'm only aware of this happening without
tables next to code currently.
283
284 Then we go from A ---> B - -> IMM to A - -> IMM where the dashed arrows
285 are not stored in the CFG.
286
287 In that case we simply delete the edge A -> B.
288
289 In terms of implementation the native backend first builds a mapping
290 from blocks suitable for shortcutting to their jump targets.
291 Then it redirects all jump instructions to these blocks using the
292 built up mapping.
293 This function (shortcutWeightMap) takes the same mapping and
294 applies the mapping to the CFG in the way laid out above.
295
296 -}
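
-- A worked sketch (labels hypothetical): given the cuts map
--
-- @
--   mapFromList [(b, Just c), (c, Just d)]
-- @
--
-- normalisation rewrites both entries to their final target, i.e.
-- [(b, Just d), (c, Just d)]. Applied to a CFG containing a -> b this
-- yields a -> d, carrying over the edge info of a -> b, while all edges
-- leaving b and c are dropped. An entry (b, Nothing) would instead simply
-- delete every edge pointing at b.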
297 shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG
298 shortcutWeightMap cuts cfg
299 | mapNull cuts = cfg
300 | otherwise = normalised_cfg
301 where
302 -- First take the cuts map and collapse any shortcuts, for example
303 -- if the cuts map has A -> B and B -> C then we want to rewrite
304 -- A -> C and B -> C directly.
305 normalised_cuts_st :: forall s . ST s (LabelMap (Maybe BlockId))
306 normalised_cuts_st = do
307 (null :: Point s (Maybe BlockId)) <- fresh Nothing
308 let cuts_list = mapToList cuts
309 -- Create a unification variable for each of the nodes in a rewrite
310 cuts_vars <- traverse (\p -> (p,) <$> fresh (Just p)) (concatMap (\(a, b) -> [a] ++ maybe [] (:[]) b) cuts_list)
311 let cuts_map = mapFromList cuts_vars :: LabelMap (Point s (Maybe BlockId))
-- Then unify according to the rewrites in the cuts map
313 mapM_ (\(from, to) -> expectJust "shortcutWeightMap" (mapLookup from cuts_map)
314 `union` expectJust "shortcutWeightMap" (maybe (Just null) (flip mapLookup cuts_map) to) ) cuts_list
315 -- Then recover the unique representative, which is the result of following
316 -- the chain to the end.
317 mapM find cuts_map
318
319 normalised_cuts = runST normalised_cuts_st
320
321 cuts_domain :: LabelSet
322 cuts_domain = setFromList $ mapKeys cuts
323
324 -- The CFG is shortcutted using the normalised cuts map
325 normalised_cfg :: CFG
326 normalised_cfg = mapFoldlWithKey update_edge mapEmpty cfg
327
328 update_edge :: CFG -> Label -> LabelMap EdgeInfo -> CFG
329 update_edge new_map from edge_map
330 -- If the from edge is in the cuts map then delete the edge
331 | setMember from cuts_domain = new_map
332 -- Otherwise we are keeping the edge, but might have shortcutted some of
333 -- the target nodes.
334 | otherwise = mapInsert from (mapFoldlWithKey update_from_edge mapEmpty edge_map) new_map
335
336 update_from_edge :: LabelMap a -> Label -> a -> LabelMap a
337 update_from_edge new_map to_edge edge_info
338 -- Edge is in the normalised cuts
339 | Just new_edge <- mapLookup to_edge normalised_cuts =
340 case new_edge of
341 -- The result was Nothing, so edge is deleted
342 Nothing -> new_map
343 -- The new target for the edge, write it with the old edge_info.
344 Just new_to -> mapInsert new_to edge_info new_map
345 -- Node wasn't in the cuts map, so just add it back
346 | otherwise = mapInsert to_edge edge_info new_map
347
348
349 -- | Sometimes we insert a block which should unconditionally be executed
350 -- after a given block. This function updates the CFG for these cases.
-- So we get A -> B      =>      A -> A' -> B
--            \                        \
--             -> C      =>             -> C
--
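-- For example (sketch; @a@, @a'@ and @b@ are hypothetical blocks): if the
-- CFG contains the edge a -> b, then
--
-- @
--   addImmediateSuccessor weights a a' cfg
-- @
--
-- adds a -> a' with the weight of an unconditional jump, deletes a -> b,
-- and re-adds the old edge info on a' -> b.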
355 addImmediateSuccessor :: Weights -> BlockId -> BlockId -> CFG -> CFG
356 addImmediateSuccessor weights node follower cfg
357 = updateEdges . addWeightEdge node follower weight $ cfg
358 where
359 weight = fromIntegral (uncondWeight weights)
360 targets = getSuccessorEdges cfg node
361 successors = map fst targets :: [BlockId]
362 updateEdges = addNewSuccs . remOldSuccs
363 remOldSuccs m = foldl' (flip (delEdge node)) m successors
364 addNewSuccs m =
365 foldl' (\m' (t,info) -> addEdge follower t info m') m targets
366
367 -- | Adds a new edge, overwrites existing edges if present
368 addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG
369 addEdge from to info cfg =
370 mapAlter addFromToEdge from $
371 mapAlter addDestNode to cfg
372 where
373 -- Simply insert the edge into the edge list.
374 addFromToEdge Nothing = Just $ mapSingleton to info
375 addFromToEdge (Just wm) = Just $ mapInsert to info wm
376 -- We must add the destination node explicitly
377 addDestNode Nothing = Just $ mapEmpty
378 addDestNode n@(Just _) = n
379
380
-- | Adds an edge with the given weight to the cfg.
-- If there already existed an edge it is overwritten.
383 -- `addWeightEdge from to weight cfg`
384 addWeightEdge :: BlockId -> BlockId -> EdgeWeight -> CFG -> CFG
385 addWeightEdge from to weight cfg =
386 addEdge from to (mkWeightInfo weight) cfg
387
388 delEdge :: BlockId -> BlockId -> CFG -> CFG
389 delEdge from to m =
390 mapAlter remDest from m
391 where
392 remDest Nothing = Nothing
393 remDest (Just wm) = Just $ mapDelete to wm
394
395
396 -- | Destinations from bid ordered by weight (descending)
397 getSuccEdgesSorted :: CFG -> BlockId -> [(BlockId,EdgeInfo)]
398 getSuccEdgesSorted m bid =
399 let destMap = mapFindWithDefault mapEmpty bid m
400 cfgEdges = mapToList destMap
401 sortedEdges = sortWith (negate . edgeWeight . snd) cfgEdges
402 in --pprTrace "getSuccEdgesSorted" (ppr bid <+> text "map:" <+> ppr m)
403 sortedEdges
404
405 -- | Get successors of a given node with edge weights.
406 getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)]
407 getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m)
408 where
409 lookupError = pprPanic "getSuccessorEdges: Block does not exist" $
410 ppr bid <+> pprEdgeWeights m
411
412 getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo
413 getEdgeInfo from to m
414 | Just wm <- mapLookup from m
415 , Just info <- mapLookup to wm
416 = Just $! info
417 | otherwise
418 = Nothing
419
420 getEdgeWeight :: CFG -> BlockId -> BlockId -> EdgeWeight
421 getEdgeWeight cfg from to =
edgeWeight $ expectJust "Edgeweight for nonexistent block" $
423 getEdgeInfo from to cfg
424
425 getTransitionSource :: BlockId -> BlockId -> CFG -> TransitionSource
getTransitionSource from to cfg = transitionSource $ expectJust "Source info for nonexistent block" $
427 getEdgeInfo from to cfg
428
429 reverseEdges :: CFG -> CFG
430 reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg
431 where
432 -- We must preserve nodes without outgoing edges!
433 addNode :: CFG -> BlockId -> CFG
434 addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg
435 go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG
436 go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap :: CFG
437
438
-- | Returns an unordered list of all edges with info
440 infoEdgeList :: CFG -> [CfgEdge]
441 infoEdgeList m =
442 go (mapToList m) []
443 where
444 -- We avoid foldMap to avoid thunk buildup
445 go :: [(BlockId,LabelMap EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
446 go [] acc = acc
447 go ((from,toMap):xs) acc
448 = go' xs from (mapToList toMap) acc
449 go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [(BlockId,EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
450 go' froms _ [] acc = go froms acc
451 go' froms from ((to,info):tos) acc
452 = go' froms from tos (CfgEdge from to info : acc)
453
-- | Returns an unordered list of all edges without weights
455 edgeList :: CFG -> [Edge]
456 edgeList m =
457 go (mapToList m) []
458 where
459 -- We avoid foldMap to avoid thunk buildup
460 go :: [(BlockId,LabelMap EdgeInfo)] -> [Edge] -> [Edge]
461 go [] acc = acc
462 go ((from,toMap):xs) acc
463 = go' xs from (mapKeys toMap) acc
464 go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [BlockId] -> [Edge] -> [Edge]
465 go' froms _ [] acc = go froms acc
466 go' froms from (to:tos) acc
467 = go' froms from tos ((from,to) : acc)
468
469 -- | Get successors of a given node without edge weights.
470 getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
471 getSuccessors m bid
472 | Just wm <- mapLookup bid m
473 = mapKeys wm
474 | otherwise = lookupError
475 where
476 lookupError = pprPanic "getSuccessors: Block does not exist" $
477 ppr bid <+> pprEdgeWeights m
478
479 pprEdgeWeights :: CFG -> SDoc
480 pprEdgeWeights m =
481 let edges = sort $ infoEdgeList m :: [CfgEdge]
482 printEdge (CfgEdge from to (EdgeInfo { edgeWeight = weight }))
483 = text "\t" <> ppr from <+> text "->" <+> ppr to <>
484 text "[label=\"" <> ppr weight <> text "\",weight=\"" <>
485 ppr weight <> text "\"];\n"
486 --for the case that there are no edges from/to this node.
487 --This should rarely happen but it can save a lot of time
488 --to immediately see it when it does.
489 printNode node
490 = text "\t" <> ppr node <> text ";\n"
491 getEdgeNodes (CfgEdge from to _) = [from,to]
492 edgeNodes = setFromList $ concatMap getEdgeNodes edges :: LabelSet
493 nodes = filter (\n -> (not . setMember n) edgeNodes) . mapKeys $ mapFilter null m
494 in
495 text "digraph {\n" <>
496 (foldl' (<>) empty (map printEdge edges)) <>
497 (foldl' (<>) empty (map printNode nodes)) <>
498 text "}\n"
499
500 {-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible
501 -- | Invariant: The edge **must** exist already in the graph.
502 updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG
503 updateEdgeWeight f (from, to) cfg
504 | Just oldInfo <- getEdgeInfo from to cfg
505 = let !oldWeight = edgeWeight oldInfo
506 !newWeight = f oldWeight
507 in addEdge from to (oldInfo {edgeWeight = newWeight}) cfg
508 | otherwise
509 = panic "Trying to update invalid edge"
510
511 -- from to oldWeight => newWeight
512 mapWeights :: (BlockId -> BlockId -> EdgeWeight -> EdgeWeight) -> CFG -> CFG
513 mapWeights f cfg =
514 foldl' (\cfg (CfgEdge from to info) ->
515 let oldWeight = edgeWeight info
516 newWeight = f from to oldWeight
517 in addEdge from to (info {edgeWeight = newWeight}) cfg)
518 cfg (infoEdgeList cfg)
519
520
521 -- | Insert a block in the control flow between two other blocks.
522 -- We pass a list of tuples (A,B,C) where
-- * A -> C: the old edge
-- * A -> B -> C: the new path, where B is the new block.
525 -- It's possible that a block has two jumps to the same block
526 -- in the assembly code. However we still only store a single edge for
527 -- these cases.
528 -- We assign the old edge info to the edge A -> B and assign B -> C the
529 -- weight of an unconditional jump.
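-- For example (sketch; labels hypothetical): if the CFG contains a -> c and
-- we insert a new block b on that edge, then
--
-- @
--   addNodesBetween weights cfg [(a, b, c)]
-- @
--
-- deletes a -> c, gives a -> b the edge info that a -> c had, and gives
-- b -> c the weight of an unconditional jump.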
530 addNodesBetween :: Weights -> CFG -> [(BlockId,BlockId,BlockId)] -> CFG
531 addNodesBetween weights m updates =
532 foldl' updateWeight m .
533 weightUpdates $ updates
534 where
535 weight = fromIntegral (uncondWeight weights)
536 -- We might add two blocks for different jumps along a single
537 -- edge. So we end up with edges: A -> B -> C , A -> D -> C
538 -- in this case after applying the first update the weight for A -> C
539 -- is no longer available. So we calculate future weights before updates.
540 weightUpdates = map getWeight
541 getWeight :: (BlockId,BlockId,BlockId) -> (BlockId,BlockId,BlockId,EdgeInfo)
542 getWeight (from,between,old)
543 | Just edgeInfo <- getEdgeInfo from old m
544 = (from,between,old,edgeInfo)
545 | otherwise
546 = pprPanic "Can't find weight for edge that should have one" (
547 text "triple" <+> ppr (from,between,old) $$
548 text "updates" <+> ppr updates $$
549 text "cfg:" <+> pprEdgeWeights m )
550 updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG
551 updateWeight m (from,between,old,edgeInfo)
552 = addEdge from between edgeInfo .
553 addWeightEdge between old weight .
554 delEdge from old $ m
555
556 {-
557 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
558 ~~~ Note [CFG Edge Weights] ~~~
559 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
560
Edge weights do not currently represent a specific
cost model; they are just a ranking of which blocks should
be placed next to each other given their connection type in
the CFG.
This is especially relevant whenever two blocks
jump to the same target.
567
568 A B
569 \ /
570 C
571
572 Should A or B be placed in front of C? The block layout algorithm
573 decides this based on which edge (A,C)/(B,C) is heavier. So we
make an educated guess on which branch should be preferred.
575
576 We rank edges in this order:
577 * Unconditional Control Transfer - They will always
transfer control to their target. Unless there is an info table
579 we can turn the jump into a fallthrough as well.
580 We use 20k as default, so it's easy to spot if values have been
581 modified but unlikely that we run into issues with overflow.
582 * If branches (likely) - We assume branches marked as likely
583 are taken more than 80% of the time.
584 By ranking them below unconditional jumps we make sure we
585 prefer the unconditional if there is a conditional and
586 unconditional edge towards a block.
587 * If branches (regular) - The false branch can potentially be turned
588 into a fallthrough so we prefer it slightly over the true branch.
589 * Unlikely branches - These can be assumed to be taken less than 20%
of the time. So we give them one of the lowest priorities.
591 * Switches - Switches at this level are implemented as jump tables
592 so have a larger number of successors. So without more information
593 we can only say that each individual successor is unlikely to be
594 jumped to and we rank them accordingly.
595 * Calls - We currently ignore calls completely:
596 * By the time we return from a call there is a good chance
597 that the address we return to has already been evicted from
598 cache eliminating a main advantage sequential placement brings.
* Calls always require an info table in front of their return
600 address. This reduces the chance that we return to the same
601 cache line further.
602
603 -}
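{- As a small worked instance of the ranking above (a sketch, not a precise
cost model): if block A ends in a conditional branch to B (marked likely)
and to C (unlikely), while block D ends in an unconditional jump to B, then
weight(D->B) > weight(A->B) > weight(A->C). The layout pass will therefore
prefer to place B directly after D and is free to move C out of the hot
path. The concrete numbers can be inspected with 'pprEdgeWeights' on the
result of 'getCfgProc'.
-}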
604 -- | Generate weights for a Cmm proc based on some simple heuristics.
605 getCfgProc :: Platform -> Weights -> RawCmmDecl -> CFG
606 getCfgProc _ _ (CmmData {}) = mapEmpty
607 getCfgProc platform weights (CmmProc _info _lab _live graph) = getCfg platform weights graph
608
609 getCfg :: Platform -> Weights -> CmmGraph -> CFG
610 getCfg platform weights graph =
611 foldl' insertEdge edgelessCfg $ concatMap getBlockEdges blocks
612 where
613 Weights
614 { uncondWeight = uncondWeight
615 , condBranchWeight = condBranchWeight
616 , switchWeight = switchWeight
617 , callWeight = callWeight
618 , likelyCondWeight = likelyCondWeight
619 , unlikelyCondWeight = unlikelyCondWeight
620 -- Last two are used in other places
621 --, infoTablePenalty = infoTablePenalty
622 --, backEdgeBonus = backEdgeBonus
623 } = weights
624 -- Explicitly add all nodes to the cfg to ensure they are part of the
625 -- CFG.
626 edgelessCfg = mapFromList $ zip (map G.entryLabel blocks) (repeat mapEmpty)
627 insertEdge :: CFG -> ((BlockId,BlockId),EdgeInfo) -> CFG
628 insertEdge m ((from,to),weight) =
629 mapAlter f from m
630 where
631 f :: Maybe (LabelMap EdgeInfo) -> Maybe (LabelMap EdgeInfo)
632 f Nothing = Just $ mapSingleton to weight
633 f (Just destMap) = Just $ mapInsert to weight destMap
634 getBlockEdges :: CmmBlock -> [((BlockId,BlockId),EdgeInfo)]
635 getBlockEdges block =
636 case branch of
637 CmmBranch dest -> [mkEdge dest uncondWeight]
638 CmmCondBranch cond t f l
639 | l == Nothing ->
640 [mkEdge f condBranchWeight, mkEdge t condBranchWeight]
641 | l == Just True ->
642 [mkEdge f unlikelyCondWeight, mkEdge t likelyCondWeight]
643 | l == Just False ->
644 [mkEdge f likelyCondWeight, mkEdge t unlikelyCondWeight]
645 where
646 mkEdgeInfo = -- pprTrace "Info" (ppr branchInfo <+> ppr cond)
647 EdgeInfo (CmmSource branch branchInfo) . fromIntegral
648 mkEdge target weight = ((bid,target), mkEdgeInfo weight)
649 branchInfo =
650 foldRegsUsed
651 (panic "GHC.CmmToAsm.CFG.getCfg: foldRegsUsed")
652 (\info r -> if r == SpLim || r == HpLim || r == BaseReg
653 then HeapStackCheck else info)
654 NoInfo cond
655
656 (CmmSwitch _e ids) ->
657 let switchTargets = switchTargetsToList ids
658 --Compiler performance hack - for very wide switches don't
659 --consider targets for layout.
660 adjustedWeight =
661 if (length switchTargets > 10) then -1 else switchWeight
662 in map (\x -> mkEdge x adjustedWeight) switchTargets
663 (CmmCall { cml_cont = Just cont}) -> [mkEdge cont callWeight]
664 (CmmForeignCall {Cmm.succ = cont}) -> [mkEdge cont callWeight]
665 (CmmCall { cml_cont = Nothing }) -> []
666 other ->
667 panic "Foo" $
668 assertPpr False (ppr "Unknown successor cause:" <>
669 (pdoc platform branch <+> text "=>" <> pdoc platform (G.successors other))) $
670 map (\x -> ((bid,x),mkEdgeInfo 0)) $ G.successors other
671 where
672 bid = G.entryLabel block
673 mkEdgeInfo = EdgeInfo (CmmSource branch NoInfo) . fromIntegral
674 mkEdge target weight = ((bid,target), mkEdgeInfo weight)
675 branch = lastNode block :: CmmNode O C
676
677 blocks = revPostorder graph :: [CmmBlock]
678
679 --Find back edges by BFS
680 findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges
681 findBackEdges root cfg =
682 --pprTraceIt "Backedges:" $
683 map fst .
684 filter (\x -> snd x == Backward) $ typedEdges
685 where
686 edges = edgeList cfg :: [(BlockId,BlockId)]
687 getSuccs = getSuccessors cfg :: BlockId -> [BlockId]
688 typedEdges =
689 classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
690
691 optimizeCFG :: Bool -> Weights -> RawCmmDecl -> CFG -> CFG
692 optimizeCFG _ _ (CmmData {}) cfg = cfg
693 optimizeCFG doStaticPred weights proc@(CmmProc _info _lab _live graph) cfg =
694 (if doStaticPred then staticPredCfg (g_entry graph) else id) $
695 optHsPatterns weights proc $ cfg
696
-- | Modify branch weights based on educated guesses about
-- patterns GHC tends to produce and how they affect
699 -- performance.
700 --
701 -- Most importantly we penalize jumps across info tables.
702 optHsPatterns :: Weights -> RawCmmDecl -> CFG -> CFG
703 optHsPatterns _ (CmmData {}) cfg = cfg
704 optHsPatterns weights (CmmProc info _lab _live graph) cfg =
705 {-# SCC optHsPatterns #-}
706 -- pprTrace "Initial:" (pprEdgeWeights cfg) $
707 -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
708
709 -- pprTrace "LoopInfo:" (ppr $ loopInfo cfg (g_entry graph)) $
710 favourFewerPreds .
711 penalizeInfoTables info .
712 increaseBackEdgeWeight (g_entry graph) $ cfg
713 where
714
-- | Increase the weight of all backedges in the CFG.
-- This helps to make loop jumpbacks the heaviest edges.
717 increaseBackEdgeWeight :: BlockId -> CFG -> CFG
718 increaseBackEdgeWeight root cfg =
719 let backedges = findBackEdges root cfg
720 update weight
721 --Keep irrelevant edges irrelevant
722 | weight <= 0 = 0
723 | otherwise
724 = weight + fromIntegral (backEdgeBonus weights)
725 in foldl' (\cfg edge -> updateEdgeWeight update edge cfg)
726 cfg backedges
727
-- | Since we can't fall through info tables we penalize these.
729 penalizeInfoTables :: LabelMap a -> CFG -> CFG
730 penalizeInfoTables info cfg =
731 mapWeights fupdate cfg
732 where
733 fupdate :: BlockId -> BlockId -> EdgeWeight -> EdgeWeight
734 fupdate _ to weight
735 | mapMember to info
736 = weight - (fromIntegral $ infoTablePenalty weights)
737 | otherwise = weight
738
739 -- | If a block has two successors, favour the one with fewer
740 -- predecessors and/or the one allowing fall through.
741 favourFewerPreds :: CFG -> CFG
742 favourFewerPreds cfg =
743 let
744 revCfg =
745 reverseEdges $ filterEdges
746 (\_from -> fallthroughTarget) cfg
747
748 predCount n = length $ getSuccessorEdges revCfg n
749 nodes = getCfgNodes cfg
750
751 modifiers :: Int -> Int -> (EdgeWeight, EdgeWeight)
752 modifiers preds1 preds2
753 | preds1 < preds2 = ( 1,-1)
754 | preds1 == preds2 = ( 0, 0)
755 | otherwise = (-1, 1)
756
757 update :: CFG -> BlockId -> CFG
758 update cfg node
759 | [(s1,e1),(s2,e2)] <- getSuccessorEdges cfg node
760 , !w1 <- edgeWeight e1
761 , !w2 <- edgeWeight e2
--Only change the weights if there isn't already an ordering.
763 , w1 == w2
764 , (mod1,mod2) <- modifiers (predCount s1) (predCount s2)
765 = (\cfg' ->
766 (adjustEdgeWeight cfg' (+mod2) node s2))
767 (adjustEdgeWeight cfg (+mod1) node s1)
768 | otherwise
769 = cfg
770 in foldl' update cfg nodes
771 where
772 fallthroughTarget :: BlockId -> EdgeInfo -> Bool
773 fallthroughTarget to (EdgeInfo source _weight)
774 | mapMember to info = False
775 | AsmCodeGen <- source = True
776 | CmmSource { trans_cmmNode = CmmBranch {} } <- source = True
777 | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
778 | otherwise = False
779
780 -- | Convert block-local branch weights to global weights.
781 staticPredCfg :: BlockId -> CFG -> CFG
782 staticPredCfg entry cfg = cfg'
783 where
784 (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-}
785 mkGlobalWeights entry cfg
786 cfg' = {-# SCC rewriteEdges #-}
787 mapFoldlWithKey
788 (\cfg from m ->
789 mapFoldlWithKey
790 (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
791 cfg m )
792 cfg
793 globalEdgeWeights
794
-- | Determine loop membership of blocks based on SCC analysis.
-- This is faster but only gives yes/no answers.
797 loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
798 loopMembers cfg =
799 foldl' (flip setLevel) mapEmpty sccs
800 where
801 mkNode :: BlockId -> Node BlockId BlockId
802 mkNode bid = DigraphNode bid bid (getSuccessors cfg bid)
803 nodes = map mkNode (getCfgNodes cfg)
804
805 sccs = stronglyConnCompFromEdgedVerticesOrd nodes
806
807 setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool
808 setLevel (AcyclicSCC bid) m = mapInsert bid False m
809 setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids
810
811 loopLevels :: CFG -> BlockId -> LabelMap Int
812 loopLevels cfg root = liLevels loopInfos
813 where
814 loopInfos = loopInfo cfg root
815
816 data LoopInfo = LoopInfo
817 { liBackEdges :: [(Edge)] -- ^ List of back edges
818 , liLevels :: LabelMap Int -- ^ BlockId -> LoopLevel mapping
819 , liLoops :: [(Edge, LabelSet)] -- ^ (backEdge, loopBody), body includes header
820 }
821
822 instance Outputable LoopInfo where
823 ppr (LoopInfo _ _lvls loops) =
824 text "Loops:(backEdge, bodyNodes)" $$
825 (vcat $ map ppr loops)
826
827 {- Note [Determining the loop body]
828 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
829
830 Starting with the knowledge that:
831 * head dominates the loop
832 * `tail` -> `head` is a backedge
833
834 We can determine all nodes by:
835 * Deleting the loop head from the graph.
836 * Collect all blocks which are reachable from the `tail`.
837
838 We do so by performing bfs from the tail node towards the head.
839 -}
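{- For example (sketch): with the backedge tail -> head in the graph

     head -> a, a -> tail, tail -> head, head -> exit

   deleting head and collecting everything reachable from tail in the
   reversed graph finds {tail, a}; adding head back gives the loop body
   {head, a, tail}, while exit is (correctly) not included.
-}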
840
841 -- | Determine loop membership of blocks based on Dominator analysis.
842 -- This is slower but gives loop levels instead of just loop membership.
843 -- However it only detects natural loops. Irreducible control flow is not
844 -- recognized even if it loops. But that is rare enough that we don't have
845 -- to care about that special case.
846 loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo
847 loopInfo cfg root = LoopInfo { liBackEdges = backEdges
848 , liLevels = mapFromList loopCounts
849 , liLoops = loopBodies }
850 where
851 revCfg = reverseEdges cfg
852
853 graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $
854 fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet
855
856
857 --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ...
858 rooted = ( fromBlockId root
859 , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet)
860 tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId
861
862 -- Map from Nodes to their dominators
863 domMap :: LabelMap LabelSet
864 domMap = mkDomMap tree
865
866 edges = edgeList cfg :: [(BlockId, BlockId)]
867 -- We can't recompute nodes from edges, there might be blocks not connected via edges.
868 nodes = getCfgNodes cfg :: [BlockId]
869
870 -- identify back edges
871 isBackEdge (from,to)
872 | Just doms <- mapLookup from domMap
873 , setMember to doms
874 = True
875 | otherwise = False
876
877 -- See Note [Determining the loop body]
878 -- Get the loop body associated with a back edge.
879 findBody edge@(tail, head)
880 = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) )
881 where
882 -- See Note [Determining the loop body]
883
884
885 go :: LabelSet -> LabelSet -> LabelSet
886 go found current
887 | setNull current = found
888 | otherwise = go (setUnion newSuccessors found)
889 newSuccessors
890 where
891 -- Really predecessors, since we use the reversed cfg.
892 newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet
893 successors = setDelete head $ setUnions $ map
894 (\x -> if x == head then setEmpty else setFromList (getSuccessors revCfg x))
895 (setElems current) :: LabelSet
896
897 backEdges = filter isBackEdge edges
898 loopBodies = map findBody backEdges :: [(Edge, LabelSet)]
899
900 -- Block b is part of n loop bodies => loop nest level of n
901 loopCounts =
902 let bodies = map (first snd) loopBodies -- [(Header, Body)]
903 loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies
904 in map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)]
905
906 toIntSet :: LabelSet -> IntSet
907 toIntSet s = IS.fromList . map fromBlockId . setElems $ s
908 toIntMap :: LabelMap a -> IntMap a
909 toIntMap m = IM.fromList $ map (\(x,y) -> (fromBlockId x,y)) $ mapToList m
910
911 mkDomMap :: Tree BlockId -> LabelMap LabelSet
912 mkDomMap root = mapFromList $ go setEmpty root
913 where
914 go :: LabelSet -> Tree BlockId -> [(Label,LabelSet)]
915 go parents (Node lbl [])
916 = [(lbl, parents)]
917 go parents (Node _ leaves)
918 = let nodes = map rootLabel leaves
919 entries = map (\x -> (x,parents)) nodes
920 in entries ++ concatMap
921 (\n -> go (setInsert (rootLabel n) parents) n)
922 leaves
923
924 fromBlockId :: BlockId -> Int
925 fromBlockId = getKey . getUnique
926
927 toBlockId :: Int -> BlockId
928 toBlockId = mkBlockId . mkUniqueGrimily
929
930 -- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
931 newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId])
932
933 instance G.NonLocal (BlockNode) where
934 entryLabel (BN (lbl,_)) = lbl
935 successors (BN (_,succs)) = succs
936
937 revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
938 revPostorderFrom cfg root =
939 map fromNode $ G.revPostorderFrom hooplGraph root
940 where
941 nodes = getCfgNodes cfg
942 hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes
943
944 fromNode :: BlockNode C C -> BlockId
945 fromNode (BN x) = fst x
946
947 toNode :: BlockId -> BlockNode C C
948 toNode bid =
949 BN (bid,getSuccessors cfg $ bid)
950
951
-- | We take in a CFG whose edge weights are
-- relative only to other edges originating from the same node.
954 --
955 -- We return a CFG for which each edge represents a GLOBAL weight.
956 -- This means edge weights are comparable across the whole graph.
957 --
958 -- For irreducible control flow results might be imprecise, otherwise they
959 -- are reliable.
960 --
961 -- The algorithm is based on the Paper
962 -- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
-- The only big change is that we go over the nodes in the body of loops in
-- reverse post order, which is required for diamond control flow to work properly.
965 --
966 -- We also apply a few prediction heuristics (based on the same paper)
967 --
-- The returned result represents frequencies.
-- For blocks it's the expected number of executions and
-- for edges it's the expected number of traversals.
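--
-- For instance (sketch): if the entry block branches to B with probability
-- 0.6 and to C with probability 0.4, and both B and C jump to D, then the
-- returned block frequencies are roughly entry = 1, B = 0.6, C = 0.4 and
-- D = 1, and the edge B -> D gets frequency 0.6 even though locally it was
-- the only (and therefore "certain") edge leaving B.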
971
972 {-# NOINLINE mkGlobalWeights #-}
973 {-# SCC mkGlobalWeights #-}
974 mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double))
975 mkGlobalWeights root localCfg
976 | null localCfg = panic "Error - Empty CFG"
977 | otherwise
978 = (blockFreqs', edgeFreqs')
979 where
980 -- Calculate fixpoints
981 (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder'
982 blockFreqs' = mapFromList $ map (first fromVertex) (assocs blockFreqs) :: LabelMap Double
983 edgeFreqs' = fmap fromVertexMap $ fromVertexMap edgeFreqs
984
985 fromVertexMap :: IM.IntMap x -> LabelMap x
986 fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m
987
988 revOrder = revPostorderFrom localCfg root :: [BlockId]
989 loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root
990
991 revOrder' = map toVertex revOrder
992 backEdges' = map (bimap toVertex toVertex) backedges
993 bodies' = map calcBody bodies
994
995 estimatedCfg = staticBranchPrediction root loopResults localCfg
996 -- Normalize the weights to probabilities and apply heuristics
997 nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex
998
999 -- By mapping vertices to numbers in reverse post order we can bring any subset into reverse post
1000 -- order simply by sorting.
1001 -- TODO: The sort is redundant if we can guarantee that setElems returns elements ascending
1002 calcBody (backedge, blocks) =
1003 (toVertex $ snd backedge, sort . map toVertex $ (setElems blocks))
1004
1005 vertexMapping = mapFromList $ zip revOrder [0..] :: LabelMap Int
1006 blockMapping = listArray (0,mapSize vertexMapping - 1) revOrder :: Array Int BlockId
1007 -- Map from blockId to indices starting at zero
1008 toVertex :: BlockId -> Int
1009 toVertex blockId = expectJust "mkGlobalWeights" $ mapLookup blockId vertexMapping
1010 -- Map from indices starting at zero to blockIds
1011 fromVertex :: Int -> BlockId
1012 fromVertex vertex = blockMapping ! vertex
1013
1014 {- Note [Static Branch Prediction]
1015 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1016
1017 The work here has been based on the paper
1018 "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
1019
The primary difference is that if we branch on the result of a heap
check we do not apply any of the heuristics.
The reason is simple: they look like loops in the control flow graph
but are usually never entered, and if they are then at most once.
1024
1025 Currently implemented is a heuristic to predict that we do not exit
1026 loops (lehPredicts) and one to predict that backedges are more likely
1027 than any other edge.
1028
The back edge case is special as it supersedes any other heuristic if it
1030 applies.
1031
1032 Do NOT rely solely on nofib results for benchmarking this. I recommend at least
comparing megaparsec and container benchmarks. Nofib does not seem to have
1034 many instances of "loopy" Cmm where these make a difference.
1035
1036 TODO:
* The paper contains more heuristics which should be implemented.
1038 * If we turn the likelihood on if/else branches into a probability
1039 instead of true/false we could implement this as a Cmm pass.
1040 + The complete Cmm code still exists and can be accessed by the heuristics
+ There is no chance of register allocation/codegen inserting branches/blocks
  making the TransitionSource info wrong.
1043 + potential to use this information in CmmPasses.
1044 - Requires refactoring of all the code relying on the binary nature of likelihood.
1045 - Requires refactoring `loopInfo` to work on both, Cmm Graphs and the backend CFG.
1046 -}
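{- As a worked example of how a heuristic prediction is folded into the
   existing weights (see applyHeuristic below): starting from the neutral
   split s1_old = s2_old = 0.5 and a heuristic predicting s1 with
   probability 0.75, we get d = 0.5*0.75 + 0.5*0.25 = 0.5 and therefore
   s1' = 0.375 / 0.5 = 0.75 and s2' = 0.125 / 0.5 = 0.25.
-}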
1047
1048 -- | Combination of target node id and information about the branch
1049 -- we are looking at.
1050 type TargetNodeInfo = (BlockId, EdgeInfo)
1051
1052
1053 -- | Update branch weights based on certain heuristics.
1054 -- See Note [Static Branch Prediction]
1055 -- TODO: This should be combined with optimizeCFG
1056 {-# SCC staticBranchPrediction #-}
1057 staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG
1058 staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg =
1059 -- pprTrace "staticEstimatesOn" (ppr (cfg)) $
1060 foldl' update cfg nodes
1061 where
1062 nodes = getCfgNodes cfg
1063 backedges = S.fromList $ l_backEdges
1064 -- Loops keyed by their back edge
1065 loops = M.fromList $ l_loops :: M.Map Edge LabelSet
1066 loopHeads = S.fromList $ map snd $ M.keys loops
1067
1068 update :: CFG -> BlockId -> CFG
1069 update cfg node
1070 -- No successors, nothing to do.
1071 | null successors = cfg
1072
1073 -- Mix of backedges and others:
1074 -- Always predict the backedges.
1075 | not (null m) && length m < length successors
1076 -- Heap/Stack checks "loop", but only once.
1077 -- So we simply exclude any case involving them.
1078 , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
1079 = let loopChance = repeat $! pred_LBH / (fromIntegral $ length m)
1080 exitChance = repeat $! (1 - pred_LBH) / fromIntegral (length not_m)
1081 updates = zip (map fst m) loopChance ++ zip (map fst not_m) exitChance
1082 in -- pprTrace "mix" (ppr (node,successors)) $
1083 foldl' (\cfg (to,weight) -> setEdgeWeight cfg weight node to) cfg updates
1084
1085 -- For (regular) non-binary branches we keep the weights from the STG -> Cmm translation.
1086 | length successors /= 2
1087 = cfg
1088
1089 -- Only backedges - no need to adjust
1090 | length m > 0
1091 = cfg
1092
-- A regular binary branch; we can plug additional predictors in here.
1094 | [(s1,s1_info),(s2,s2_info)] <- successors
1095 , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
1096 = -- Normalize weights to total of 1
1097 let !w1 = max (edgeWeight s1_info) (0)
1098 !w2 = max (edgeWeight s2_info) (0)
-- If both weights are <= 0 we set both to 0.5
1100 normalizeWeight w = if w1 + w2 == 0 then 0.5 else w/(w1+w2)
1101 !cfg' = setEdgeWeight cfg (normalizeWeight w1) node s1
1102 !cfg'' = setEdgeWeight cfg' (normalizeWeight w2) node s2
1103
1104 -- Figure out which heuristics apply to these successors
1105 heuristics = map ($ ((s1,s1_info),(s2,s2_info)))
1106 [lehPredicts, phPredicts, ohPredicts, ghPredicts, lhhPredicts, chPredicts
1107 , shPredicts, rhPredicts]
1108 -- Apply result of a heuristic. Argument is the likelihood
1109 -- predicted for s1.
1110 applyHeuristic :: CFG -> Maybe Prob -> CFG
1111 applyHeuristic cfg Nothing = cfg
1112 applyHeuristic cfg (Just (s1_pred :: Double))
1113 | s1_old == 0 || s2_old == 0 ||
1114 isHeapOrStackCheck (transitionSource s1_info) ||
1115 isHeapOrStackCheck (transitionSource s2_info)
1116 = cfg
1117 | otherwise =
1118 let -- Predictions from heuristic
1119 s1_prob = EdgeWeight s1_pred :: EdgeWeight
1120 s2_prob = 1.0 - s1_prob
1121 -- Update
1122 d = (s1_old * s1_prob) + (s2_old * s2_prob) :: EdgeWeight
1123 s1_prob' = s1_old * s1_prob / d
1124 !s2_prob' = s2_old * s2_prob / d
1125 !cfg_s1 = setEdgeWeight cfg s1_prob' node s1
1126 in -- pprTrace "Applying heuristic!" (ppr (node,s1,s2) $$ ppr (s1_prob', s2_prob')) $
1127 setEdgeWeight cfg_s1 s2_prob' node s2
1128 where
1129 -- Old weights
1130 s1_old = getEdgeWeight cfg node s1
1131 s2_old = getEdgeWeight cfg node s2
1132
1133 in
1134 -- pprTraceIt "RegularCfgResult" $
1135 foldl' applyHeuristic cfg'' heuristics
1136
1137 -- Branch on heap/stack check
1138 | otherwise = cfg
1139
1140 where
1141 -- Chance that loops are taken.
1142 pred_LBH = 0.875
1143 -- successors
1144 successors = getSuccessorEdges cfg node
1145 -- backedges
1146 (m,not_m) = partition (\succ -> S.member (node, fst succ) backedges) successors
1147
-- Heuristics return Nothing if they don't say anything about this branch
1149 -- or Just (prob_s1) where prob_s1 is the likelihood for s1 to be the
1150 -- taken branch. s1 is the branch in the true case.
1151
1152 -- Loop exit heuristic.
1153 -- We are unlikely to leave a loop unless it's to enter another one.
1154 pred_LEH = 0.75
1155 -- If and only if no successor is a loopheader,
1156 -- then we will likely not exit the current loop body.
1157 lehPredicts :: (TargetNodeInfo,TargetNodeInfo) -> Maybe Prob
1158 lehPredicts ((s1,_s1_info),(s2,_s2_info))
1159 | S.member s1 loopHeads || S.member s2 loopHeads
1160 = Nothing
1161
1162 | otherwise
1163 = --pprTrace "lehPredict:" (ppr $ compare s1Level s2Level) $
1164 case compare s1Level s2Level of
1165 EQ -> Nothing
1166 LT -> Just (1-pred_LEH) --s1 exits to a shallower loop level (exits loop)
1167 GT -> Just (pred_LEH) --s1 exits to a deeper loop level
1168 where
1169 s1Level = mapLookup s1 loopLevels
1170 s2Level = mapLookup s2 loopLevels
1171
-- A comparison against a constant is unlikely to come out equal.
1173 ohPredicts (s1,_s2)
1174 | CmmSource { trans_cmmNode = src1 } <- getTransitionSource node (fst s1) cfg
1175 , CmmCondBranch cond ltrue _lfalse likely <- src1
1176 , likely == Nothing
1177 , CmmMachOp mop args <- cond
1178 , MO_Eq {} <- mop
1179 , not (null [x | x@CmmLit{} <- args])
1180 = if fst s1 == ltrue then Just 0.3 else Just 0.7
1181
1182 | otherwise
1183 = Nothing
1184
1185 -- TODO: These are all the other heuristics from the paper.
1186 -- Not all will apply, for now we just stub them out as Nothing.
1187 phPredicts = const Nothing
1188 ghPredicts = const Nothing
1189 lhhPredicts = const Nothing
1190 chPredicts = const Nothing
1191 shPredicts = const Nothing
1192 rhPredicts = const Nothing
1193
1194 -- We normalize all edge weights as probabilities between 0 and 1.
-- Ignoring rounding errors, the probabilities of all outgoing edges of a node sum up to 1.
1196 cfgEdgeProbabilities :: CFG -> (BlockId -> Int) -> IM.IntMap (IM.IntMap Prob)
1197 cfgEdgeProbabilities cfg toVertex
1198 = mapFoldlWithKey foldEdges IM.empty cfg
1199 where
1200 foldEdges = (\m from toMap -> IM.insert (toVertex from) (normalize toMap) m)
1201
1202 normalize :: (LabelMap EdgeInfo) -> (IM.IntMap Prob)
1203 normalize weightMap
1204 | edgeCount <= 1 = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) 1.0 m) IM.empty weightMap
1205 | otherwise = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) (normalWeight k) m) IM.empty weightMap
1206 where
1207 edgeCount = mapSize weightMap
1208 -- Negative weights are generally allowed but are mapped to zero.
1209 -- We then check if there is at least one non-zero edge and if not
1210 -- assign uniform weights to all branches.
1211 minWeight = 0 :: Prob
1212 weightMap' = fmap (\w -> max (weightToDouble . edgeWeight $ w) minWeight) weightMap
1213 totalWeight = sum weightMap'
1214
1215 normalWeight :: BlockId -> Prob
1216 normalWeight bid
1217 | totalWeight == 0
1218 = 1.0 / fromIntegral edgeCount
1219 | Just w <- mapLookup bid weightMap'
1220 = w/totalWeight
1221 | otherwise = panic "impossible"
1222
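-- A small example for cfgEdgeProbabilities above (sketch): outgoing weights
-- [B -> 30, C -> 10, D -> -5] are first clamped to [30, 10, 0] and then
-- normalised to the probabilities [0.75, 0.25, 0]; if every weight were
-- <= 0, each of the three edges would instead get the uniform probability
-- 1/3.
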
1223 -- This is the fixpoint algorithm from
1224 -- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
-- The adaptation to Haskell is my own.
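--
-- A small worked instance (sketch): for a loop header with a self edge of
-- probability 0.875 and an exit edge of probability 0.125, the cyclic
-- probability is 0.875, so the header's frequency becomes
-- 1 / (1 - 0.875) = 8 per entry, and the exit edge is traversed
-- 8 * 0.125 = 1 time, matching the intuition that the loop runs about
-- 8 iterations each time it is entered.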
1226 calcFreqs :: IM.IntMap (IM.IntMap Prob) -> [(Int,Int)] -> [(Int, [Int])] -> [Int]
1227 -> (Array Int Double, IM.IntMap (IM.IntMap Prob))
1228 calcFreqs graph backEdges loops revPostOrder = runST $ do
1229 visitedNodes <- newArray (0,nodeCount-1) False :: ST s (STUArray s Int Bool)
1230 blockFreqs <- newArray (0,nodeCount-1) 0.0 :: ST s (STUArray s Int Double)
1231 edgeProbs <- newSTRef graph
1232 edgeBackProbs <- newSTRef graph
1233
1234 -- let traceArray a = do
1235 -- vs <- forM [0..nodeCount-1] $ \i -> readArray a i >>= (\v -> return (i,v))
1236 -- trace ("array: " ++ show vs) $ return ()
1237
let -- See #1600: we need to inline, or unboxing makes perf worse.
1239 -- {-# INLINE getFreq #-}
1240 {-# INLINE visited #-}
1241 visited b = unsafeRead visitedNodes b
1242 getFreq b = unsafeRead blockFreqs b
1243 -- setFreq :: forall s. Int -> Double -> ST s ()
1244 setFreq b f = unsafeWrite blockFreqs b f
1245 -- setVisited :: forall s. Node -> ST s ()
1246 setVisited b = unsafeWrite visitedNodes b True
1247 -- Frequency/probability that edge is taken.
1248 getProb' arr b1 b2 = readSTRef arr >>=
1249 (\graph ->
1250 return .
1251 fromMaybe (error "getFreq 1") .
1252 IM.lookup b2 .
1253 fromMaybe (error "getFreq 2") $
1254 (IM.lookup b1 graph)
1255 )
1256 setProb' arr b1 b2 prob = do
1257 g <- readSTRef arr
1258 let !m = fromMaybe (error "Foo") $ IM.lookup b1 g
1259 !m' = IM.insert b2 prob m
1260 writeSTRef arr $! (IM.insert b1 m' g)
1261
1262 getEdgeFreq b1 b2 = getProb' edgeProbs b1 b2
1263 setEdgeFreq b1 b2 = setProb' edgeProbs b1 b2
1264 getProb b1 b2 = fromMaybe (error "getProb") $ do
1265 m' <- IM.lookup b1 graph
1266 IM.lookup b2 m'
1267
1268 getBackProb b1 b2 = getProb' edgeBackProbs b1 b2
1269 setBackProb b1 b2 = setProb' edgeBackProbs b1 b2
1270
1271
1272 let -- calcOutFreqs :: Node -> ST s ()
1273 calcOutFreqs bhead block = do
1274 !f <- getFreq block
1275 forM (successors block) $ \bi -> do
1276 let !prob = getProb block bi
1277 let !succFreq = f * prob
1278 setEdgeFreq block bi succFreq
1279 -- traceM $ "SetOut: " ++ show (block, bi, f, prob, succFreq)
1280 when (bi == bhead) $ setBackProb block bi succFreq
1281
1282
1283 let propFreq block head = do
1284 -- traceM ("prop:" ++ show (block,head))
1285 -- traceShowM block
1286
1287 !v <- visited block
1288 if v then
return () --Don't look at nodes twice
1290 else if block == head then
1291 setFreq block 1.0 -- Loop header frequency is always 1
1292 else do
1293 let preds = IS.elems $ predecessors block
1294 irreducible <- (fmap or) $ forM preds $ \bp -> do
1295 !bp_visited <- visited bp
1296 let bp_backedge = isBackEdge bp block
1297 return (not bp_visited && not bp_backedge)
1298
1299 if irreducible
1300 then return () -- Rare we don't care
1301 else do
1302 setFreq block 0
1303 !cycleProb <- sum <$> (forM preds $ \pred -> do
1304 if isBackEdge pred block
1305 then
1306 getBackProb pred block
1307 else do
1308 !f <- getFreq block
1309 !prob <- getEdgeFreq pred block
1310 setFreq block $! f + prob
1311 return 0)
1312 -- traceM $ "cycleProb:" ++ show cycleProb
1313 let limit = 1 - 1/512 -- Paper uses 1 - epsilon, but this works.
-- determines how large likelihoods in loops can grow.
1315 !cycleProb <- return $ min cycleProb limit -- <- return $ if cycleProb > limit then limit else cycleProb
1316 -- traceM $ "cycleProb:" ++ show cycleProb
1317
1318 !f <- getFreq block
1319 setFreq block (f / (1.0 - cycleProb))
1320
1321 setVisited block
1322 calcOutFreqs head block
1323
1324 -- Loops, by nesting, inner to outer
1325 forM_ loops $ \(head, body) -> do
1326 forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i True) -- Mark all nodes as visited.
1327 forM_ body (\i -> unsafeWrite visitedNodes i False) -- Mark all blocks reachable from head as not visited
1328 forM_ body $ \block -> propFreq block head
1329
1330 -- After dealing with all loops, deal with non-looping parts of the CFG
1331 forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i False) -- Everything in revPostOrder is reachable
1332 forM_ revPostOrder $ \block -> propFreq block (head revPostOrder)
1333
1334 -- trace ("Final freqs:") $ return ()
1335 -- let freqString = pprFreqs freqs
1336 -- trace (unlines freqString) $ return ()
1337 -- trace (pprFre) $ return ()
1338 graph' <- readSTRef edgeProbs
1339 freqs' <- unsafeFreeze blockFreqs
1340
1341 return (freqs', graph')
1342 where
1343 -- How can these lookups fail? Consider the CFG [A -> B]
1344 predecessors :: Int -> IS.IntSet
1345 predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph
1346 successors :: Int -> [Int]
1347 successors b = fromMaybe (lookupError "succ" b graph)$ IM.keys <$> IM.lookup b graph
1348 lookupError s b g = pprPanic ("Lookup error " ++ s) $
1349 ( text "node" <+> ppr b $$
1350 text "graph" <+>
1351 vcat (map (\(k,m) -> ppr (k,m :: IM.IntMap Double)) $ IM.toList g)
1352 )
1353
1354 nodeCount = IM.foldl' (\count toMap -> IM.foldlWithKey' countTargets count toMap) (IM.size graph) graph
1355 where
1356 countTargets = (\count k _ -> countNode k + count )
1357 countNode n = if IM.member n graph then 0 else 1
1358
1359 isBackEdge from to = S.member (from,to) backEdgeSet
1360 backEdgeSet = S.fromList backEdges
1361
1362 revGraph :: IntMap IntSet
1363 revGraph = IM.foldlWithKey' (\m from toMap -> addEdges m from toMap) IM.empty graph
1364 where
1365 addEdges m0 from toMap = IM.foldlWithKey' (\m k _ -> addEdge m from k) m0 toMap
1366 addEdge m0 from to = IM.insertWith IS.union to (IS.singleton from) m0