never executed always true always false
    1 {-# LANGUAGE DataKinds                  #-}
    2 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
    3 {-# LANGUAGE Rank2Types                 #-}
    4 {-# LANGUAGE ScopedTypeVariables        #-}
    5 {-# LANGUAGE TypeFamilies               #-}
    6 {-# LANGUAGE FlexibleContexts           #-}
    7 {-# LANGUAGE TupleSections              #-}
    8 --
    9 -- Copyright (c) 2018 Andreas Klebinger
   10 --
   11 
   12 module GHC.CmmToAsm.CFG
   13     ( CFG, CfgEdge(..), EdgeInfo(..), EdgeWeight(..)
   14     , TransitionSource(..)
   15 
   16     --Modify the CFG
   17     , addWeightEdge, addEdge
   18     , delEdge
   19     , addNodesBetween, shortcutWeightMap
   20     , reverseEdges, filterEdges
   21     , addImmediateSuccessor
   22     , mkWeightInfo, adjustEdgeWeight, setEdgeWeight
   23 
   24     --Query the CFG
   25     , infoEdgeList, edgeList
   26     , getSuccessorEdges, getSuccessors
   27     , getSuccEdgesSorted
   28     , getEdgeInfo
   29     , getCfgNodes, hasNode
   30 
   31     -- Loop Information
   32     , loopMembers, loopLevels, loopInfo
   33 
   34     --Construction/Misc
   35     , getCfg, getCfgProc, pprEdgeWeights, sanityCheckCfg
   36 
   37     --Find backedges and update their weight
   38     , optimizeCFG
   39     , mkGlobalWeights
   40 
   41      )
   42 where
   43 
   44 import GHC.Prelude
   45 import GHC.Platform
   46 
   47 import GHC.Cmm.BlockId
   48 import GHC.Cmm as Cmm
   49 
   50 import GHC.Cmm.Utils
   51 import GHC.Cmm.Switch
   52 import GHC.Cmm.Dataflow.Collections
   53 import GHC.Cmm.Dataflow.Label
   54 import GHC.Cmm.Dataflow.Block
   55 import qualified GHC.Cmm.Dataflow.Graph as G
   56 
   57 import GHC.Utils.Misc
   58 import GHC.Data.Graph.Directed
   59 import GHC.Data.Maybe
   60 
   61 import GHC.Types.Unique
   62 import qualified GHC.CmmToAsm.CFG.Dominators as Dom
   63 import GHC.CmmToAsm.CFG.Weight
   64 import Data.IntMap.Strict (IntMap)
   65 import Data.IntSet (IntSet)
   66 
   67 import qualified Data.IntMap.Strict as IM
   68 import qualified Data.Map as M
   69 import qualified Data.IntSet as IS
   70 import qualified Data.Set as S
   71 import Data.Tree
   72 import Data.Bifunctor
   73 
   74 import GHC.Utils.Outputable
   75 import GHC.Utils.Panic
   76 import GHC.Utils.Panic.Plain
   77 -- DEBUGGING ONLY
   78 --import GHC.Cmm.DebugBlock
   79 --import GHC.Data.OrdList
   80 --import GHC.Cmm.DebugBlock.Trace
   81 import GHC.Cmm.Ppr () -- For Outputable instances
   82 
   83 import Data.List (sort, nub, partition)
   84 import Data.STRef.Strict
   85 import Control.Monad.ST
   86 
   87 import Data.Array.MArray
   88 import Data.Array.ST
   89 import Data.Array.IArray
   90 import Data.Array.Unsafe (unsafeFreeze)
   91 import Data.Array.Base (unsafeRead, unsafeWrite)
   92 
   93 import Control.Monad
   94 import GHC.Data.UnionFind
   95 
   96 type Prob = Double
   97 
   98 type Edge = (BlockId, BlockId)
   99 type Edges = [Edge]
  100 
  101 newtype EdgeWeight
  102   = EdgeWeight { weightToDouble :: Double }
  103   deriving (Eq,Ord,Enum,Num,Real,Fractional)
  104 
  105 instance Outputable EdgeWeight where
  106   ppr (EdgeWeight w) = doublePrec 5 w
  107 
  108 type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo)
  109 
  110 -- | A control flow graph where edges have been annotated with a weight.
  111 -- Implemented as IntMap (IntMap \<edgeData>)
  112 -- We must uphold the invariant that for each edge A -> B we must have:
  113 -- A entry B in the outer map.
  114 -- A entry B in the map we get when looking up A.
  115 -- Maintaining this invariant is useful as any failed lookup now indicates
  116 -- an actual error in code which might go unnoticed for a while
  117 -- otherwise.
  118 type CFG = EdgeInfoMap EdgeInfo
  119 
  120 data CfgEdge
  121   = CfgEdge
  122   { edgeFrom :: !BlockId
  123   , edgeTo :: !BlockId
  124   , edgeInfo :: !EdgeInfo
  125   }
  126 
  127 -- | Careful! Since we assume there is at most one edge from A to B
  128 --   the Eq instance does not consider weight.
  129 instance Eq CfgEdge where
  130   (==) (CfgEdge from1 to1 _) (CfgEdge from2 to2 _)
  131     = from1 == from2 && to1 == to2
  132 
  133 -- | Edges are sorted ascending pointwise by weight, source and destination
  134 instance Ord CfgEdge where
  135   compare (CfgEdge from1 to1 (EdgeInfo {edgeWeight = weight1}))
  136           (CfgEdge from2 to2 (EdgeInfo {edgeWeight = weight2}))
  137     | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
  138       weight1 == weight2 && from1 == from2 && to1 < to2
  139     = LT
  140     | from1 == from2 && to1 == to2 && weight1 == weight2
  141     = EQ
  142     | otherwise
  143     = GT
  144 
  145 instance Outputable CfgEdge where
  146   ppr (CfgEdge from1 to1 edgeInfo)
  147     = parens (ppr from1 <+> text "-(" <> ppr edgeInfo <> text ")->" <+> ppr to1)
  148 
  149 -- | Can we trace back a edge to a specific Cmm Node
  150 -- or has it been introduced during assembly codegen. We use this to maintain
  151 -- some information which would otherwise be lost during the
  152 -- Cmm \<-> asm transition.
  153 -- See also Note [Inverting Conditional Branches]
  154 data TransitionSource
  155   = CmmSource { trans_cmmNode :: (CmmNode O C)
  156               , trans_info :: BranchInfo }
  157   | AsmCodeGen
  158   deriving (Eq)
  159 
  160 data BranchInfo = NoInfo         -- ^ Unknown, but not heap or stack check.
  161                 | HeapStackCheck -- ^ Heap or stack check
  162     deriving Eq
  163 
  164 instance Outputable BranchInfo where
  165     ppr NoInfo = text "regular"
  166     ppr HeapStackCheck = text "heap/stack"
  167 
  168 isHeapOrStackCheck :: TransitionSource -> Bool
  169 isHeapOrStackCheck (CmmSource { trans_info = HeapStackCheck}) = True
  170 isHeapOrStackCheck _ = False
  171 
  172 -- | Information about edges
  173 data EdgeInfo
  174   = EdgeInfo
  175   { transitionSource :: !TransitionSource
  176   , edgeWeight :: !EdgeWeight
  177   } deriving (Eq)
  178 
  179 instance Outputable EdgeInfo where
  180   ppr edgeInfo = text "weight:" <+> ppr (edgeWeight edgeInfo)
  181 
  182 -- | Convenience function, generate edge info based
  183 --   on weight not originating from cmm.
  184 mkWeightInfo :: EdgeWeight -> EdgeInfo
  185 mkWeightInfo = EdgeInfo AsmCodeGen
  186 
  187 -- | Adjust the weight between the blocks using the given function.
  188 --   If there is no such edge returns the original map.
  189 adjustEdgeWeight :: CFG -> (EdgeWeight -> EdgeWeight)
  190                  -> BlockId -> BlockId -> CFG
  191 adjustEdgeWeight cfg f from to
  192   | Just info <- getEdgeInfo from to cfg
  193   , !weight <- edgeWeight info
  194   , !newWeight <- f weight
  195   = addEdge from to (info { edgeWeight = newWeight}) cfg
  196   | otherwise = cfg
  197 
  198 -- | Set the weight between the blocks to the given weight.
  199 --   If there is no such edge returns the original map.
  200 setEdgeWeight :: CFG -> EdgeWeight
  201               -> BlockId -> BlockId -> CFG
  202 setEdgeWeight cfg !weight from to
  203   | Just info <- getEdgeInfo from to cfg
  204   = addEdge from to (info { edgeWeight = weight}) cfg
  205   | otherwise = cfg
  206 
  207 
  208 getCfgNodes :: CFG -> [BlockId]
  209 getCfgNodes m =
  210     mapKeys m
  211 
  212 -- | Is this block part of this graph?
  213 hasNode :: CFG -> BlockId -> Bool
  214 hasNode m node =
  215   -- Check the invariant that each node must exist in the first map or not at all.
  216   assert (found || not (any (mapMember node) m))
  217   found
  218     where
  219       found = mapMember node m
  220 
  221 
  222 
  223 -- | Check if the nodes in the cfg and the set of blocks are the same.
  224 --   In a case of a missmatch we panic and show the difference.
  225 sanityCheckCfg :: CFG -> LabelSet -> SDoc -> Bool
  226 sanityCheckCfg m blockSet msg
  227     | blockSet == cfgNodes
  228     = True
  229     | otherwise =
  230         pprPanic "Block list and cfg nodes don't match" (
  231             text "difference:" <+> ppr diff $$
  232             text "blocks:" <+> ppr blockSet $$
  233             text "cfg:" <+> pprEdgeWeights m $$
  234             msg )
  235             False
  236     where
  237       cfgNodes = setFromList $ getCfgNodes m :: LabelSet
  238       diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet
  239 
  240 -- | Filter the CFG with a custom function f.
  241 --   Paramaeters are `f from to edgeInfo`
  242 filterEdges :: (BlockId -> BlockId -> EdgeInfo -> Bool) -> CFG -> CFG
  243 filterEdges f cfg =
  244     mapMapWithKey filterSources cfg
  245     where
  246       filterSources from m =
  247         mapFilterWithKey (\to w -> f from to w) m
  248 
  249 
  250 {- Note [Updating the CFG during shortcutting]
  251 
  252 See Note [What is shortcutting] in the control flow optimization
  253 code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting.
  254 
  255 In the native backend we shortcut jumps at the assembly level. ("GHC.CmmToAsm")
  256 This means we remove blocks containing only one jump from the code
  257 and instead redirecting all jumps targeting this block to the deleted
  258 blocks jump target.
  259 
  260 However we want to have an accurate representation of control
  261 flow in the CFG. So we add/remove edges accordingly to account
  262 for the eliminated blocks and new edges.
  263 
  264 If we shortcut A -> B -> C to A -> C:
  265 * We delete edges A -> B and B -> C
  266 * Replacing them with the edge A -> C
  267 
  268 We also try to preserve jump weights while doing so.
  269 
  270 Note that:
  271 * The edge B -> C can't have interesting weights since
  272   the block B consists of a single unconditional jump without branching.
  273 * We delete the edge A -> B and add the edge A -> C.
  274 * The edge A -> B can be one of many edges originating from A so likely
  275   has edge weights we want to preserve.
  276 
  277 For this reason we simply store the edge info from the original A -> B
  278 edge and apply this information to the new edge A -> C.
  279 
  280 Sometimes we have a scenario where jump target C is not represented by an
  281 BlockId but an immediate value. I'm only aware of this happening without
  282 tables next to code currently.
  283 
  284 Then we go from A ---> B - -> IMM   to   A - -> IMM where the dashed arrows
  285 are not stored in the CFG.
  286 
  287 In that case we simply delete the edge A -> B.
  288 
  289 In terms of implementation the native backend first builds a mapping
  290 from blocks suitable for shortcutting to their jump targets.
  291 Then it redirects all jump instructions to these blocks using the
  292 built up mapping.
  293 This function (shortcutWeightMap) takes the same mapping and
  294 applies the mapping to the CFG in the way laid out above.
  295 
  296 -}
  297 shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG
  298 shortcutWeightMap cuts cfg
  299   | mapNull cuts = cfg
  300   | otherwise = normalised_cfg
  301     where
  302       -- First take the cuts map and collapse any shortcuts, for example
  303       -- if the cuts map has A -> B and B -> C then we want to rewrite
  304       -- A -> C and B -> C directly.
  305       normalised_cuts_st :: forall s . ST s (LabelMap (Maybe BlockId))
  306       normalised_cuts_st = do
  307         (null :: Point s (Maybe BlockId)) <- fresh Nothing
  308         let cuts_list = mapToList cuts
  309         -- Create a unification variable for each of the nodes in a rewrite
  310         cuts_vars <- traverse (\p -> (p,) <$> fresh (Just p)) (concatMap (\(a, b) -> [a] ++ maybe [] (:[]) b) cuts_list)
  311         let cuts_map = mapFromList cuts_vars :: LabelMap (Point s (Maybe BlockId))
  312         -- Then unify according the the rewrites in the cuts map
  313         mapM_ (\(from, to) -> expectJust "shortcutWeightMap" (mapLookup from cuts_map)
  314                               `union` expectJust "shortcutWeightMap" (maybe (Just null) (flip mapLookup cuts_map) to) ) cuts_list
  315         -- Then recover the unique representative, which is the result of following
  316         -- the chain to the end.
  317         mapM find cuts_map
  318 
  319       normalised_cuts = runST normalised_cuts_st
  320 
  321       cuts_domain :: LabelSet
  322       cuts_domain = setFromList $ mapKeys cuts
  323 
  324       -- The CFG is shortcutted using the normalised cuts map
  325       normalised_cfg :: CFG
  326       normalised_cfg = mapFoldlWithKey update_edge mapEmpty cfg
  327 
  328       update_edge :: CFG -> Label -> LabelMap EdgeInfo -> CFG
  329       update_edge new_map from edge_map
  330         -- If the from edge is in the cuts map then delete the edge
  331         | setMember from cuts_domain = new_map
  332         -- Otherwise we are keeping the edge, but might have shortcutted some of
  333         -- the target nodes.
  334         | otherwise = mapInsert from (mapFoldlWithKey update_from_edge mapEmpty edge_map) new_map
  335 
  336       update_from_edge :: LabelMap a -> Label -> a -> LabelMap a
  337       update_from_edge new_map to_edge edge_info
  338         -- Edge is in the normalised cuts
  339         | Just new_edge <- mapLookup to_edge normalised_cuts =
  340             case new_edge of
  341               -- The result was Nothing, so edge is deleted
  342               Nothing -> new_map
  343               -- The new target for the edge, write it with the old edge_info.
  344               Just new_to -> mapInsert new_to edge_info new_map
  345         -- Node wasn't in the cuts map, so just add it back
  346         | otherwise = mapInsert to_edge edge_info new_map
  347 
  348 
  349 -- | Sometimes we insert a block which should unconditionally be executed
  350 --   after a given block. This function updates the CFG for these cases.
  351 --  So we get A -> B    => A -> A' -> B
  352 --             \                  \
  353 --              -> C    =>         -> C
  354 --
  355 addImmediateSuccessor :: Weights -> BlockId -> BlockId -> CFG -> CFG
  356 addImmediateSuccessor weights node follower cfg
  357     = updateEdges . addWeightEdge node follower weight $ cfg
  358     where
  359         weight = fromIntegral (uncondWeight weights)
  360         targets = getSuccessorEdges cfg node
  361         successors = map fst targets :: [BlockId]
  362         updateEdges = addNewSuccs . remOldSuccs
  363         remOldSuccs m = foldl' (flip (delEdge node)) m successors
  364         addNewSuccs m =
  365           foldl' (\m' (t,info) -> addEdge follower t info m') m targets
  366 
  367 -- | Adds a new edge, overwrites existing edges if present
  368 addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG
  369 addEdge from to info cfg =
  370     mapAlter addFromToEdge from $
  371     mapAlter addDestNode to cfg
  372     where
  373         -- Simply insert the edge into the edge list.
  374         addFromToEdge Nothing = Just $ mapSingleton to info
  375         addFromToEdge (Just wm) = Just $ mapInsert to info wm
  376         -- We must add the destination node explicitly
  377         addDestNode Nothing = Just $ mapEmpty
  378         addDestNode n@(Just _) = n
  379 
  380 
  381 -- | Adds a edge with the given weight to the cfg
  382 --   If there already existed an edge it is overwritten.
  383 --   `addWeightEdge from to weight cfg`
  384 addWeightEdge :: BlockId -> BlockId -> EdgeWeight -> CFG -> CFG
  385 addWeightEdge from to weight cfg =
  386     addEdge from to (mkWeightInfo weight) cfg
  387 
  388 delEdge :: BlockId -> BlockId -> CFG -> CFG
  389 delEdge from to m =
  390     mapAlter remDest from m
  391     where
  392         remDest Nothing = Nothing
  393         remDest (Just wm) = Just $ mapDelete to wm
  394 
  395 
  396 -- | Destinations from bid ordered by weight (descending)
  397 getSuccEdgesSorted :: CFG -> BlockId -> [(BlockId,EdgeInfo)]
  398 getSuccEdgesSorted m bid =
  399     let destMap = mapFindWithDefault mapEmpty bid m
  400         cfgEdges = mapToList destMap
  401         sortedEdges = sortWith (negate . edgeWeight . snd) cfgEdges
  402     in  --pprTrace "getSuccEdgesSorted" (ppr bid <+> text "map:" <+> ppr m)
  403         sortedEdges
  404 
  405 -- | Get successors of a given node with edge weights.
  406 getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)]
  407 getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m)
  408   where
  409     lookupError = pprPanic "getSuccessorEdges: Block does not exist" $
  410                     ppr bid <+> pprEdgeWeights m
  411 
  412 getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo
  413 getEdgeInfo from to m
  414     | Just wm <- mapLookup from m
  415     , Just info <- mapLookup to wm
  416     = Just $! info
  417     | otherwise
  418     = Nothing
  419 
  420 getEdgeWeight :: CFG -> BlockId -> BlockId -> EdgeWeight
  421 getEdgeWeight cfg from to =
  422     edgeWeight $ expectJust "Edgeweight for noexisting block" $
  423                  getEdgeInfo from to cfg
  424 
  425 getTransitionSource :: BlockId -> BlockId -> CFG -> TransitionSource
  426 getTransitionSource from to cfg = transitionSource $ expectJust "Source info for noexisting block" $
  427                         getEdgeInfo from to cfg
  428 
  429 reverseEdges :: CFG -> CFG
  430 reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg
  431   where
  432     -- We must preserve nodes without outgoing edges!
  433     addNode :: CFG -> BlockId -> CFG
  434     addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg
  435     go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG
  436     go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap  :: CFG
  437 
  438 
  439 -- | Returns a unordered list of all edges with info
  440 infoEdgeList :: CFG -> [CfgEdge]
  441 infoEdgeList m =
  442     go (mapToList m) []
  443   where
  444     -- We avoid foldMap to avoid thunk buildup
  445     go :: [(BlockId,LabelMap EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
  446     go [] acc = acc
  447     go ((from,toMap):xs) acc
  448       = go' xs from (mapToList toMap) acc
  449     go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [(BlockId,EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
  450     go' froms _    []              acc = go froms acc
  451     go' froms from ((to,info):tos) acc
  452       = go' froms from tos (CfgEdge from to info : acc)
  453 
  454 -- | Returns a unordered list of all edges without weights
  455 edgeList :: CFG -> [Edge]
  456 edgeList m =
  457     go (mapToList m) []
  458   where
  459     -- We avoid foldMap to avoid thunk buildup
  460     go :: [(BlockId,LabelMap EdgeInfo)] -> [Edge] -> [Edge]
  461     go [] acc = acc
  462     go ((from,toMap):xs) acc
  463       = go' xs from (mapKeys toMap) acc
  464     go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [BlockId] -> [Edge] -> [Edge]
  465     go' froms _    []              acc = go froms acc
  466     go' froms from (to:tos) acc
  467       = go' froms from tos ((from,to) : acc)
  468 
  469 -- | Get successors of a given node without edge weights.
  470 getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
  471 getSuccessors m bid
  472     | Just wm <- mapLookup bid m
  473     = mapKeys wm
  474     | otherwise = lookupError
  475     where
  476       lookupError = pprPanic "getSuccessors: Block does not exist" $
  477                     ppr bid <+> pprEdgeWeights m
  478 
  479 pprEdgeWeights :: CFG -> SDoc
  480 pprEdgeWeights m =
  481     let edges = sort $ infoEdgeList m :: [CfgEdge]
  482         printEdge (CfgEdge from to (EdgeInfo { edgeWeight = weight }))
  483             = text "\t" <> ppr from <+> text "->" <+> ppr to <>
  484               text "[label=\"" <> ppr weight <> text "\",weight=\"" <>
  485               ppr weight <> text "\"];\n"
  486         --for the case that there are no edges from/to this node.
  487         --This should rarely happen but it can save a lot of time
  488         --to immediately see it when it does.
  489         printNode node
  490             = text "\t" <> ppr node <> text ";\n"
  491         getEdgeNodes (CfgEdge from to _) = [from,to]
  492         edgeNodes = setFromList $ concatMap getEdgeNodes edges :: LabelSet
  493         nodes = filter (\n -> (not . setMember n) edgeNodes) . mapKeys $ mapFilter null m
  494     in
  495     text "digraph {\n" <>
  496         (foldl' (<>) empty (map printEdge edges)) <>
  497         (foldl' (<>) empty (map printNode nodes)) <>
  498     text "}\n"
  499 
  500 {-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible
  501 -- | Invariant: The edge **must** exist already in the graph.
  502 updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG
  503 updateEdgeWeight f (from, to) cfg
  504     | Just oldInfo <- getEdgeInfo from to cfg
  505     = let !oldWeight = edgeWeight oldInfo
  506           !newWeight = f oldWeight
  507       in addEdge from to (oldInfo {edgeWeight = newWeight}) cfg
  508     | otherwise
  509     = panic "Trying to update invalid edge"
  510 
  511 -- from to oldWeight => newWeight
  512 mapWeights :: (BlockId -> BlockId -> EdgeWeight -> EdgeWeight) -> CFG -> CFG
  513 mapWeights f cfg =
  514   foldl' (\cfg (CfgEdge from to info) ->
  515             let oldWeight = edgeWeight info
  516                 newWeight = f from to oldWeight
  517             in addEdge from to (info {edgeWeight = newWeight}) cfg)
  518           cfg (infoEdgeList cfg)
  519 
  520 
  521 -- | Insert a block in the control flow between two other blocks.
  522 -- We pass a list of tuples (A,B,C) where
  523 -- * A -> C: Old edge
  524 -- * A -> B -> C : New Arc, where B is the new block.
  525 -- It's possible that a block has two jumps to the same block
  526 -- in the assembly code. However we still only store a single edge for
  527 -- these cases.
  528 -- We assign the old edge info to the edge A -> B and assign B -> C the
  529 -- weight of an unconditional jump.
  530 addNodesBetween :: Weights -> CFG -> [(BlockId,BlockId,BlockId)] -> CFG
  531 addNodesBetween weights m updates =
  532   foldl'  updateWeight m .
  533           weightUpdates $ updates
  534     where
  535       weight = fromIntegral (uncondWeight weights)
  536       -- We might add two blocks for different jumps along a single
  537       -- edge. So we end up with edges:   A -> B -> C   ,   A -> D -> C
  538       -- in this case after applying the first update the weight for A -> C
  539       -- is no longer available. So we calculate future weights before updates.
  540       weightUpdates = map getWeight
  541       getWeight :: (BlockId,BlockId,BlockId) -> (BlockId,BlockId,BlockId,EdgeInfo)
  542       getWeight (from,between,old)
  543         | Just edgeInfo <- getEdgeInfo from old m
  544         = (from,between,old,edgeInfo)
  545         | otherwise
  546         = pprPanic "Can't find weight for edge that should have one" (
  547             text "triple" <+> ppr (from,between,old) $$
  548             text "updates" <+> ppr updates $$
  549             text "cfg:" <+> pprEdgeWeights m )
  550       updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG
  551       updateWeight m (from,between,old,edgeInfo)
  552         = addEdge from between edgeInfo .
  553           addWeightEdge between old weight .
  554           delEdge from old $ m
  555 
  556 {-
  557   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  558   ~~~       Note [CFG Edge Weights]    ~~~
  559   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  560 
  561   Edge weights assigned do not currently represent a specific
  562   cost model and rather just a ranking of which blocks should
  563   be placed next to each other given their connection type in
  564   the CFG.
  565   This is especially relevant if we whenever two blocks will
  566   jump to the same target.
  567 
  568                      A   B
  569                       \ /
  570                        C
  571 
  572   Should A or B be placed in front of C? The block layout algorithm
  573   decides this based on which edge (A,C)/(B,C) is heavier. So we
  574   make a educated guess on which branch should be preferred.
  575 
  576   We rank edges in this order:
  577   * Unconditional Control Transfer - They will always
  578     transfer control to their target. Unless there is a info table
  579     we can turn the jump into a fallthrough as well.
  580     We use 20k as default, so it's easy to spot if values have been
  581     modified but unlikely that we run into issues with overflow.
  582   * If branches (likely) - We assume branches marked as likely
  583     are taken more than 80% of the time.
  584     By ranking them below unconditional jumps we make sure we
  585     prefer the unconditional if there is a conditional and
  586     unconditional edge towards a block.
  587   * If branches (regular) - The false branch can potentially be turned
  588     into a fallthrough so we prefer it slightly over the true branch.
  589   * Unlikely branches - These can be assumed to be taken less than 20%
  590     of the time. So we given them one of the lowest priorities.
  591   * Switches - Switches at this level are implemented as jump tables
  592     so have a larger number of successors. So without more information
  593     we can only say that each individual successor is unlikely to be
  594     jumped to and we rank them accordingly.
  595   * Calls - We currently ignore calls completely:
  596         * By the time we return from a call there is a good chance
  597           that the address we return to has already been evicted from
  598           cache eliminating a main advantage sequential placement brings.
  599         * Calls always require a info table in front of their return
  600           address. This reduces the chance that we return to the same
  601           cache line further.
  602 
  603 -}
  604 -- | Generate weights for a Cmm proc based on some simple heuristics.
  605 getCfgProc :: Platform -> Weights -> RawCmmDecl -> CFG
  606 getCfgProc _        _       (CmmData {}) = mapEmpty
  607 getCfgProc platform weights (CmmProc _info _lab _live graph) = getCfg platform weights graph
  608 
  609 getCfg :: Platform -> Weights -> CmmGraph -> CFG
  610 getCfg platform weights graph =
  611   foldl' insertEdge edgelessCfg $ concatMap getBlockEdges blocks
  612   where
  613     Weights
  614             { uncondWeight = uncondWeight
  615             , condBranchWeight = condBranchWeight
  616             , switchWeight = switchWeight
  617             , callWeight = callWeight
  618             , likelyCondWeight = likelyCondWeight
  619             , unlikelyCondWeight = unlikelyCondWeight
  620             --  Last two are used in other places
  621             --, infoTablePenalty = infoTablePenalty
  622             --, backEdgeBonus = backEdgeBonus
  623             } = weights
  624     -- Explicitly add all nodes to the cfg to ensure they are part of the
  625     -- CFG.
  626     edgelessCfg = mapFromList $ zip (map G.entryLabel blocks) (repeat mapEmpty)
  627     insertEdge :: CFG -> ((BlockId,BlockId),EdgeInfo) -> CFG
  628     insertEdge m ((from,to),weight) =
  629       mapAlter f from m
  630         where
  631           f :: Maybe (LabelMap EdgeInfo) -> Maybe (LabelMap EdgeInfo)
  632           f Nothing = Just $ mapSingleton to weight
  633           f (Just destMap) = Just $ mapInsert to weight destMap
  634     getBlockEdges :: CmmBlock -> [((BlockId,BlockId),EdgeInfo)]
  635     getBlockEdges block =
  636       case branch of
  637         CmmBranch dest -> [mkEdge dest uncondWeight]
  638         CmmCondBranch cond t f l
  639           | l == Nothing ->
  640               [mkEdge f condBranchWeight,   mkEdge t condBranchWeight]
  641           | l == Just True ->
  642               [mkEdge f unlikelyCondWeight, mkEdge t likelyCondWeight]
  643           | l == Just False ->
  644               [mkEdge f likelyCondWeight,   mkEdge t unlikelyCondWeight]
  645           where
  646             mkEdgeInfo = -- pprTrace "Info" (ppr branchInfo <+> ppr cond)
  647                          EdgeInfo (CmmSource branch branchInfo) . fromIntegral
  648             mkEdge target weight = ((bid,target), mkEdgeInfo weight)
  649             branchInfo =
  650               foldRegsUsed
  651                 (panic "GHC.CmmToAsm.CFG.getCfg: foldRegsUsed")
  652                 (\info r -> if r == SpLim || r == HpLim || r == BaseReg
  653                     then HeapStackCheck else info)
  654                 NoInfo cond
  655 
  656         (CmmSwitch _e ids) ->
  657           let switchTargets = switchTargetsToList ids
  658               --Compiler performance hack - for very wide switches don't
  659               --consider targets for layout.
  660               adjustedWeight =
  661                 if (length switchTargets > 10) then -1 else switchWeight
  662           in map (\x -> mkEdge x adjustedWeight) switchTargets
  663         (CmmCall { cml_cont = Just cont})  -> [mkEdge cont callWeight]
  664         (CmmForeignCall {Cmm.succ = cont}) -> [mkEdge cont callWeight]
  665         (CmmCall { cml_cont = Nothing })   -> []
  666         other ->
  667             panic "Foo" $
  668             assertPpr False (ppr "Unknown successor cause:" <>
  669               (pdoc platform branch <+> text "=>" <> pdoc platform (G.successors other))) $
  670             map (\x -> ((bid,x),mkEdgeInfo 0)) $ G.successors other
  671       where
  672         bid = G.entryLabel block
  673         mkEdgeInfo = EdgeInfo (CmmSource branch NoInfo) . fromIntegral
  674         mkEdge target weight = ((bid,target), mkEdgeInfo weight)
  675         branch = lastNode block :: CmmNode O C
  676 
  677     blocks = revPostorder graph :: [CmmBlock]
  678 
  679 --Find back edges by BFS
  680 findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges
  681 findBackEdges root cfg =
  682     --pprTraceIt "Backedges:" $
  683     map fst .
  684     filter (\x -> snd x == Backward) $ typedEdges
  685   where
  686     edges = edgeList cfg :: [(BlockId,BlockId)]
  687     getSuccs = getSuccessors cfg :: BlockId -> [BlockId]
  688     typedEdges =
  689       classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
  690 
  691 optimizeCFG :: Bool -> Weights -> RawCmmDecl -> CFG -> CFG
  692 optimizeCFG _ _ (CmmData {}) cfg = cfg
  693 optimizeCFG doStaticPred weights proc@(CmmProc _info _lab _live graph) cfg =
  694   (if doStaticPred then staticPredCfg (g_entry graph) else id) $
  695     optHsPatterns weights proc $ cfg
  696 
  697 -- | Modify branch weights based on educated guess on
  698 -- patterns GHC tends to produce and how they affect
  699 -- performance.
  700 --
  701 -- Most importantly we penalize jumps across info tables.
  702 optHsPatterns :: Weights -> RawCmmDecl -> CFG -> CFG
  703 optHsPatterns _ (CmmData {}) cfg = cfg
  704 optHsPatterns weights (CmmProc info _lab _live graph) cfg =
  705     {-# SCC optHsPatterns #-}
  706     -- pprTrace "Initial:" (pprEdgeWeights cfg) $
  707     -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
  708 
  709     -- pprTrace "LoopInfo:" (ppr $ loopInfo cfg (g_entry graph)) $
  710     favourFewerPreds  .
  711     penalizeInfoTables info .
  712     increaseBackEdgeWeight (g_entry graph) $ cfg
  713   where
  714 
  715     -- | Increase the weight of all backedges in the CFG
  716     -- this helps to make loop jumpbacks the heaviest edges
  717     increaseBackEdgeWeight :: BlockId -> CFG -> CFG
  718     increaseBackEdgeWeight root cfg =
  719         let backedges = findBackEdges root cfg
  720             update weight
  721               --Keep irrelevant edges irrelevant
  722               | weight <= 0 = 0
  723               | otherwise
  724               = weight + fromIntegral (backEdgeBonus weights)
  725         in  foldl'  (\cfg edge -> updateEdgeWeight update edge cfg)
  726                     cfg backedges
  727 
  728     -- | Since we cant fall through info tables we penalize these.
  729     penalizeInfoTables :: LabelMap a -> CFG -> CFG
  730     penalizeInfoTables info cfg =
  731         mapWeights fupdate cfg
  732       where
  733         fupdate :: BlockId -> BlockId -> EdgeWeight -> EdgeWeight
  734         fupdate _ to weight
  735           | mapMember to info
  736           = weight - (fromIntegral $ infoTablePenalty weights)
  737           | otherwise = weight
  738 
  739     -- | If a block has two successors, favour the one with fewer
  740     -- predecessors and/or the one allowing fall through.
  741     favourFewerPreds :: CFG -> CFG
  742     favourFewerPreds cfg =
  743         let
  744             revCfg =
  745               reverseEdges $ filterEdges
  746                               (\_from -> fallthroughTarget)  cfg
  747 
  748             predCount n = length $ getSuccessorEdges revCfg n
  749             nodes = getCfgNodes cfg
  750 
  751             modifiers :: Int -> Int -> (EdgeWeight, EdgeWeight)
  752             modifiers preds1 preds2
  753               | preds1 <  preds2 = ( 1,-1)
  754               | preds1 == preds2 = ( 0, 0)
  755               | otherwise        = (-1, 1)
  756 
  757             update :: CFG -> BlockId -> CFG
  758             update cfg node
  759               | [(s1,e1),(s2,e2)] <- getSuccessorEdges cfg node
  760               , !w1 <- edgeWeight e1
  761               , !w2 <- edgeWeight e2
  762               --Only change the weights if there isn't already a ordering.
  763               , w1 == w2
  764               , (mod1,mod2) <- modifiers (predCount s1) (predCount s2)
  765               = (\cfg' ->
  766                   (adjustEdgeWeight cfg' (+mod2) node s2))
  767                     (adjustEdgeWeight cfg  (+mod1) node s1)
  768               | otherwise
  769               = cfg
  770         in foldl' update cfg nodes
  771       where
  772         fallthroughTarget :: BlockId -> EdgeInfo -> Bool
  773         fallthroughTarget to (EdgeInfo source _weight)
  774           | mapMember to info = False
  775           | AsmCodeGen <- source = True
  776           | CmmSource { trans_cmmNode = CmmBranch {} } <- source = True
  777           | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
  778           | otherwise = False
  779 
  780 -- | Convert block-local branch weights to global weights.
  781 staticPredCfg :: BlockId -> CFG -> CFG
  782 staticPredCfg entry cfg = cfg'
  783   where
  784     (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-}
  785                              mkGlobalWeights entry cfg
  786     cfg' = {-# SCC rewriteEdges #-}
  787             mapFoldlWithKey
  788                 (\cfg from m ->
  789                     mapFoldlWithKey
  790                         (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
  791                         cfg m )
  792                 cfg
  793                 globalEdgeWeights
  794 
  795 -- | Determine loop membership of blocks based on SCC analysis
  796 --   This is faster but only gives yes/no answers.
  797 loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
  798 loopMembers cfg =
  799     foldl' (flip setLevel) mapEmpty sccs
  800   where
  801     mkNode :: BlockId -> Node BlockId BlockId
  802     mkNode bid = DigraphNode bid bid (getSuccessors cfg bid)
  803     nodes = map mkNode (getCfgNodes cfg)
  804 
  805     sccs = stronglyConnCompFromEdgedVerticesOrd nodes
  806 
  807     setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool
  808     setLevel (AcyclicSCC bid) m = mapInsert bid False m
  809     setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids
  810 
  811 loopLevels :: CFG -> BlockId -> LabelMap Int
  812 loopLevels cfg root = liLevels loopInfos
  813     where
  814       loopInfos = loopInfo cfg root
  815 
  816 data LoopInfo = LoopInfo
  817   { liBackEdges :: [(Edge)] -- ^ List of back edges
  818   , liLevels :: LabelMap Int -- ^ BlockId -> LoopLevel mapping
  819   , liLoops :: [(Edge, LabelSet)] -- ^ (backEdge, loopBody), body includes header
  820   }
  821 
  822 instance Outputable LoopInfo where
  823     ppr (LoopInfo _ _lvls loops) =
  824         text "Loops:(backEdge, bodyNodes)" $$
  825             (vcat $ map ppr loops)
  826 
  827 {-  Note [Determining the loop body]
  828     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  829 
  830     Starting with the knowledge that:
  831     * head dominates the loop
  832     * `tail` -> `head` is a backedge
  833 
  834     We can determine all nodes by:
  835     * Deleting the loop head from the graph.
  836     * Collect all blocks which are reachable from the `tail`.
  837 
  838     We do so by performing bfs from the tail node towards the head.
  839  -}
  840 
  841 -- | Determine loop membership of blocks based on Dominator analysis.
  842 --   This is slower but gives loop levels instead of just loop membership.
  843 --   However it only detects natural loops. Irreducible control flow is not
  844 --   recognized even if it loops. But that is rare enough that we don't have
  845 --   to care about that special case.
  846 loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo
  847 loopInfo cfg root = LoopInfo  { liBackEdges = backEdges
  848                               , liLevels = mapFromList loopCounts
  849                               , liLoops = loopBodies }
  850   where
  851     revCfg = reverseEdges cfg
  852 
  853     graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $
  854             fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet
  855 
  856 
  857     --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ...
  858     rooted = ( fromBlockId root
  859               , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet)
  860     tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId
  861 
  862     -- Map from Nodes to their dominators
  863     domMap :: LabelMap LabelSet
  864     domMap = mkDomMap tree
  865 
  866     edges = edgeList cfg :: [(BlockId, BlockId)]
  867     -- We can't recompute nodes from edges, there might be blocks not connected via edges.
  868     nodes = getCfgNodes cfg :: [BlockId]
  869 
  870     -- identify back edges
  871     isBackEdge (from,to)
  872       | Just doms <- mapLookup from domMap
  873       , setMember to doms
  874       = True
  875       | otherwise = False
  876 
  877     -- See Note [Determining the loop body]
  878     -- Get the loop body associated with a back edge.
  879     findBody edge@(tail, head)
  880       = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) )
  881       where
  882         -- See Note [Determining the loop body]
  883 
  884 
  885         go :: LabelSet -> LabelSet -> LabelSet
  886         go found current
  887           | setNull current = found
  888           | otherwise = go  (setUnion newSuccessors found)
  889                             newSuccessors
  890           where
  891             -- Really predecessors, since we use the reversed cfg.
  892             newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet
  893             successors = setDelete head $ setUnions $ map
  894                                       (\x -> if x == head then setEmpty else setFromList (getSuccessors revCfg x))
  895                                       (setElems current) :: LabelSet
  896 
  897     backEdges = filter isBackEdge edges
  898     loopBodies = map findBody backEdges :: [(Edge, LabelSet)]
  899 
  900     -- Block b is part of n loop bodies => loop nest level of n
  901     loopCounts =
  902       let bodies = map (first snd) loopBodies -- [(Header, Body)]
  903           loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies
  904       in  map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)]
  905 
  906     toIntSet :: LabelSet -> IntSet
  907     toIntSet s = IS.fromList . map fromBlockId . setElems $ s
  908     toIntMap :: LabelMap a -> IntMap a
  909     toIntMap m = IM.fromList $ map (\(x,y) -> (fromBlockId x,y)) $ mapToList m
  910 
  911     mkDomMap :: Tree BlockId -> LabelMap LabelSet
  912     mkDomMap root = mapFromList $ go setEmpty root
  913       where
  914         go :: LabelSet -> Tree BlockId -> [(Label,LabelSet)]
  915         go parents (Node lbl [])
  916           =  [(lbl, parents)]
  917         go parents (Node _ leaves)
  918           = let nodes = map rootLabel leaves
  919                 entries = map (\x -> (x,parents)) nodes
  920             in  entries ++ concatMap
  921                             (\n -> go (setInsert (rootLabel n) parents) n)
  922                             leaves
  923 
  924     fromBlockId :: BlockId -> Int
  925     fromBlockId = getKey . getUnique
  926 
  927     toBlockId :: Int -> BlockId
  928     toBlockId = mkBlockId . mkUniqueGrimily
  929 
  930 -- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
  931 newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId])
  932 
  933 instance G.NonLocal (BlockNode) where
  934   entryLabel (BN (lbl,_))   = lbl
  935   successors (BN (_,succs)) = succs
  936 
  937 revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
  938 revPostorderFrom cfg root =
  939     map fromNode $ G.revPostorderFrom hooplGraph root
  940   where
  941     nodes = getCfgNodes cfg
  942     hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes
  943 
  944     fromNode :: BlockNode C C -> BlockId
  945     fromNode (BN x) = fst x
  946 
  947     toNode :: BlockId -> BlockNode C C
  948     toNode bid =
  949         BN (bid,getSuccessors cfg $ bid)
  950 
  951 
  952 -- | We take in a CFG which has on its edges weights which are
  953 --   relative only to other edges originating from the same node.
  954 --
  955 --   We return a CFG for which each edge represents a GLOBAL weight.
  956 --   This means edge weights are comparable across the whole graph.
  957 --
  958 --   For irreducible control flow results might be imprecise, otherwise they
  959 --   are reliable.
  960 --
  961 --   The algorithm is based on the Paper
  962 --   "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
  963 --   The only big change is that we go over the nodes in the body of loops in
  964 --   reverse post order. Which is required for diamond control flow to work probably.
  965 --
  966 --   We also apply a few prediction heuristics (based on the same paper)
  967 --
  968 --   The returned result represents frequences.
  969 --   For blocks it's the expected number of executions and
  970 --   for edges is the number of traversals.
  971 
  972 {-# NOINLINE mkGlobalWeights #-}
  973 {-# SCC mkGlobalWeights #-}
  974 mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double))
  975 mkGlobalWeights root localCfg
  976   | null localCfg = panic "Error - Empty CFG"
  977   | otherwise
  978   = (blockFreqs', edgeFreqs')
  979   where
  980     -- Calculate fixpoints
  981     (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder'
  982     blockFreqs' = mapFromList $ map (first fromVertex) (assocs blockFreqs) :: LabelMap Double
  983     edgeFreqs' = fmap fromVertexMap $ fromVertexMap edgeFreqs
  984 
  985     fromVertexMap :: IM.IntMap x -> LabelMap x
  986     fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m
  987 
  988     revOrder = revPostorderFrom localCfg root :: [BlockId]
  989     loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root
  990 
  991     revOrder' = map toVertex revOrder
  992     backEdges' = map (bimap toVertex toVertex) backedges
  993     bodies' = map calcBody bodies
  994 
  995     estimatedCfg = staticBranchPrediction root loopResults localCfg
  996     -- Normalize the weights to probabilities and apply heuristics
  997     nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex
  998 
  999     -- By mapping vertices to numbers in reverse post order we can bring any subset into reverse post
 1000     -- order simply by sorting.
 1001     -- TODO: The sort is redundant if we can guarantee that setElems returns elements ascending
 1002     calcBody (backedge, blocks) =
 1003         (toVertex $ snd backedge, sort . map toVertex $ (setElems blocks))
 1004 
 1005     vertexMapping = mapFromList $ zip revOrder [0..] :: LabelMap Int
 1006     blockMapping = listArray (0,mapSize vertexMapping - 1) revOrder :: Array Int BlockId
 1007     -- Map from blockId to indices starting at zero
 1008     toVertex :: BlockId -> Int
 1009     toVertex   blockId  = expectJust "mkGlobalWeights" $ mapLookup blockId vertexMapping
 1010     -- Map from indices starting at zero to blockIds
 1011     fromVertex :: Int -> BlockId
 1012     fromVertex vertex   = blockMapping ! vertex
 1013 
 1014 {- Note [Static Branch Prediction]
 1015    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1016 
 1017 The work here has been based on the paper
 1018 "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
 1019 
 1020 The primary differences are that if we branch on the result of a heap
 1021 check we do not apply any of the heuristics.
 1022 The reason is simple: They look like loops in the control flow graph
 1023 but are usually never entered, and if at most once.
 1024 
 1025 Currently implemented is a heuristic to predict that we do not exit
 1026 loops (lehPredicts) and one to predict that backedges are more likely
 1027 than any other edge.
 1028 
 1029 The back edge case is special as it superceeds any other heuristic if it
 1030 applies.
 1031 
 1032 Do NOT rely solely on nofib results for benchmarking this. I recommend at least
 1033 comparing megaparsec and container benchmarks. Nofib does not seeem to have
 1034 many instances of "loopy" Cmm where these make a difference.
 1035 
 1036 TODO:
 1037 * The paper containers more benchmarks which should be implemented.
 1038 * If we turn the likelihood on if/else branches into a probability
 1039   instead of true/false we could implement this as a Cmm pass.
 1040   + The complete Cmm code still exists and can be accessed by the heuristics
 1041   + There is no chance of register allocation/codegen inserting branches/blocks
 1042   + making the TransitionSource info wrong.
 1043   + potential to use this information in CmmPasses.
 1044   - Requires refactoring of all the code relying on the binary nature of likelihood.
 1045   - Requires refactoring `loopInfo` to work on both, Cmm Graphs and the backend CFG.
 1046 -}
 1047 
 1048 -- | Combination of target node id and information about the branch
 1049 --   we are looking at.
 1050 type TargetNodeInfo = (BlockId, EdgeInfo)
 1051 
 1052 
 1053 -- | Update branch weights based on certain heuristics.
 1054 -- See Note [Static Branch Prediction]
 1055 -- TODO: This should be combined with optimizeCFG
 1056 {-# SCC staticBranchPrediction #-}
 1057 staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG
 1058 staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg =
 1059     -- pprTrace "staticEstimatesOn" (ppr (cfg)) $
 1060     foldl' update cfg nodes
 1061   where
 1062     nodes = getCfgNodes cfg
 1063     backedges = S.fromList $ l_backEdges
 1064     -- Loops keyed by their back edge
 1065     loops = M.fromList $ l_loops :: M.Map Edge LabelSet
 1066     loopHeads = S.fromList $ map snd $ M.keys loops
 1067 
 1068     update :: CFG -> BlockId -> CFG
 1069     update cfg node
 1070         -- No successors, nothing to do.
 1071         | null successors = cfg
 1072 
 1073         -- Mix of backedges and others:
 1074         -- Always predict the backedges.
 1075         | not (null m) && length m < length successors
 1076         -- Heap/Stack checks "loop", but only once.
 1077         -- So we simply exclude any case involving them.
 1078         , not $ any (isHeapOrStackCheck  . transitionSource . snd) successors
 1079         = let   loopChance = repeat $! pred_LBH / (fromIntegral $ length m)
 1080                 exitChance = repeat $! (1 - pred_LBH) / fromIntegral (length not_m)
 1081                 updates = zip (map fst m) loopChance ++ zip (map fst not_m) exitChance
 1082         in  -- pprTrace "mix" (ppr (node,successors)) $
 1083             foldl' (\cfg (to,weight) -> setEdgeWeight cfg weight node to) cfg updates
 1084 
 1085         -- For (regular) non-binary branches we keep the weights from the STG -> Cmm translation.
 1086         | length successors /= 2
 1087         = cfg
 1088 
 1089         -- Only backedges - no need to adjust
 1090         | length m > 0
 1091         = cfg
 1092 
 1093         -- A regular binary branch, we can plug addition predictors in here.
 1094         | [(s1,s1_info),(s2,s2_info)] <- successors
 1095         , not $ any (isHeapOrStackCheck  . transitionSource . snd) successors
 1096         = -- Normalize weights to total of 1
 1097             let !w1 = max (edgeWeight s1_info) (0)
 1098                 !w2 = max (edgeWeight s2_info) (0)
 1099                 -- Of both weights are <= 0 we set both to 0.5
 1100                 normalizeWeight w = if w1 + w2 == 0 then 0.5 else w/(w1+w2)
 1101                 !cfg'  = setEdgeWeight cfg  (normalizeWeight w1) node s1
 1102                 !cfg'' = setEdgeWeight cfg' (normalizeWeight w2) node s2
 1103 
 1104                 -- Figure out which heuristics apply to these successors
 1105                 heuristics = map ($ ((s1,s1_info),(s2,s2_info)))
 1106                             [lehPredicts, phPredicts, ohPredicts, ghPredicts, lhhPredicts, chPredicts
 1107                             , shPredicts, rhPredicts]
 1108                 -- Apply result of a heuristic. Argument is the likelihood
 1109                 -- predicted for s1.
 1110                 applyHeuristic :: CFG -> Maybe Prob -> CFG
 1111                 applyHeuristic cfg Nothing = cfg
 1112                 applyHeuristic cfg (Just (s1_pred :: Double))
 1113                   | s1_old == 0 || s2_old == 0 ||
 1114                     isHeapOrStackCheck (transitionSource s1_info) ||
 1115                     isHeapOrStackCheck (transitionSource s2_info)
 1116                   = cfg
 1117                   | otherwise =
 1118                     let -- Predictions from heuristic
 1119                         s1_prob = EdgeWeight s1_pred :: EdgeWeight
 1120                         s2_prob = 1.0 - s1_prob
 1121                         -- Update
 1122                         d = (s1_old * s1_prob) + (s2_old * s2_prob) :: EdgeWeight
 1123                         s1_prob' = s1_old * s1_prob / d
 1124                         !s2_prob' = s2_old * s2_prob / d
 1125                         !cfg_s1 = setEdgeWeight cfg    s1_prob' node s1
 1126                     in  -- pprTrace "Applying heuristic!" (ppr (node,s1,s2) $$ ppr (s1_prob', s2_prob')) $
 1127                         setEdgeWeight cfg_s1 s2_prob' node s2
 1128                   where
 1129                     -- Old weights
 1130                     s1_old = getEdgeWeight cfg node s1
 1131                     s2_old = getEdgeWeight cfg node s2
 1132 
 1133             in
 1134             -- pprTraceIt "RegularCfgResult" $
 1135             foldl' applyHeuristic cfg'' heuristics
 1136 
 1137         -- Branch on heap/stack check
 1138         | otherwise = cfg
 1139 
 1140       where
 1141         -- Chance that loops are taken.
 1142         pred_LBH = 0.875
 1143         -- successors
 1144         successors = getSuccessorEdges cfg node
 1145         -- backedges
 1146         (m,not_m) = partition (\succ -> S.member (node, fst succ) backedges) successors
 1147 
 1148         -- Heuristics return nothing if they don't say anything about this branch
 1149         -- or Just (prob_s1) where prob_s1 is the likelihood for s1 to be the
 1150         -- taken branch. s1 is the branch in the true case.
 1151 
 1152         -- Loop exit heuristic.
 1153         -- We are unlikely to leave a loop unless it's to enter another one.
 1154         pred_LEH = 0.75
 1155         -- If and only if no successor is a loopheader,
 1156         -- then we will likely not exit the current loop body.
 1157         lehPredicts :: (TargetNodeInfo,TargetNodeInfo) -> Maybe Prob
 1158         lehPredicts ((s1,_s1_info),(s2,_s2_info))
 1159           | S.member s1 loopHeads || S.member s2 loopHeads
 1160           = Nothing
 1161 
 1162           | otherwise
 1163           = --pprTrace "lehPredict:" (ppr $ compare s1Level s2Level) $
 1164             case compare s1Level s2Level of
 1165                 EQ -> Nothing
 1166                 LT -> Just (1-pred_LEH) --s1 exits to a shallower loop level (exits loop)
 1167                 GT -> Just (pred_LEH)   --s1 exits to a deeper loop level
 1168             where
 1169                 s1Level = mapLookup s1 loopLevels
 1170                 s2Level = mapLookup s2 loopLevels
 1171 
 1172         -- Comparing to a constant is unlikely to be equal.
 1173         ohPredicts (s1,_s2)
 1174             | CmmSource { trans_cmmNode = src1 } <- getTransitionSource node (fst s1) cfg
 1175             , CmmCondBranch cond ltrue _lfalse likely <- src1
 1176             , likely == Nothing
 1177             , CmmMachOp mop args <- cond
 1178             , MO_Eq {} <- mop
 1179             , not (null [x | x@CmmLit{} <- args])
 1180             = if fst s1 == ltrue then Just 0.3 else Just 0.7
 1181 
 1182             | otherwise
 1183             = Nothing
 1184 
 1185         -- TODO: These are all the other heuristics from the paper.
 1186         -- Not all will apply, for now we just stub them out as Nothing.
 1187         phPredicts = const Nothing
 1188         ghPredicts = const Nothing
 1189         lhhPredicts = const Nothing
 1190         chPredicts = const Nothing
 1191         shPredicts = const Nothing
 1192         rhPredicts = const Nothing
 1193 
 1194 -- We normalize all edge weights as probabilities between 0 and 1.
 1195 -- Ignoring rounding errors all outgoing edges sum up to 1.
 1196 cfgEdgeProbabilities :: CFG -> (BlockId -> Int) -> IM.IntMap (IM.IntMap Prob)
 1197 cfgEdgeProbabilities cfg toVertex
 1198     = mapFoldlWithKey foldEdges IM.empty cfg
 1199   where
 1200     foldEdges = (\m from toMap -> IM.insert (toVertex from) (normalize toMap) m)
 1201 
 1202     normalize :: (LabelMap EdgeInfo) -> (IM.IntMap Prob)
 1203     normalize weightMap
 1204         | edgeCount <= 1 = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) 1.0 m) IM.empty weightMap
 1205         | otherwise = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) (normalWeight k) m) IM.empty weightMap
 1206       where
 1207         edgeCount = mapSize weightMap
 1208         -- Negative weights are generally allowed but are mapped to zero.
 1209         -- We then check if there is at least one non-zero edge and if not
 1210         -- assign uniform weights to all branches.
 1211         minWeight = 0 :: Prob
 1212         weightMap' = fmap (\w -> max (weightToDouble . edgeWeight $ w) minWeight) weightMap
 1213         totalWeight = sum weightMap'
 1214 
 1215         normalWeight :: BlockId -> Prob
 1216         normalWeight bid
 1217          | totalWeight == 0
 1218          = 1.0 / fromIntegral edgeCount
 1219          | Just w <- mapLookup bid weightMap'
 1220          = w/totalWeight
 1221          | otherwise = panic "impossible"
 1222 
 1223 -- This is the fixpoint algorithm from
 1224 --   "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
 1225 -- The adaption to Haskell is my own.
 1226 calcFreqs :: IM.IntMap (IM.IntMap Prob) -> [(Int,Int)] -> [(Int, [Int])] -> [Int]
 1227           -> (Array Int Double, IM.IntMap (IM.IntMap Prob))
 1228 calcFreqs graph backEdges loops revPostOrder = runST $ do
 1229     visitedNodes <- newArray (0,nodeCount-1) False :: ST s (STUArray s Int Bool)
 1230     blockFreqs <- newArray (0,nodeCount-1) 0.0 :: ST s (STUArray s Int Double)
 1231     edgeProbs <- newSTRef graph
 1232     edgeBackProbs <- newSTRef graph
 1233 
 1234     -- let traceArray a = do
 1235     --       vs <- forM [0..nodeCount-1] $ \i -> readArray a i >>= (\v -> return (i,v))
 1236           -- trace ("array: " ++ show vs) $ return ()
 1237 
 1238     let  -- See #1600, we need to inline or unboxing makes perf worse.
 1239         -- {-# INLINE getFreq #-}
 1240         {-# INLINE visited #-}
 1241         visited b = unsafeRead visitedNodes b
 1242         getFreq b = unsafeRead blockFreqs b
 1243         -- setFreq :: forall s. Int -> Double -> ST s ()
 1244         setFreq b f = unsafeWrite blockFreqs b f
 1245         -- setVisited :: forall s. Node -> ST s ()
 1246         setVisited b = unsafeWrite visitedNodes b True
 1247         -- Frequency/probability that edge is taken.
 1248         getProb' arr b1 b2 = readSTRef arr >>=
 1249             (\graph ->
 1250                 return .
 1251                         fromMaybe (error "getFreq 1") .
 1252                         IM.lookup b2 .
 1253                         fromMaybe (error "getFreq 2") $
 1254                         (IM.lookup b1 graph)
 1255             )
 1256         setProb' arr b1 b2 prob = do
 1257           g <- readSTRef arr
 1258           let !m = fromMaybe (error "Foo") $ IM.lookup b1 g
 1259               !m' = IM.insert b2 prob m
 1260           writeSTRef arr $! (IM.insert b1 m' g)
 1261 
 1262         getEdgeFreq b1 b2 = getProb' edgeProbs b1 b2
 1263         setEdgeFreq b1 b2 = setProb' edgeProbs b1 b2
 1264         getProb b1 b2 = fromMaybe (error "getProb") $ do
 1265             m' <- IM.lookup b1 graph
 1266             IM.lookup b2 m'
 1267 
 1268         getBackProb b1 b2 = getProb' edgeBackProbs b1 b2
 1269         setBackProb b1 b2 = setProb' edgeBackProbs b1 b2
 1270 
 1271 
 1272     let -- calcOutFreqs :: Node -> ST s ()
 1273         calcOutFreqs bhead block = do
 1274           !f <- getFreq block
 1275           forM (successors block) $ \bi -> do
 1276             let !prob = getProb block bi
 1277             let !succFreq = f * prob
 1278             setEdgeFreq block bi succFreq
 1279             -- traceM $ "SetOut: " ++ show (block, bi, f, prob, succFreq)
 1280             when (bi == bhead) $ setBackProb block bi succFreq
 1281 
 1282 
 1283     let propFreq block head = do
 1284             -- traceM ("prop:" ++ show (block,head))
 1285             -- traceShowM block
 1286 
 1287             !v <- visited block
 1288             if v then
 1289                 return () --Dont look at nodes twice
 1290             else if block == head then
 1291                 setFreq block 1.0 -- Loop header frequency is always 1
 1292             else do
 1293                 let preds = IS.elems $ predecessors block
 1294                 irreducible <- (fmap or) $ forM preds $ \bp -> do
 1295                     !bp_visited <- visited bp
 1296                     let bp_backedge = isBackEdge bp block
 1297                     return (not bp_visited && not bp_backedge)
 1298 
 1299                 if irreducible
 1300                 then return () -- Rare we don't care
 1301                 else do
 1302                     setFreq block 0
 1303                     !cycleProb <- sum <$> (forM preds $ \pred -> do
 1304                         if isBackEdge pred block
 1305                             then
 1306                                 getBackProb pred block
 1307                             else do
 1308                                 !f <- getFreq block
 1309                                 !prob <- getEdgeFreq pred block
 1310                                 setFreq block $! f + prob
 1311                                 return 0)
 1312                     -- traceM $ "cycleProb:" ++ show cycleProb
 1313                     let limit = 1 - 1/512 -- Paper uses 1 - epsilon, but this works.
 1314                                           -- determines how large likelyhoods in loops can grow.
 1315                     !cycleProb <- return $ min cycleProb limit -- <- return $ if cycleProb > limit then limit else cycleProb
 1316                     -- traceM $ "cycleProb:" ++ show cycleProb
 1317 
 1318                     !f <- getFreq block
 1319                     setFreq block (f / (1.0 - cycleProb))
 1320 
 1321             setVisited block
 1322             calcOutFreqs head block
 1323 
 1324     -- Loops, by nesting, inner to outer
 1325     forM_ loops $ \(head, body) -> do
 1326         forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i True) -- Mark all nodes as visited.
 1327         forM_ body (\i -> unsafeWrite visitedNodes i False) -- Mark all blocks reachable from head as not visited
 1328         forM_ body $ \block -> propFreq block head
 1329 
 1330     -- After dealing with all loops, deal with non-looping parts of the CFG
 1331     forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i False) -- Everything in revPostOrder is reachable
 1332     forM_ revPostOrder $ \block -> propFreq block (head revPostOrder)
 1333 
 1334     -- trace ("Final freqs:") $ return ()
 1335     -- let freqString = pprFreqs freqs
 1336     -- trace (unlines freqString) $ return ()
 1337     -- trace (pprFre) $ return ()
 1338     graph' <- readSTRef edgeProbs
 1339     freqs' <- unsafeFreeze  blockFreqs
 1340 
 1341     return (freqs', graph')
 1342   where
 1343     -- How can these lookups fail? Consider the CFG [A -> B]
 1344     predecessors :: Int -> IS.IntSet
 1345     predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph
 1346     successors :: Int -> [Int]
 1347     successors b = fromMaybe (lookupError "succ" b graph)$ IM.keys <$> IM.lookup b graph
 1348     lookupError s b g = pprPanic ("Lookup error " ++ s) $
 1349                             ( text "node" <+> ppr b $$
 1350                                 text "graph" <+>
 1351                                 vcat (map (\(k,m) -> ppr (k,m :: IM.IntMap Double)) $ IM.toList g)
 1352                             )
 1353 
 1354     nodeCount = IM.foldl' (\count toMap -> IM.foldlWithKey' countTargets count toMap) (IM.size graph) graph
 1355       where
 1356         countTargets = (\count k _ -> countNode k + count )
 1357         countNode n = if IM.member n graph then 0 else 1
 1358 
 1359     isBackEdge from to = S.member (from,to) backEdgeSet
 1360     backEdgeSet = S.fromList backEdges
 1361 
 1362     revGraph :: IntMap IntSet
 1363     revGraph = IM.foldlWithKey' (\m from toMap -> addEdges m from toMap) IM.empty graph
 1364         where
 1365             addEdges m0 from toMap = IM.foldlWithKey' (\m k _ -> addEdge m from k) m0 toMap
 1366             addEdge m0 from to = IM.insertWith IS.union to (IS.singleton from) m0