never executed always true always false
    1 {-# LANGUAGE CPP #-}
    2 {-# LANGUAGE DataKinds #-}
    3 {-# LANGUAGE FlexibleContexts #-}
    4 {-# LANGUAGE MultiWayIf #-}
    5 {-# LANGUAGE ScopedTypeVariables #-}
    6 {-# LANGUAGE TypeFamilies #-}
    7 
    8 --
    9 -- Copyright (c) 2018 Andreas Klebinger
   10 --
   11 
   12 module GHC.CmmToAsm.BlockLayout
   13     ( sequenceTop, backendMaintainsCfg)
   14 where
   15 
   16 import GHC.Prelude
   17 
   18 import GHC.Platform
   19 
   20 import GHC.CmmToAsm.Instr
   21 import GHC.CmmToAsm.Monad
   22 import GHC.CmmToAsm.CFG
   23 import GHC.CmmToAsm.Types
   24 import GHC.CmmToAsm.Config
   25 
   26 import GHC.Cmm
   27 import GHC.Cmm.BlockId
   28 import GHC.Cmm.Dataflow.Collections
   29 import GHC.Cmm.Dataflow.Label
   30 
   31 import GHC.Types.Unique.FM
   32 
   33 import GHC.Data.Graph.Directed
   34 import GHC.Data.Maybe
   35 import GHC.Data.List.SetOps (removeDups)
   36 import GHC.Data.OrdList
   37 
   38 import GHC.Utils.Trace
   39 import GHC.Utils.Outputable
   40 import GHC.Utils.Panic
   41 import GHC.Utils.Panic.Plain
   42 import GHC.Utils.Misc
   43 
   44 import Data.List (sortOn, sortBy, nub)
   45 import Data.Foldable (toList)
   46 import qualified Data.Set as Set
   47 import Data.STRef
   48 import Control.Monad.ST.Strict
   49 import Control.Monad (foldM, unless)
   50 import GHC.Data.UnionFind
   51 
   52 {-
   53   Note [CFG based code layout]
   54   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   55 
   56   The major steps in placing blocks are as follow:
   57   * Compute a CFG based on the Cmm AST, see getCfgProc.
   58     This CFG will have edge weights representing a guess
   59     on how important they are.
   60   * After we convert Cmm to Asm we run `optimizeCFG` which
   61     adds a few more "educated guesses" to the equation.
   62   * Then we run loop analysis on the CFG (`loopInfo`) which tells us
   63     about loop headers, loop nesting levels and the sort.
   64   * Based on the CFG and loop information refine the edge weights
   65     in the CFG and normalize them relative to the most often visited
   66     node. (See `mkGlobalWeights`)
   67   * Feed this CFG into the block layout code (`sequenceTop`) in this
   68     module. Which will then produce a code layout based on the input weights.
   69 
   70   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   71   ~~~ Note [Chain based CFG serialization]
   72   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   73 
   74   For additional information also look at
   75   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout
   76 
   77   We have a CFG with edge weights based on which we try to place blocks next to
   78   each other.
   79 
   80   Edge weights not only represent likelihood of control transfer between blocks
   81   but also how much a block would benefit from being placed sequentially after
   82   it's predecessor.
   83   For example blocks which are preceded by an info table are more likely to end
   84   up in a different cache line than their predecessor and we can't eliminate the jump
   85   so there is less benefit to placing them sequentially.
   86 
   87   For example consider this example:
   88 
   89   A:  ...
   90       jmp cond D (weak successor)
   91       jmp B
   92   B:  ...
   93       jmp C
   94   C:  ...
   95       jmp X
   96   D:  ...
   97       jmp B (weak successor)
   98 
   99   We determine a block layout by building up chunks (calling them chains) of
  100   possible control flows for which blocks will be placed sequentially.
  101 
  102   Eg for our example we might end up with two chains like:
  103   [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
  104   However there is no particular order in which chains are placed since
  105   (hopefully) the blocks for which sequentiality is important have already
  106   been placed in the same chain.
  107 
  108   -----------------------------------------------------------------------------
  109      1) First try to create a list of good chains.
  110   -----------------------------------------------------------------------------
  111 
  112   Good chains are these which allow us to eliminate jump instructions.
  113   Which further eliminate often executed jumps first.
  114 
  115   We do so by:
  116 
  117   *)  Ignore edges which represent instructions which can not be replaced
  118       by fall through control flow. Primarily calls and edges to blocks which
  119       are prefixed by a info table we have to jump across.
  120 
  121   *)  Then process remaining edges in order of frequency taken and:
  122 
  123     +)  If source and target have not been placed build a new chain from them.
  124 
  125     +)  If source and target have been placed, and are ends of differing chains
  126         try to merge the two chains.
  127 
  128     +)  If one side of the edge is a end/front of a chain, add the other block of
  129         to edge to the same chain
  130 
  131         Eg if we look at edge (B -> C) and already have the chain (A -> B)
  132         then we extend the chain to (A -> B -> C).
  133 
  134     +)  If the edge was used to modify or build a new chain remove the edge from
  135         our working list.
  136 
  137   *) If there any blocks not being placed into a chain after these steps we place
  138      them into a chain consisting of only this block.
  139 
  140   Ranking edges by their taken frequency, if
  141   two edges compete for fall through on the same target block, the one taken
  142   more often will automatically win out. Resulting in fewer instructions being
  143   executed.
  144 
  145   Creating singleton chains is required for situations where we have code of the
  146   form:
  147 
  148     A: goto B:
  149     <infoTable>
  150     B: goto C:
  151     <infoTable>
  152     C: ...
  153 
  154   As the code in block B is only connected to the rest of the program via edges
  155   which will be ignored in this step we make sure that B still ends up in a chain
  156   this way.
  157 
  158   -----------------------------------------------------------------------------
  159      2) We also try to fuse chains.
  160   -----------------------------------------------------------------------------
  161 
  162   As a result from the above step we still end up with multiple chains which
  163   represent sequential control flow chunks. But they are not yet suitable for
  164   code layout as we need to place *all* blocks into a single sequence.
  165 
  166   In this step we combine chains result from the above step via these steps:
  167 
  168   *)  Look at the ranked list of *all* edges, including calls/jumps across info tables
  169       and the like.
  170 
  171   *)  Look at each edge and
  172 
  173     +) Given an edge (A -> B) try to find two chains for which
  174       * Block A is at the end of one chain
  175       * Block B is at the front of the other chain.
  176     +) If we find such a chain we "fuse" them into a single chain, remove the
  177        edge from working set and continue.
  178     +) If we can't find such chains we skip the edge and continue.
  179 
  180   -----------------------------------------------------------------------------
  181      3) Place indirect successors (neighbours) after each other
  182   -----------------------------------------------------------------------------
  183 
  184   We might have chains [A,B,C,X],[E] in a CFG of the sort:
  185 
  186     A ---> B ---> C --------> X(exit)
  187                    \- ->E- -/
  188 
  189   While E does not follow X it's still beneficial to place them near each other.
  190   This can be advantageous if eg C,X,E will end up in the same cache line.
  191 
  192   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  193   ~~~ Note [Triangle Control Flow]
  194   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  195 
  196   Checking if an argument is already evaluated leads to a somewhat
  197   special case  which looks like this:
  198 
  199     A:
  200         if (R1 & 7 != 0) goto Leval; else goto Lwork;
  201     Leval: // global
  202         call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
  203     Lwork: // global
  204         ...
  205 
  206         A
  207         |\
  208         | Leval
  209         |/ - (This edge can be missing because of optimizations)
  210         Lwork
  211 
  212   Once we hit the metal the call instruction is just 2-3 bytes large
  213   depending on the register used. So we lay out the assembly like this:
  214 
  215         movq %rbx,%rax
  216         andl $7,%eax
  217         cmpq $1,%rax
  218         jne Lwork
  219     Leval:
  220         jmp *(%rbx) # encoded in 2-3 bytes.
  221     <info table>
  222     Lwork:
  223         ...
  224 
  225   We could explicitly check for this control flow pattern.
  226 
  227   This is advantageous because:
  228   * It's optimal if the argument isn't evaluated.
  229   * If it's evaluated we only have the extra cost of jumping over
  230     the 2-3 bytes for the call.
  231   * Guarantees the smaller encoding for the conditional jump.
  232 
  233   However given that Lwork usually has an info table we
  234   penalize this edge. So Leval should get placed first
  235   either way and things work out for the best.
  236 
  237   Optimizing for the evaluated case instead would penalize
  238   the other code path. It adds an jump as we can't fall through
  239   to Lwork because of the info table.
  240   Assuming that Lwork is large the chance that the "call" ends up
  241   in the same cache line is also fairly small.
  242 
  243   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  244   ~~~ Note [Layout relevant edge weights]
  245   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  246 
  247   The input to the chain based code layout algorithm is a CFG
  248   with edges annotated with their frequency. The frequency
  249   of traversal corresponds quite well to the cost of not placing
  250   the connected blocks next to each other.
  251 
  252   However even if having the same frequency certain edges are
  253   inherently more or less relevant to code layout.
  254 
  255   In particular:
  256 
  257   * Edges which cross an info table are less relevant than others.
  258 
  259     If we place the blocks across this edge next to each other
  260     they are still separated by the info table which negates
  261     much of the benefit. It makes it less likely both blocks
  262     will share a cache line reducing the benefits from locality.
  263     But it also prevents us from eliminating jump instructions.
  264 
  265   * Conditional branches and switches are slightly less relevant.
  266 
  267     We can completely remove unconditional jumps by placing them
  268     next to each other. This is not true for conditional branch edges.
  269     We apply a small modifier to them to ensure edges for which we can
  270     eliminate the overhead completely are considered first. See also #18053.
  271 
  272   * Edges constituted by a call are ignored.
  273 
  274     Considering these hardly helped with performance and ignoring
  275     them helps quite a bit to improve compiler performance.
  276 
  277   So we perform a preprocessing step where we apply a multiplicator
  278   to these kinds of edges.
  279 
  280   -}
  281 
  282 
  283 -- | Look at X number of blocks in two chains to determine
  284 --   if they are "neighbours".
  285 neighbourOverlapp :: Int
  286 neighbourOverlapp = 2
  287 
  288 -- | Maps blocks near the end of a chain to it's chain AND
  289 -- the other blocks near the end.
  290 -- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E]))
  291 -- where [A,B] are blocks in the end region of a chain.
  292 -- This is cheaper then recomputing the ends multiple times.
  293 type FrontierMap = LabelMap ([BlockId],BlockChain)
  294 
  295 -- | A non empty ordered sequence of basic blocks.
  296 --   It is suitable for serialization in this order.
  297 --
  298 --   We use OrdList instead of [] to allow fast append on both sides
  299 --   when combining chains.
  300 newtype BlockChain
  301     = BlockChain { chainBlocks :: (OrdList BlockId) }
  302 
  303 -- All chains are constructed the same way so comparison
  304 -- including structure is faster.
  305 instance Eq BlockChain where
  306     BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2
  307 
  308 -- Useful for things like sets and debugging purposes, sorts by blocks
  309 -- in the chain.
  310 instance Ord (BlockChain) where
  311    (BlockChain lbls1) `compare` (BlockChain lbls2)
  312        = assert (toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2) $
  313          strictlyOrdOL lbls1 lbls2
  314 
  315 instance Outputable (BlockChain) where
  316     ppr (BlockChain blks) =
  317         parens (text "Chain:" <+> ppr (fromOL $ blks) )
  318 
  319 chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b
  320 chainFoldl f z (BlockChain blocks) = foldl' f z blocks
  321 
  322 noDups :: [BlockChain] -> Bool
  323 noDups chains =
  324     let chainBlocks = concatMap chainToBlocks chains :: [BlockId]
  325         (_blocks, dups) = removeDups compare chainBlocks
  326     in if null dups then True
  327         else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False
  328 
  329 inFront :: BlockId -> BlockChain -> Bool
  330 inFront bid (BlockChain seq)
  331   = headOL seq == bid
  332 
  333 chainSingleton :: BlockId -> BlockChain
  334 chainSingleton lbl
  335     = BlockChain (unitOL lbl)
  336 
  337 chainFromList :: [BlockId] -> BlockChain
  338 chainFromList = BlockChain . toOL
  339 
  340 chainSnoc :: BlockChain -> BlockId -> BlockChain
  341 chainSnoc (BlockChain blks) lbl
  342   = BlockChain (blks `snocOL` lbl)
  343 
  344 chainCons :: BlockId -> BlockChain -> BlockChain
  345 chainCons lbl (BlockChain blks)
  346   = BlockChain (lbl `consOL` blks)
  347 
  348 chainConcat :: BlockChain -> BlockChain -> BlockChain
  349 chainConcat (BlockChain blks1) (BlockChain blks2)
  350   = BlockChain (blks1 `appOL` blks2)
  351 
  352 chainToBlocks :: BlockChain -> [BlockId]
  353 chainToBlocks (BlockChain blks) = fromOL blks
  354 
  355 -- | Given the Chain A -> B -> C -> D and we break at C
  356 --   we get the two Chains (A -> B, C -> D) as result.
  357 breakChainAt :: BlockId -> BlockChain
  358              -> (BlockChain,BlockChain)
  359 breakChainAt bid (BlockChain blks)
  360     | not (bid == head rblks)
  361     = panic "Block not in chain"
  362     | otherwise
  363     = (BlockChain (toOL lblks),
  364        BlockChain (toOL rblks))
  365   where
  366     (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks)
  367 
  368 takeR :: Int -> BlockChain -> [BlockId]
  369 takeR n (BlockChain blks) =
  370     take n . fromOLReverse $ blks
  371 
  372 takeL :: Int -> BlockChain -> [BlockId]
  373 takeL n (BlockChain blks) =
  374     take n . fromOL $ blks
  375 
  376 -- Note [Combining neighborhood chains]
  377 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  378 
  379 -- See also Note [Chain based CFG serialization]
  380 -- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
  381 --
  382 -- While placing the latter after the former doesn't result in sequential
  383 -- control flow it is still beneficial. As block C and E might end
  384 -- up in the same cache line.
  385 --
  386 -- So we place these chains next to each other even if we can't fuse them.
  387 --
  388 --   A -> B -> C -> D
  389 --             v
  390 --             - -> E -> F ...
  391 --
  392 -- A simple heuristic to chose which chains we want to combine:
  393 --   * Process edges in descending priority.
  394 --   * Check if there is a edge near the end of one chain which goes
  395 --     to a block near the start of another edge.
  396 --
  397 -- While we could take into account the space between the two blocks which
  398 -- share an edge this blows up compile times quite a bit. It requires
  399 -- us to find all edges between two chains, check the distance for all edges,
  400 -- rank them based on the distance and only then we can select two chains
  401 -- to combine. Which would add a lot of complexity for little gain.
  402 --
  403 -- So instead we just rank by the strength of the edge and use the first pair we
  404 -- find.
  405 
  406 -- | For a given list of chains and edges try to combine chains with strong
  407 --   edges between them.
  408 combineNeighbourhood  :: [CfgEdge] -- ^ Edges to consider
  409                       -> [BlockChain] -- ^ Current chains of blocks
  410                       -> ([BlockChain], Set.Set (BlockId,BlockId))
  411                       -- ^ Resulting list of block chains, and a set of edges which
  412                       -- were used to fuse chains and as such no longer need to be
  413                       -- considered.
  414 combineNeighbourhood edges chains
  415     = -- pprTraceIt "Neighbours" $
  416     --   pprTrace "combineNeighbours" (ppr edges) $
  417       applyEdges edges endFrontier startFrontier (Set.empty)
  418     where
  419         --Build maps from chain ends to chains
  420         endFrontier, startFrontier :: FrontierMap
  421         endFrontier =
  422             mapFromList $ concatMap (\chain ->
  423                                 let ends = getEnds chain :: [BlockId]
  424                                     entry = (ends,chain)
  425                                 in map (\x -> (x,entry)) ends ) chains
  426         startFrontier =
  427             mapFromList $ concatMap (\chain ->
  428                                 let front = getFronts chain
  429                                     entry = (front,chain)
  430                                 in map (\x -> (x,entry)) front) chains
  431         applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId)
  432                    -> ([BlockChain], Set.Set (BlockId,BlockId))
  433         applyEdges [] chainEnds _chainFronts combined =
  434             (ordNub $ map snd $ mapElems chainEnds, combined)
  435         applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined
  436             | Just (c1_e,c1) <- mapLookup from chainEnds
  437             , Just (c2_f,c2) <- mapLookup to chainFronts
  438             , c1 /= c2 -- Avoid trying to concat a chain with itself.
  439             = let newChain = chainConcat c1 c2
  440                   newChainFrontier = getFronts newChain
  441                   newChainEnds = getEnds newChain
  442                   newFronts :: FrontierMap
  443                   newFronts =
  444                     let withoutOld =
  445                             foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1)
  446                         entry =
  447                             (newChainFrontier,newChain) --let bound to ensure sharing
  448                     in foldl' (\m x -> mapInsert x entry m)
  449                               withoutOld newChainFrontier
  450 
  451                   newEnds =
  452                     let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2)
  453                         entry = (newChainEnds,newChain) --let bound to ensure sharing
  454                     in foldl' (\m x -> mapInsert x entry m)
  455                               withoutOld newChainEnds
  456               in
  457                 -- pprTrace "ApplyEdges"
  458                 --  (text "before" $$
  459                 --   text "fronts" <+> ppr chainFronts $$
  460                 --   text "ends" <+> ppr chainEnds $$
  461 
  462                 --   text "various" $$
  463                 --   text "newChain" <+> ppr newChain $$
  464                 --   text "newChainFrontier" <+> ppr newChainFrontier $$
  465                 --   text "newChainEnds" <+> ppr newChainEnds $$
  466                 --   text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$
  467 
  468                 --   text "after" $$
  469                 --   text "fronts" <+> ppr newFronts $$
  470                 --   text "ends" <+> ppr newEnds
  471                 --   )
  472                  applyEdges edges newEnds newFronts (Set.insert (from,to) combined)
  473             | otherwise
  474             = applyEdges edges chainEnds chainFronts combined
  475 
  476         getFronts chain = takeL neighbourOverlapp chain
  477         getEnds chain = takeR neighbourOverlapp chain
  478 
  479 -- In the last stop we combine all chains into a single one.
  480 -- Trying to place chains with strong edges next to each other.
  481 mergeChains :: [CfgEdge] -> [BlockChain]
  482             -> (BlockChain)
  483 mergeChains edges chains
  484     = runST $ do
  485         let addChain m0 chain = do
  486                 ref <- fresh chain
  487                 return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain
  488         chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains
  489         merge edges chainMap'
  490     where
  491         -- We keep a map from ALL blocks to their respective chain (sigh)
  492         -- This is required since when looking at an edge we need to find
  493         -- the associated chains quickly.
  494         -- We use a union-find data structure to do this efficiently.
  495 
  496         merge :: forall s. [CfgEdge] -> LabelMap (Point s BlockChain) -> ST s BlockChain
  497         merge [] chains = do
  498             chains' <- mapM find =<< (nub <$> (mapM repr $ mapElems chains)) :: ST s [BlockChain]
  499             return $ foldl' chainConcat (head chains') (tail chains')
  500         merge ((CfgEdge from to _):edges) chains
  501         --   | pprTrace "merge" (ppr (from,to) <> ppr chains) False
  502         --   = undefined
  503           = do
  504             same <- equivalent cFrom cTo
  505             unless same $ do
  506               cRight <- find cTo
  507               cLeft <- find cFrom
  508               new_point <- fresh (chainConcat cLeft cRight)
  509               union cTo new_point
  510               union cFrom new_point
  511             merge edges chains
  512           where
  513             cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains
  514             cTo = expectJust "mergeChains:chainMap:to"   $ mapLookup to   chains
  515 
  516 
  517 -- See Note [Chain based CFG serialization] for the general idea.
  518 -- This creates and fuses chains at the same time for performance reasons.
  519 
  520 -- Try to build chains from a list of edges.
  521 -- Edges must be sorted **descending** by their priority.
  522 -- Returns the constructed chains, along with all edges which
  523 -- are irrelevant past this point, this information doesn't need
  524 -- to be complete - it's only used to speed up the process.
  525 -- An Edge is irrelevant if the ends are part of the same chain.
  526 -- We say these edges are already linked
  527 buildChains :: [CfgEdge] -> [BlockId]
  528             -> ( LabelMap BlockChain  -- Resulting chains, indexed by end if chain.
  529                , Set.Set (BlockId, BlockId)) --List of fused edges.
  530 buildChains edges blocks
  531   = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty
  532   where
  533     -- buildNext builds up chains from edges one at a time.
  534 
  535     -- We keep a map from the ends of chains to the chains.
  536     -- This way we can easily check if an block should be appended to an
  537     -- existing chain!
  538     -- We store them using STRefs so we don't have to rebuild the spine of both
  539     -- maps every time we update a chain.
  540     buildNext :: forall s. LabelSet
  541               -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain.
  542               -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain.
  543               -> [CfgEdge] -- Edges to check - ordered by decreasing weight
  544               -> Set.Set (BlockId, BlockId) -- Used edges
  545               -> ST s   ( LabelMap BlockChain -- Chains by end
  546                         , Set.Set (BlockId, BlockId) --List of fused edges
  547                         )
  548     buildNext placed _chainStarts chainEnds  [] linked = do
  549         ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain)
  550         -- Any remaining blocks have to be made to singleton chains.
  551         -- They might be combined with other chains later on outside this function.
  552         let unplaced = filter (\x -> not (setMember x placed)) blocks
  553             singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)]
  554         return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked)
  555     buildNext placed chainStarts chainEnds (edge:todo) linked
  556         | from == to
  557         -- We skip self edges
  558         = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked)
  559         | not (alreadyPlaced from) &&
  560           not (alreadyPlaced to)
  561         = do
  562             --pprTraceM "Edge-Chain:" (ppr edge)
  563             chain' <- newSTRef $ chainFromList [from,to]
  564             buildNext
  565                 (setInsert to (setInsert from placed))
  566                 (mapInsert from chain' chainStarts)
  567                 (mapInsert to chain' chainEnds)
  568                 todo
  569                 (Set.insert (from,to) linked)
  570 
  571         | (alreadyPlaced from) &&
  572           (alreadyPlaced to)
  573         , Just predChain <- mapLookup from chainEnds
  574         , Just succChain <- mapLookup to chainStarts
  575         , predChain /= succChain -- Otherwise we try to create a cycle.
  576           = fuseChain predChain succChain
  577 
  578         | (alreadyPlaced from) &&
  579           (alreadyPlaced to)
  580           = buildNext placed chainStarts chainEnds todo linked
  581 
  582         | otherwise
  583           = findChain
  584       where
  585         from = edgeFrom edge
  586         to   = edgeTo   edge
  587         alreadyPlaced blkId = (setMember blkId placed)
  588 
  589         -- Combine two chains into a single one.
  590         fuseChain :: STRef s BlockChain -> STRef s BlockChain
  591                   -> ST s   ( LabelMap BlockChain -- Chains by end
  592                             , Set.Set (BlockId, BlockId) --List of fused edges
  593                             )
  594         fuseChain fromRef toRef = do
  595             fromChain <- readSTRef fromRef
  596             toChain <- readSTRef toRef
  597             let newChain = chainConcat fromChain toChain
  598             ref <- newSTRef newChain
  599             let start = head $ takeL 1 newChain
  600             let end = head $ takeR 1 newChain
  601             -- chains <- sequence $ mapMap readSTRef chainStarts
  602             -- pprTraceM "pre-fuse chains:" $ ppr chains
  603             buildNext
  604                 placed
  605                 (mapInsert start ref $ mapDelete to $ chainStarts)
  606                 (mapInsert end ref $ mapDelete from $ chainEnds)
  607                 todo
  608                 (Set.insert (from,to) linked)
  609 
  610 
  611         --Add the block to a existing chain or creates a new chain
  612         findChain :: ST s   ( LabelMap BlockChain -- Chains by end
  613                             , Set.Set (BlockId, BlockId) --List of fused edges
  614                             )
  615         findChain
  616           -- We can attach the block to the end of a chain
  617           | alreadyPlaced from
  618           , Just predChain <- mapLookup from chainEnds
  619           = do
  620             chain <- readSTRef predChain
  621             let newChain = chainSnoc chain to
  622             writeSTRef predChain newChain
  623             let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds
  624             -- chains <- sequence $ mapMap readSTRef chainStarts
  625             -- pprTraceM "from chains:" $ ppr chains
  626             buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked)
  627           -- We can attack it to the front of a chain
  628           | alreadyPlaced to
  629           , Just succChain <- mapLookup to chainStarts
  630           = do
  631             chain <- readSTRef succChain
  632             let newChain = from `chainCons` chain
  633             writeSTRef succChain newChain
  634             let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts
  635             -- chains <- sequence $ mapMap readSTRef chainStarts'
  636             -- pprTraceM "to chains:" $ ppr chains
  637             buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked)
  638           -- The placed end of the edge is part of a chain already and not an end.
  639           | otherwise
  640           = do
  641             let block    = if alreadyPlaced to then from else to
  642             --pprTraceM "Singleton" $ ppr block
  643             let newChain = chainSingleton block
  644             ref <- newSTRef newChain
  645             buildNext (setInsert block placed) (mapInsert block ref chainStarts)
  646                       (mapInsert block ref chainEnds) todo (linked)
  647             where
  648               alreadyPlaced blkId = (setMember blkId placed)
  649 
  650 -- | Place basic blocks based on the given CFG.
  651 -- See Note [Chain based CFG serialization]
  652 sequenceChain :: forall a i. Instruction i
  653               => LabelMap a -- ^ Keys indicate an info table on the block.
  654               -> CFG -- ^ Control flow graph and some meta data.
  655               -> [GenBasicBlock i] -- ^ List of basic blocks to be placed.
  656               -> [GenBasicBlock i] -- ^ Blocks placed in sequence.
  657 sequenceChain _info _weights    [] = []
  658 sequenceChain _info _weights    [x] = [x]
  659 sequenceChain  info weights     blocks@((BasicBlock entry _):_) =
  660     let directEdges :: [CfgEdge]
  661         directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
  662           where
  663             -- Apply modifiers to turn edge frequencies into useable weights
  664             -- for computing code layout.
  665             -- See also Note [Layout relevant edge weights]
  666             relevantWeight :: CfgEdge -> Maybe CfgEdge
  667             relevantWeight edge@(CfgEdge from to edgeInfo)
  668                 | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo
  669                 -- Ignore edges across calls.
  670                 = Nothing
  671                 | mapMember to info
  672                 , w <- edgeWeight edgeInfo
  673                 -- The payoff is quite small if we jump over an info table
  674                 = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 })
  675                 | (EdgeInfo CmmSource { trans_cmmNode = exitNode } _) <- edgeInfo
  676                 , cantEliminate exitNode
  677                 , w <- edgeWeight edgeInfo
  678                 -- A small penalty to edge types which
  679                 -- we can't optimize away by layout.
  680                 -- w * 0.96875 == w - w/32
  681                 = Just (CfgEdge from to edgeInfo { edgeWeight = w * 0.96875 })
  682                 | otherwise
  683                 = Just edge
  684                 where
  685                   cantEliminate CmmCondBranch {} = True
  686                   cantEliminate CmmSwitch {} = True
  687                   cantEliminate _ = False
  688 
  689         blockMap :: LabelMap (GenBasicBlock i)
  690         blockMap
  691             = foldl' (\m blk@(BasicBlock lbl _ins) ->
  692                         mapInsert lbl blk m)
  693                      mapEmpty blocks
  694 
  695         (builtChains, builtEdges)
  696             = {-# SCC "buildChains" #-}
  697               --pprTraceIt "generatedChains" $
  698               --pprTrace "blocks" (ppr (mapKeys blockMap)) $
  699               buildChains directEdges (mapKeys blockMap)
  700 
  701         rankedEdges :: [CfgEdge]
  702         -- Sort descending by weight, remove fused edges
  703         rankedEdges =
  704             filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $
  705             directEdges
  706 
  707         (neighbourChains, combined)
  708             = assert (noDups $ mapElems builtChains) $
  709               {-# SCC "groupNeighbourChains" #-}
  710             --   pprTraceIt "NeighbourChains" $
  711               combineNeighbourhood rankedEdges (mapElems builtChains)
  712 
  713 
  714         allEdges :: [CfgEdge]
  715         allEdges = {-# SCC allEdges #-}
  716                    sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights)
  717           where
  718             deadEdge :: CfgEdge -> Bool
  719             deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges
  720             relevantWeight :: CfgEdge -> EdgeWeight
  721             relevantWeight (CfgEdge _ _ edgeInfo)
  722                 | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo
  723                 -- Penalize edges across calls
  724                 = weight/(64.0)
  725                 | otherwise
  726                 = weight
  727               where
  728                 -- negate to sort descending
  729                 weight = negate (edgeWeight edgeInfo)
  730 
  731         masterChain =
  732             {-# SCC "mergeChains" #-}
  733             -- pprTraceIt "MergedChains" $
  734             mergeChains allEdges neighbourChains
  735 
  736         --Make sure the first block stays first
  737         prepedChains
  738             | inFront entry masterChain
  739             = [masterChain]
  740             | (rest,entry) <- breakChainAt entry masterChain
  741             = [entry,rest]
  742 #if __GLASGOW_HASKELL__ <= 810
  743             | otherwise = pprPanic "Entry point eliminated" $
  744                             ppr masterChain
  745 #endif
  746 
  747         blockList
  748             = assert (noDups [masterChain])
  749               (concatMap fromOL $ map chainBlocks prepedChains)
  750 
  751         --chainPlaced = setFromList $ map blockId blockList :: LabelSet
  752         chainPlaced = setFromList $ blockList :: LabelSet
  753         unplaced =
  754             let blocks = mapKeys blockMap
  755                 isPlaced b = setMember (b) chainPlaced
  756             in filter (\block -> not (isPlaced block)) blocks
  757 
  758         placedBlocks =
  759             -- We want debug builds to catch this as it's a good indicator for
  760             -- issues with CFG invariants. But we don't want to blow up production
  761             -- builds if something slips through.
  762             assert (null unplaced) $
  763             --pprTraceIt "placedBlocks" $
  764             -- ++ [] is stil kinda expensive
  765             if null unplaced then blockList else blockList ++ unplaced
  766         getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap
  767     in
  768         --Assert we placed all blocks given as input
  769         assert (all (\bid -> mapMember bid blockMap) placedBlocks) $
  770         dropJumps info $ map getBlock placedBlocks
  771 
  772 {-# SCC dropJumps #-}
  773 -- | Remove redundant jumps between blocks when we can rely on
  774 -- fall through.
  775 dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i]
  776           -> [GenBasicBlock i]
  777 dropJumps _    [] = []
  778 dropJumps info ((BasicBlock lbl ins):todo)
  779     | not . null $ ins --This can happen because of shortcutting
  780     , [dest] <- jumpDestsOfInstr (last ins)
  781     , ((BasicBlock nextLbl _) : _) <- todo
  782     , not (mapMember dest info)
  783     , nextLbl == dest
  784     = BasicBlock lbl (init ins) : dropJumps info todo
  785     | otherwise
  786     = BasicBlock lbl ins : dropJumps info todo
  787 
  788 
  789 -- -----------------------------------------------------------------------------
  790 -- Sequencing the basic blocks
  791 
  792 -- Cmm BasicBlocks are self-contained entities: they always end in a
  793 -- jump, either non-local or to another basic block in the same proc.
  794 -- In this phase, we attempt to place the basic blocks in a sequence
  795 -- such that as many of the local jumps as possible turn into
  796 -- fallthroughs.
  797 
  798 sequenceTop
  799     :: Instruction instr
  800     => NcgImpl statics instr jumpDest
  801     -> Maybe CFG -- ^ CFG if we have one.
  802     -> NatCmmDecl statics instr -- ^ Function to serialize
  803     -> NatCmmDecl statics instr
  804 
  805 sequenceTop _       _           top@(CmmData _ _) = top
  806 sequenceTop ncgImpl edgeWeights (CmmProc info lbl live (ListGraph blocks))
  807   = let
  808       config     = ncgConfig ncgImpl
  809       platform   = ncgPlatform config
  810 
  811     in CmmProc info lbl live $ ListGraph $ ncgMakeFarBranches ncgImpl info $
  812          if -- Chain based algorithm
  813             | ncgCfgBlockLayout config
  814             , backendMaintainsCfg platform
  815             , Just cfg <- edgeWeights
  816             -> {-# SCC layoutBlocks #-} sequenceChain info cfg blocks
  817 
  818             -- Old algorithm without edge weights
  819             | ncgCfgWeightlessLayout config
  820                || not (backendMaintainsCfg platform)
  821             -> {-# SCC layoutBlocks #-} sequenceBlocks Nothing info blocks
  822 
  823             -- Old algorithm with edge weights (if any)
  824             | otherwise
  825             -> {-# SCC layoutBlocks #-} sequenceBlocks edgeWeights info blocks
  826 
  827 -- The old algorithm:
  828 -- It is very simple (and stupid): We make a graph out of
  829 -- the blocks where there is an edge from one block to another iff the
  830 -- first block ends by jumping to the second.  Then we topologically
  831 -- sort this graph.  Then traverse the list: for each block, we first
  832 -- output the block, then if it has an out edge, we move the
  833 -- destination of the out edge to the front of the list, and continue.
  834 
  835 -- FYI, the classic layout for basic blocks uses postorder DFS; this
  836 -- algorithm is implemented in Hoopl.
  837 
  838 sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a
  839                -> [GenBasicBlock inst] -> [GenBasicBlock inst]
  840 sequenceBlocks _edgeWeight _ [] = []
  841 sequenceBlocks edgeWeights infos (entry:blocks) =
  842     let entryNode = mkNode edgeWeights entry
  843         bodyNodes = reverse
  844                     (flattenSCCs (sccBlocks edgeWeights blocks))
  845     in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes)
  846   -- the first block is the entry point ==> it must remain at the start.
  847 
  848 sccBlocks
  849         :: Instruction instr
  850         => Maybe CFG -> [NatBasicBlock instr]
  851         -> [SCC (Node BlockId (NatBasicBlock instr))]
  852 sccBlocks edgeWeights blocks =
  853     stronglyConnCompFromEdgedVerticesUniqR
  854         (map (mkNode edgeWeights) blocks)
  855 
  856 mkNode :: (Instruction t)
  857        => Maybe CFG -> GenBasicBlock t
  858        -> Node BlockId (GenBasicBlock t)
  859 mkNode edgeWeights block@(BasicBlock id instrs) =
  860     DigraphNode block id outEdges
  861   where
  862     outEdges :: [BlockId]
  863     outEdges
  864       --Select the heaviest successor, ignore weights <= zero
  865       = successor
  866       where
  867         successor
  868           | Just successors <- fmap (`getSuccEdgesSorted` id)
  869                                     edgeWeights -- :: Maybe [(Label, EdgeInfo)]
  870           = case successors of
  871             [] -> []
  872             ((target,info):_)
  873               | length successors > 2 || edgeWeight info <= 0 -> []
  874               | otherwise -> [target]
  875           | otherwise
  876           = case jumpDestsOfInstr (last instrs) of
  877                 [one] -> [one]
  878                 _many -> []
  879 
  880 
  881 seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
  882                         -> [GenBasicBlock t1]
  883 seqBlocks infos blocks = placeNext pullable0 todo0
  884   where
  885     -- pullable: Blocks that are not yet placed
  886     -- todo:     Original order of blocks, to be followed if we have no good
  887     --           reason not to;
  888     --           may include blocks that have already been placed, but then
  889     --           these are not in pullable
  890     pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
  891     todo0     = map node_key blocks
  892 
  893     placeNext _ [] = []
  894     placeNext pullable (i:rest)
  895         | Just (block, pullable') <- lookupDeleteUFM pullable i
  896         = place pullable' rest block
  897         | otherwise
  898         -- We already placed this block, so ignore
  899         = placeNext pullable rest
  900 
  901     place pullable todo (block,[])
  902                           = block : placeNext pullable todo
  903     place pullable todo (block@(BasicBlock id instrs),[next])
  904         | mapMember next infos
  905         = block : placeNext pullable todo
  906         | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
  907         = BasicBlock id instrs : place pullable' todo nextBlock
  908         | otherwise
  909         = block : placeNext pullable todo
  910     place _ _ (_,tooManyNextNodes)
  911         = pprPanic "seqBlocks" (ppr tooManyNextNodes)
  912 
  913 
  914 lookupDeleteUFM :: UniqFM BlockId elt -> BlockId
  915                 -> Maybe (elt, UniqFM BlockId elt)
  916 lookupDeleteUFM m k = do -- Maybe monad
  917     v <- lookupUFM m k
  918     return (v, delFromUFM m k)
  919 
  920 backendMaintainsCfg :: Platform -> Bool
  921 backendMaintainsCfg platform = case platformArch platform of
  922     -- ArchX86 -- Should work but not tested so disabled currently.
  923     ArchX86_64 -> True
  924     _otherwise -> False
  925