never executed always true always false
    1 {-# LANGUAGE GADTs #-}
    2 {-# LANGUAGE BangPatterns #-}
    3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
    4 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
    5 module GHC.Cmm.ContFlowOpt
    6     ( cmmCfgOpts
    7     , cmmCfgOptsProc
    8     , removeUnreachableBlocksProc
    9     , replaceLabels
   10     )
   11 where
   12 
   13 import GHC.Prelude hiding (succ, unzip, zip)
   14 
   15 import GHC.Cmm.Dataflow.Block
   16 import GHC.Cmm.Dataflow.Collections
   17 import GHC.Cmm.Dataflow.Graph
   18 import GHC.Cmm.Dataflow.Label
   19 import GHC.Cmm.BlockId
   20 import GHC.Cmm
   21 import GHC.Cmm.Utils
   22 import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
   23 import GHC.Data.Maybe
   24 import GHC.Utils.Panic
   25 import GHC.Utils.Misc
   26 
   27 import Control.Monad
   28 
   29 
   30 -- Note [What is shortcutting]
   31 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   32 --
   33 -- Consider this Cmm code:
   34 --
   35 -- L1: ...
   36 --     goto L2;
   37 -- L2: goto L3;
   38 -- L3: ...
   39 --
   40 -- Here L2 is an empty block and contains only an unconditional branch
   41 -- to L3. In this situation any block that jumps to L2 can jump
   42 -- directly to L3:
   43 --
   44 -- L1: ...
   45 --     goto L3;
   46 -- L2: goto L3;
   47 -- L3: ...
   48 --
   49 -- In this situation we say that we shortcut L2 to L3. One of
   50 -- consequences of shortcutting is that some blocks of code may become
   51 -- unreachable (in the example above this is true for L2).
   52 
   53 
   54 -- Note [Control-flow optimisations]
   55 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   56 --
   57 -- This optimisation does three things:
   58 --
   59 --   - If a block finishes in an unconditional branch to another block
   60 --     and that is the only jump to that block we concatenate the
   61 --     destination block at the end of the current one.
   62 --
   63 --   - If a block finishes in a call whose continuation block is a
   64 --     goto, then we can shortcut the destination, making the
   65 --     continuation block the destination of the goto - but see Note
   66 --     [Shortcut call returns].
   67 --
   68 --   - For any block that is not a call we try to shortcut the
   69 --     destination(s). Additionally, if a block ends with a
   70 --     conditional branch we try to invert the condition.
   71 --
   72 -- Blocks are processed using postorder DFS traversal. A side effect
   73 -- of determining traversal order with a graph search is elimination
   74 -- of any blocks that are unreachable.
   75 --
   76 -- Transformations are improved by working from the end of the graph
   77 -- towards the beginning, because we may be able to perform many
   78 -- shortcuts in one go.
   79 
   80 
   81 -- Note [Shortcut call returns]
   82 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   83 --
   84 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
   85 -- we go, and also a mapping from BlockId to BlockId, representing
   86 -- continuation labels that we have renamed.  This latter mapping is
   87 -- important because we might shortcut a CmmCall continuation.  For
   88 -- example:
   89 --
   90 --    Sp[0] = L
   91 --    call g returns to L
   92 --    L: goto M
   93 --    M: ...
   94 --
   95 -- So when we shortcut the L block, we need to replace not only
   96 -- the continuation of the call, but also references to L in the
   97 -- code (e.g. the assignment Sp[0] = L):
   98 --
   99 --    Sp[0] = M
  100 --    call g returns to M
  101 --    M: ...
  102 --
  103 -- So we keep track of which labels we have renamed and apply the mapping
  104 -- at the end with replaceLabels.
  105 
  106 
  107 -- Note [Shortcut call returns and proc-points]
  108 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  109 --
  110 -- Consider this code that you might get from a recursive
  111 -- let-no-escape:
  112 --
  113 --       goto L1
  114 --      L1:
  115 --       if (Hp > HpLim) then L2 else L3
  116 --      L2:
  117 --       call stg_gc_noregs returns to L4
  118 --      L4:
  119 --       goto L1
  120 --      L3:
  121 --       ...
  122 --       goto L1
  123 --
  124 -- Then the control-flow optimiser shortcuts L4.  But that turns L1
  125 -- into the call-return proc point, and every iteration of the loop
  126 -- has to shuffle variables to and from the stack.  So we must *not*
  127 -- shortcut L4.
  128 --
  129 -- Moreover not shortcutting call returns is probably fine.  If L4 can
  130 -- concat with its branch target then it will still do so.  And we
  131 -- save some compile time because we don't have to traverse all the
  132 -- code in replaceLabels.
  133 --
  134 -- However, we probably do want to do this if we are splitting proc
  135 -- points, because L1 will be a proc-point anyway, so merging it with
  136 -- L4 reduces the number of proc points.  Unfortunately recursive
  137 -- let-no-escapes won't generate very good code with proc-point
  138 -- splitting on - we should probably compile them to explicitly use
  139 -- the native calling convention instead.
  140 
  141 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
  142 cmmCfgOpts split g = fst (blockConcat split g)
  143 
  144 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
  145 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
  146     where (g', env) = blockConcat split g
  147           info' = info{ info_tbls = new_info_tbls }
  148           new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
  149 
  150           -- If we changed any labels, then we have to update the info tables
  151           -- too, except for the top-level info table because that might be
  152           -- referred to by other procs.
  153           upd_info (k,info)
  154              | Just k' <- mapLookup k env
  155              = (k', if k' == g_entry g'
  156                        then info
  157                        else info{ cit_lbl = infoTblLbl k' })
  158              | otherwise
  159              = (k,info)
  160 cmmCfgOptsProc _ top = top
  161 
  162 
  163 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
  164 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  165   = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  166   where
  167      -- We might be able to shortcut the entry BlockId itself.
  168      -- Remember to update the shortcut_map, since we also have to
  169      -- update the info_tbls mapping now.
  170      (new_entry, shortcut_map')
  171        | Just entry_blk <- mapLookup entry_id new_blocks
  172        , Just dest      <- canShortcut entry_blk
  173        = (dest, mapInsert entry_id dest shortcut_map)
  174        | otherwise
  175        = (entry_id, shortcut_map)
  176 
  177      -- blocks are sorted in reverse postorder, but we want to go from the exit
  178      -- towards beginning, so we use foldr below.
  179      blocks = revPostorder g
  180      blockmap = foldl' (flip addBlock) emptyBody blocks
  181 
  182      -- Accumulator contains three components:
  183      --  * map of blocks in a graph
  184      --  * map of shortcut labels. See Note [Shortcut call returns]
  185      --  * map containing number of predecessors for each block. We discard
  186      --    it after we process all blocks.
  187      (new_blocks, shortcut_map, _) =
  188            foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
  189 
  190      -- Map of predecessors for initial graph. We increase number of
  191      -- predecessors for entry block by one to denote that it is
  192      -- target of a jump, even if no block in the current graph jumps
  193      -- to it.
  194      initialBackEdges = incPreds entry_id (predMap blocks)
  195 
  196      maybe_concat :: CmmBlock
  197                   -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
  198                   -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
  199      maybe_concat block (!blocks, !shortcut_map, !backEdges)
  200         -- If:
  201         --   (1) current block ends with unconditional branch to b' and
  202         --   (2) it has exactly one predecessor (namely, current block)
  203         --
  204         -- Then:
  205         --   (1) append b' block at the end of current block
  206         --   (2) remove b' from the map of blocks
  207         --   (3) remove information about b' from predecessors map
  208         --
  209         -- Since we know that the block has only one predecessor we call
  210         -- mapDelete directly instead of calling decPreds.
  211         --
  212         -- Note that we always maintain an up-to-date list of predecessors, so
  213         -- we can ignore the contents of shortcut_map
  214         | CmmBranch b' <- last
  215         , hasOnePredecessor b'
  216         , Just blk' <- mapLookup b' blocks
  217         = let bid' = entryLabel blk'
  218           in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
  219              , shortcut_map
  220              , mapDelete b' backEdges )
  221 
  222         -- If:
  223         --   (1) we are splitting proc points (see Note
  224         --       [Shortcut call returns and proc-points]) and
  225         --   (2) current block is a CmmCall or CmmForeignCall with
  226         --       continuation b' and
  227         --   (3) we can shortcut that continuation to dest
  228         -- Then:
  229         --   (1) we change continuation to point to b'
  230         --   (2) create mapping from b' to dest
  231         --   (3) increase number of predecessors of dest by 1
  232         --   (4) decrease number of predecessors of b' by 1
  233         --
  234         -- Later we will use replaceLabels to substitute all occurrences of b'
  235         -- with dest.
  236         | splitting_procs
  237         , Just b'   <- callContinuation_maybe last
  238         , Just blk' <- mapLookup b' blocks
  239         , Just dest <- canShortcut blk'
  240         = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
  241           , mapInsert b' dest shortcut_map
  242           , decPreds b' $ incPreds dest backEdges )
  243 
  244         -- If:
  245         --   (1) a block does not end with a call
  246         -- Then:
  247         --   (1) if it ends with a conditional attempt to invert the
  248         --       conditional
  249         --   (2) attempt to shortcut all destination blocks
  250         --   (3) if new successors of a block are different from the old ones
  251         --       update the of predecessors accordingly
  252         --
  253         -- A special case of this is a situation when a block ends with an
  254         -- unconditional jump to a block that can be shortcut.
  255         | Nothing <- callContinuation_maybe last
  256         = let oldSuccs = successors last
  257               newSuccs = successors rewrite_last
  258           in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
  259              , shortcut_map
  260              , if oldSuccs == newSuccs
  261                then backEdges
  262                else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
  263 
  264         -- Otherwise don't do anything
  265         | otherwise
  266         = ( blocks, shortcut_map, backEdges )
  267         where
  268           (head, last) = blockSplitTail block
  269           bid = entryLabel block
  270 
  271           -- Changes continuation of a call to a specified label
  272           update_cont dest =
  273               case last of
  274                 CmmCall{}        -> last { cml_cont = Just dest }
  275                 CmmForeignCall{} -> last { succ = dest }
  276                 _                -> panic "Can't shortcut continuation."
  277 
  278           -- Attempts to shortcut successors of last node
  279           shortcut_last = mapSuccessors shortcut last
  280             where
  281               shortcut l =
  282                  case mapLookup l blocks of
  283                    Just b | Just dest <- canShortcut b -> dest
  284                    _otherwise -> l
  285 
  286           rewrite_last
  287             -- Sometimes we can get rid of the conditional completely.
  288             | CmmCondBranch _cond t f _l <- shortcut_last
  289             , t == f
  290             = CmmBranch t
  291 
  292             -- See Note [Invert Cmm conditionals]
  293             | CmmCondBranch cond t f l <- shortcut_last
  294             , hasOnePredecessor t -- inverting will make t a fallthrough
  295             , likelyTrue l || (numPreds f > 1)
  296             , Just cond' <- maybeInvertCmmExpr cond
  297             = CmmCondBranch cond' f t (invertLikeliness l)
  298 
  299             -- If all jump destinations of a switch go to the
  300             -- same target eliminate the switch.
  301             | CmmSwitch _expr targets <- shortcut_last
  302             , (t:ts) <- switchTargetsToList targets
  303             , all (== t) ts
  304             = CmmBranch t
  305 
  306             | otherwise
  307             = shortcut_last
  308 
  309           likelyTrue (Just True)   = True
  310           likelyTrue _             = False
  311 
  312           invertLikeliness :: Maybe Bool -> Maybe Bool
  313           invertLikeliness         = fmap not
  314 
  315           -- Number of predecessors for a block
  316           numPreds bid = mapLookup bid backEdges `orElse` 0
  317 
  318           hasOnePredecessor b = numPreds b == 1
  319 
  320 {-
  321   Note [Invert Cmm conditionals]
  322   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  323   The native code generator always produces jumps to the true branch.
  324   Falling through to the false branch is however faster. So we try to
  325   arrange for that to happen.
  326   This means we invert the condition if:
  327   * The likely path will become a fallthrough.
  328   * We can't guarantee a fallthrough for the false branch but for the
  329     true branch.
  330 
  331   In some cases it's faster to avoid inverting when the false branch is likely.
  332   However determining when that is the case is neither easy nor cheap so for
  333   now we always invert as this produces smaller binaries and code that is
  334   equally fast on average. (On an i7-6700K)
  335 
  336   TODO:
  337   There is also the edge case when both branches have multiple predecessors.
  338   In this case we could assume that we will end up with a jump for BOTH
  339   branches. In this case it might be best to put the likely path in the true
  340   branch especially if there are large numbers of predecessors as this saves
  341   us the jump thats not taken. However I haven't tested this and as of early
  342   2018 we almost never generate cmm where this would apply.
  343 -}
  344 
  345 -- Functions for incrementing and decrementing number of predecessors. If
  346 -- decrementing would set the predecessor count to 0, we remove entry from the
  347 -- map.
  348 -- Invariant: if a block has no predecessors it should be dropped from the
  349 -- graph because it is unreachable. maybe_concat is constructed to maintain
  350 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
  351 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
  352 -- blocks.
  353 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
  354 incPreds bid edges = mapInsertWith (+) bid 1 edges
  355 decPreds bid edges = case mapLookup bid edges of
  356                        Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
  357                        Just _                 -> mapDelete bid edges
  358                        _                      -> edges
  359 
  360 
  361 -- Checks if a block consists only of "goto dest". If it does than we return
  362 -- "Just dest" label. See Note [What is shortcutting]
  363 canShortcut :: CmmBlock -> Maybe BlockId
  364 canShortcut block
  365     | (_, middle, CmmBranch dest) <- blockSplit block
  366     , all dont_care $ blockToList middle
  367     = Just dest
  368     | otherwise
  369     = Nothing
  370     where dont_care CmmComment{} = True
  371           dont_care CmmTick{}    = True
  372           dont_care _other       = False
  373 
  374 -- Concatenates two blocks. First one is assumed to be open on exit, the second
  375 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
  376 -- the splice function removes by calling snd on result of blockSplitHead).
  377 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
  378 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
  379   where (CmmEntry lbl sc0, code0) = blockSplitHead head
  380         (CmmEntry _   sc1, code1) = blockSplitHead rest
  381         entry = CmmEntry lbl (combineTickScopes sc0 sc1)
  382 
  383 -- If node is a call with continuation call return Just label of that
  384 -- continuation. Otherwise return Nothing.
  385 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
  386 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
  387 callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
  388 callContinuation_maybe _ = Nothing
  389 
  390 
  391 -- Map over the CmmGraph, replacing each label with its mapping in the
  392 -- supplied LabelMap.
  393 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
  394 replaceLabels env g
  395   | mapNull env = g
  396   | otherwise   = replace_eid $ mapGraphNodes1 txnode g
  397    where
  398      replace_eid g = g {g_entry = lookup (g_entry g)}
  399      lookup id = mapLookup id env `orElse` id
  400 
  401      txnode :: CmmNode e x -> CmmNode e x
  402      txnode (CmmBranch bid) = CmmBranch (lookup bid)
  403      txnode (CmmCondBranch p t f l) =
  404        mkCmmCondBranch (exp p) (lookup t) (lookup f) l
  405      txnode (CmmSwitch e ids) =
  406        CmmSwitch (exp e) (mapSwitchTargets lookup ids)
  407      txnode (CmmCall t k rg a res r) =
  408        CmmCall (exp t) (liftM lookup k) rg a res r
  409      txnode fc@CmmForeignCall{} =
  410        fc{ args = map exp (args fc), succ = lookup (succ fc) }
  411      txnode other = mapExpDeep exp other
  412 
  413      exp :: CmmExpr -> CmmExpr
  414      exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid))
  415      exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
  416      exp e                                      = e
  417 
  418 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
  419 mkCmmCondBranch p t f l =
  420   if t == f then CmmBranch t else CmmCondBranch p t f l
  421 
  422 -- Build a map from a block to its set of predecessors.
  423 predMap :: [CmmBlock] -> LabelMap Int
  424 predMap blocks = foldr add_preds mapEmpty blocks
  425   where
  426     add_preds block env = foldr add env (successors block)
  427       where add lbl env = mapInsertWith (+) lbl 1 env
  428 
  429 -- Removing unreachable blocks
  430 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
  431 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
  432    | used_blocks `lengthLessThan` mapSize (toBlockMap g)
  433    = CmmProc info' lbl live g'
  434    | otherwise
  435    = proc
  436    where
  437      g'    = ofBlockList (g_entry g) used_blocks
  438      info' = info { info_tbls = keep_used (info_tbls info) }
  439              -- Remove any info_tbls for unreachable
  440 
  441      keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
  442      keep_used bs = mapFoldlWithKey keep mapEmpty bs
  443 
  444      keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
  445      keep env l i | l `setMember` used_lbls = mapInsert l i env
  446                   | otherwise               = env
  447 
  448      used_blocks :: [CmmBlock]
  449      used_blocks = revPostorder g
  450 
  451      used_lbls :: LabelSet
  452      used_lbls = setFromList $ map entryLabel used_blocks