never executed always true always false
    1 {-# LANGUAGE GADTs #-}
    2 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
    3 module GHC.Cmm.Switch (
    4      SwitchTargets,
    5      mkSwitchTargets,
    6      switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
    7      mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
    8      switchTargetsToList, eqSwitchTargetWith,
    9 
   10      SwitchPlan(..),
   11      backendSupportsSwitch,
   12      createSwitchPlan,
   13   ) where
   14 
   15 import GHC.Prelude
   16 
   17 import GHC.Utils.Outputable
   18 import GHC.Driver.Backend
   19 import GHC.Utils.Panic
   20 import GHC.Cmm.Dataflow.Label (Label)
   21 
   22 import Data.Maybe
   23 import Data.List (groupBy)
   24 import Data.Function (on)
   25 import qualified Data.Map as M
   26 
   27 -- Note [Cmm Switches, the general plan]
   28 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   29 --
   30 -- Compiling a high-level switch statement, as it comes out of a STG case
   31 -- expression, for example, allows for a surprising amount of design decisions.
   32 -- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
   33 -- well as from the actual code generation.
   34 --
   35 -- The overall plan is:
   36 --  * The Stg → Cmm transformation creates a single `SwitchTargets` in
   37 --    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
   38 --    At this stage, they are unsuitable for code generation.
   39 --  * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
   40 --    switch statements with code that is suitable for code generation, i.e.
   41 --    a nice balanced tree of decisions with dense jump tables in the leafs.
   42 --    The actual planning of this tree is performed in pure code in createSwitchPlan
   43 --    in this module. See Note [createSwitchPlan].
   44 --  * The actual code generation will not do any further processing and
   45 --    implement each CmmSwitch with a jump tables.
   46 --
   47 -- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
   48 -- statements alone, as we can turn a SwitchTargets value into a nice
   49 -- switch-statement in LLVM resp. C, and leave the rest to the compiler.
   50 --
   51 -- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
   52 -- separated.
   53 
   54 -----------------------------------------------------------------------------
   55 -- Note [Magic Constants in GHC.Cmm.Switch]
   56 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   57 --
   58 -- There are a lot of heuristics here that depend on magic values where it is
   59 -- hard to determine the "best" value (for whatever that means). These are the
   60 -- magic values:
   61 
   62 -- | Number of consecutive default values allowed in a jump table. If there are
   63 -- more of them, the jump tables are split.
   64 --
   65 -- Currently 7, as it costs 7 words of additional code when a jump table is
   66 -- split (at least on x64, determined experimentally).
   67 maxJumpTableHole :: Integer
   68 maxJumpTableHole = 7
   69 
   70 -- | Minimum size of a jump table. If the number is smaller, the switch is
   71 -- implemented using conditionals.
   72 -- Currently 5, because an if-then-else tree of 4 values is nice and compact.
   73 minJumpTableSize :: Int
   74 minJumpTableSize = 5
   75 
   76 -- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
   77 minJumpTableOffset :: Integer
   78 minJumpTableOffset = 2
   79 
   80 
   81 -----------------------------------------------------------------------------
   82 -- Switch Targets
   83 
   84 -- Note [SwitchTargets]
   85 -- ~~~~~~~~~~~~~~~~~~~~
   86 --
   87 -- The branches of a switch are stored in a SwitchTargets, which consists of an
   88 -- (optional) default jump target, and a map from values to jump targets.
   89 --
   90 -- If the default jump target is absent, the behaviour of the switch outside the
   91 -- values of the map is undefined.
   92 --
   93 -- We use an Integer for the keys the map so that it can be used in switches on
   94 -- unsigned as well as signed integers.
   95 --
   96 -- The map may be empty (we prune out-of-range branches here, so it could be us
   97 -- emptying it).
   98 --
   99 -- Before code generation, the table needs to be brought into a form where all
  100 -- entries are non-negative, so that it can be compiled into a jump table.
  101 -- See switchTargetsToTable.
  102 
  103 
  104 -- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
  105 -- value, and knows whether the value is signed, the possible range, an
  106 -- optional default value and a map from values to jump labels.
  107 data SwitchTargets =
  108     SwitchTargets
  109         Bool                       -- Signed values
  110         (Integer, Integer)         -- Range
  111         (Maybe Label)              -- Default value
  112         (M.Map Integer Label)      -- The branches
  113     deriving (Show, Eq)
  114 
  115 -- | The smart constructor mkSwitchTargets normalises the map a bit:
  116 --  * No entries outside the range
  117 --  * No entries equal to the default
  118 --  * No default if all elements have explicit values
  119 mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
  120 mkSwitchTargets signed range@(lo,hi) mbdef ids
  121     = SwitchTargets signed range mbdef' ids'
  122   where
  123     ids' = dropDefault $ restrict ids
  124     mbdef' | defaultNeeded = mbdef
  125            | otherwise     = Nothing
  126 
  127     -- Drop entries outside the range, if there is a range
  128     restrict = restrictMap (lo,hi)
  129 
  130     -- Drop entries that equal the default, if there is a default
  131     dropDefault | Just l <- mbdef = M.filter (/= l)
  132                 | otherwise       = id
  133 
  134     -- Check if the default is still needed
  135     defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
  136 
  137 
  138 -- | Changes all labels mentioned in the SwitchTargets value
  139 mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
  140 mapSwitchTargets f (SwitchTargets signed range mbdef branches)
  141     = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
  142 
  143 -- | Returns the list of non-default branches of the SwitchTargets value
  144 switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
  145 switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
  146 
  147 -- | Return the default label of the SwitchTargets value
  148 switchTargetsDefault :: SwitchTargets -> Maybe Label
  149 switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
  150 
  151 -- | Return the range of the SwitchTargets value
  152 switchTargetsRange :: SwitchTargets -> (Integer, Integer)
  153 switchTargetsRange (SwitchTargets _ range _ _) = range
  154 
  155 -- | Return whether this is used for a signed value
  156 switchTargetsSigned :: SwitchTargets -> Bool
  157 switchTargetsSigned (SwitchTargets signed _ _ _) = signed
  158 
  159 -- | switchTargetsToTable creates a dense jump table, usable for code generation.
  160 --
  161 -- Also returns an offset to add to the value; the list is 0-based on the
  162 -- result of that addition.
  163 --
  164 -- The conversion from Integer to Int is a bit of a wart, as the actual
  165 -- scrutinee might be an unsigned word, but it just works, due to wrap-around
  166 -- arithmetic (as verified by the CmmSwitchTest test case).
  167 switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
  168 switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
  169     = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
  170   where
  171     labelFor i = case M.lookup i branches of Just l -> Just l
  172                                              Nothing -> mbdef
  173     start | lo >= 0 && lo < minJumpTableOffset  = 0  -- See Note [Jump Table Offset]
  174           | otherwise                           = lo
  175 
  176 -- Note [Jump Table Offset]
  177 -- ~~~~~~~~~~~~~~~~~~~~~~~~
  178 --
  179 -- Usually, the code for a jump table starting at x will first subtract x from
  180 -- the value, to avoid a large amount of empty entries. But if x is very small,
  181 -- the extra entries are no worse than the subtraction in terms of code size, and
  182 -- not having to do the subtraction is quicker.
  183 --
  184 -- I.e. instead of
  185 --     _u20N:
  186 --             leaq -1(%r14),%rax
  187 --             jmp *_n20R(,%rax,8)
  188 --     _n20R:
  189 --             .quad   _c20p
  190 --             .quad   _c20q
  191 -- do
  192 --     _u20N:
  193 --             jmp *_n20Q(,%r14,8)
  194 --
  195 --     _n20Q:
  196 --             .quad   0
  197 --             .quad   _c20p
  198 --             .quad   _c20q
  199 --             .quad   _c20r
  200 
  201 -- | The list of all labels occurring in the SwitchTargets value.
  202 switchTargetsToList :: SwitchTargets -> [Label]
  203 switchTargetsToList (SwitchTargets _ _ mbdef branches)
  204     = maybeToList mbdef ++ M.elems branches
  205 
  206 -- | Groups cases with equal targets, suitable for pretty-printing to a
  207 -- c-like switch statement with fall-through semantics.
  208 switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
  209 switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
  210   where
  211     groups = map (\xs -> (map fst xs, snd (head xs))) $
  212              groupBy ((==) `on` snd) $
  213              M.toList branches
  214 
  215 -- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim"
  216 eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
  217 eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
  218     signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
  219   where
  220     goMB Nothing Nothing = True
  221     goMB (Just l1) (Just l2) = l1 `eq` l2
  222     goMB _ _ = False
  223     goList [] [] = True
  224     goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
  225     goList _ _ = False
  226 
  227 -----------------------------------------------------------------------------
  228 -- Code generation for Switches
  229 
  230 
  231 -- | A SwitchPlan abstractly describes how a Switch statement ought to be
  232 -- implemented. See Note [createSwitchPlan]
  233 data SwitchPlan
  234     = Unconditionally Label
  235     | IfEqual Integer Label SwitchPlan
  236     | IfLT Bool Integer SwitchPlan SwitchPlan
  237     | JumpTable SwitchTargets
  238   deriving Show
  239 --
  240 -- Note [createSwitchPlan]
  241 -- ~~~~~~~~~~~~~~~~~~~~~~~
  242 --
  243 -- A SwitchPlan describes how a Switch statement is to be broken down into
  244 -- smaller pieces suitable for code generation.
  245 --
  246 -- createSwitchPlan creates such a switch plan, in these steps:
  247 --  1. It splits the switch statement at segments of non-default values that
  248 --     are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch]
  249 --  2. Too small jump tables should be avoided, so we break up smaller pieces
  250 --     in breakTooSmall.
  251 --  3. We fill in the segments between those pieces with a jump to the default
  252 --     label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
  253 --  4. We find and replace two less-than branches by a single equal-to-test in
  254 --     findSingleValues
  255 --  5. The thus collected pieces are assembled to a balanced binary tree.
  256 
  257 {-
  258   Note [Two alts + default]
  259   ~~~~~~~~~~~~~~~~~~~~~~~~~
  260 
  261 Discussion and a bit more info at #14644
  262 
  263 When dealing with a switch of the form:
  264 switch(e) {
  265   case 1: goto l1;
  266   case 3000: goto l2;
  267   default: goto ldef;
  268 }
  269 
  270 If we treat it as a sparse jump table we would generate:
  271 
  272 if (e > 3000) //Check if value is outside of the jump table.
  273     goto ldef;
  274 else {
  275     if (e < 3000) { //Compare to upper value
  276         if(e != 1) //Compare to remaining value
  277             goto ldef;
  278           else
  279             goto l2;
  280     }
  281     else
  282         goto l1;
  283 }
  284 
  285 Instead we special case this to :
  286 
  287 if (e==1) goto l1;
  288 else if (e==3000) goto l2;
  289 else goto l3;
  290 
  291 This means we have:
  292 * Less comparisons for: 1,<3000
  293 * Unchanged for 3000
  294 * One more for >3000
  295 
  296 This improves code in a few ways:
  297 * One comparison less means smaller code which helps with cache.
  298 * It exchanges a taken jump for two jumps no taken in the >range case.
  299   Jumps not taken are cheaper (See Agner guides) making this about as fast.
  300 * For all other cases the first range check is removed making it faster.
  301 
  302 The end result is that the change is not measurably slower for the case
  303 >3000 and faster for the other cases.
  304 
  305 This makes running this kind of match in an inner loop cheaper by 10-20%
  306 depending on the data.
  307 In nofib this improves wheel-sieve1 by 4-9% depending on problem
  308 size.
  309 
  310 We could also add a second conditional jump after the comparison to
  311 keep the range check like this:
  312     cmp 3000, rArgument
  313     jg <default>
  314     je <branch 2>
  315 While this is fairly cheap it made no big difference for the >3000 case
  316 and slowed down all other cases making it not worthwhile.
  317 -}
  318 
  319 
  320 -- | Does the backend support switch out of the box? Then leave this to the
  321 -- backend!
  322 backendSupportsSwitch :: Backend -> Bool
  323 backendSupportsSwitch ViaC = True
  324 backendSupportsSwitch LLVM = True
  325 backendSupportsSwitch _    = False
  326 
  327 -- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
  328 -- down into smaller pieces suitable for code generation.
  329 createSwitchPlan :: SwitchTargets -> SwitchPlan
  330 -- Lets do the common case of a singleton map quickly and efficiently (#10677)
  331 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
  332     | [(x, l)] <- M.toList m
  333     = IfEqual x l (Unconditionally defLabel)
  334 -- And another common case, matching "booleans"
  335 createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
  336     | [(x1, l1), (_x2,l2)] <- M.toAscList m
  337     --Checking If |range| = 2 is enough if we have two unique literals
  338     , hi - lo == 1
  339     = IfEqual x1 l1 (Unconditionally l2)
  340 -- See Note [Two alts + default]
  341 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
  342     | [(x1, l1), (x2,l2)] <- M.toAscList m
  343     = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
  344 createSwitchPlan (SwitchTargets signed range mbdef m) =
  345     -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
  346     plan
  347   where
  348     pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
  349     flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
  350     plan = buildTree signed $ flatPlan
  351 
  352 
  353 ---
  354 --- Step 1: Splitting at large holes
  355 ---
  356 splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
  357 splitAtHoles _        m | M.null m = []
  358 splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
  359   where
  360     holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
  361     nonHoles = reassocTuples lo holes hi
  362 
  363     (lo,_) = M.findMin m
  364     (hi,_) = M.findMax m
  365 
  366 ---
  367 --- Step 2: Avoid small jump tables
  368 ---
  369 -- We do not want jump tables below a certain size. This breaks them up
  370 -- (into singleton maps, for now).
  371 breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
  372 breakTooSmall m
  373   | M.size m > minJumpTableSize = [m]
  374   | otherwise                   = [M.singleton k v | (k,v) <- M.toList m]
  375 
  376 ---
  377 ---  Step 3: Fill in the blanks
  378 ---
  379 
  380 -- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
  381 -- two entries, dividing the range.
  382 -- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
  383 -- the expression is < n, and plan2 otherwise.
  384 
  385 type FlatSwitchPlan = SeparatedList Integer SwitchPlan
  386 
  387 mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
  388 
  389 -- If we have no default (i.e. undefined where there is no entry), we can
  390 -- branch at the minimum of each map
  391 mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
  392 mkFlatSwitchPlan signed  Nothing _ (m:ms)
  393   = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
  394 
  395 -- If we have a default, we have to interleave segments that jump
  396 -- to the default between the maps
  397 mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
  398   where
  399     go (lo,hi) []
  400         | lo > hi = []
  401         | otherwise = [(lo, Unconditionally l)]
  402     go (lo,hi) (m:ms)
  403         | lo < min
  404         = (lo, Unconditionally l) : go (min,hi) (m:ms)
  405         | lo == min
  406         = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
  407         | otherwise
  408         = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
  409       where
  410         min = fst (M.findMin m)
  411         max = fst (M.findMax m)
  412 
  413 
  414 mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
  415 mkLeafPlan signed mbdef m
  416     | [(_,l)] <- M.toList m -- singleton map
  417     = Unconditionally l
  418     | otherwise
  419     = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
  420   where
  421     min = fst (M.findMin m)
  422     max = fst (M.findMax m)
  423 
  424 ---
  425 ---  Step 4: Reduce the number of branches using ==
  426 ---
  427 
  428 -- A sequence of three unconditional jumps, with the outer two pointing to the
  429 -- same value and the bounds off by exactly one can be improved
  430 findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
  431 findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
  432   | l == l3 && i + 1 == i'
  433   = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
  434 findSingleValues (p, (i,p'):xs)
  435   = (p,i) `consSL` findSingleValues (p', xs)
  436 findSingleValues (p, [])
  437   = (p, [])
  438 
  439 ---
  440 ---  Step 5: Actually build the tree
  441 ---
  442 
  443 -- Build a balanced tree from a separated list
  444 buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
  445 buildTree _ (p,[]) = p
  446 buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
  447   where
  448     (sl1, m, sl2) = divideSL sl
  449 
  450 
  451 
  452 --
  453 -- Utility data type: Non-empty lists with extra markers in between each
  454 -- element:
  455 --
  456 
  457 type SeparatedList b a = (a, [(b,a)])
  458 
  459 consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
  460 consSL (a, b) (a', xs) = (a, (b,a'):xs)
  461 
  462 divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
  463 divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
  464 divideSL (p,xs) = ((p, xs1), m, (p', xs2))
  465   where
  466     (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
  467 
  468 --
  469 -- Other Utilities
  470 --
  471 
  472 restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
  473 restrictMap (lo,hi) m = mid
  474   where (_,   mid_hi) = M.split (lo-1) m
  475         (mid, _) =      M.split (hi+1) mid_hi
  476 
  477 -- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
  478 reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
  479 reassocTuples initial [] last
  480     = [(initial,last)]
  481 reassocTuples initial ((a,b):tuples) last
  482     = (initial,a) : reassocTuples b tuples last
  483 
  484 -- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement]
  485 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  486 -- I (Joachim) separated the two somewhat closely related modules
  487 --
  488 --  - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy
  489 --    for implementing a Cmm switch (createSwitchPlan), and
  490 --  - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification,
  491 --
  492 -- for these reasons:
  493 --
  494 --  * GHC.Cmm.Switch is very low in the dependency tree, i.e. does not depend on any
  495 --    GHC specific modules at all (with the exception of Output and
  496 --    GHC.Cmm.Dataflow (Literal)).
  497 --  * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
  498 --    the dependency tree.
  499 --  * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but
  500 --    used in GHC.Cmm.Node.
  501 --  * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
  502 --    for more parallelism when building GHC.
  503 --  * The interaction between the modules is very explicit and easy to
  504 --    understand, due to the small and simple interface.