never executed always true always false
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
3 module GHC.Cmm.Switch (
4 SwitchTargets,
5 mkSwitchTargets,
6 switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
7 mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
8 switchTargetsToList, eqSwitchTargetWith,
9
10 SwitchPlan(..),
11 backendSupportsSwitch,
12 createSwitchPlan,
13 ) where
14
15 import GHC.Prelude
16
17 import GHC.Utils.Outputable
18 import GHC.Driver.Backend
19 import GHC.Utils.Panic
20 import GHC.Cmm.Dataflow.Label (Label)
21
22 import Data.Maybe
23 import Data.List (groupBy)
24 import Data.Function (on)
25 import qualified Data.Map as M
26
27 -- Note [Cmm Switches, the general plan]
28 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 --
30 -- Compiling a high-level switch statement, as it comes out of a STG case
31 -- expression, for example, allows for a surprising amount of design decisions.
32 -- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
33 -- well as from the actual code generation.
34 --
35 -- The overall plan is:
36 -- * The Stg → Cmm transformation creates a single `SwitchTargets` in
37 -- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
38 -- At this stage, they are unsuitable for code generation.
39 -- * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
40 -- switch statements with code that is suitable for code generation, i.e.
41 -- a nice balanced tree of decisions with dense jump tables in the leafs.
42 -- The actual planning of this tree is performed in pure code in createSwitchPlan
43 -- in this module. See Note [createSwitchPlan].
44 -- * The actual code generation will not do any further processing and
45 -- implement each CmmSwitch with a jump tables.
46 --
47 -- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
48 -- statements alone, as we can turn a SwitchTargets value into a nice
49 -- switch-statement in LLVM resp. C, and leave the rest to the compiler.
50 --
51 -- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
52 -- separated.
53
54 -----------------------------------------------------------------------------
55 -- Note [Magic Constants in GHC.Cmm.Switch]
56 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57 --
58 -- There are a lot of heuristics here that depend on magic values where it is
59 -- hard to determine the "best" value (for whatever that means). These are the
60 -- magic values:
61
62 -- | Number of consecutive default values allowed in a jump table. If there are
63 -- more of them, the jump tables are split.
64 --
65 -- Currently 7, as it costs 7 words of additional code when a jump table is
66 -- split (at least on x64, determined experimentally).
67 maxJumpTableHole :: Integer
68 maxJumpTableHole = 7
69
70 -- | Minimum size of a jump table. If the number is smaller, the switch is
71 -- implemented using conditionals.
72 -- Currently 5, because an if-then-else tree of 4 values is nice and compact.
73 minJumpTableSize :: Int
74 minJumpTableSize = 5
75
76 -- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
77 minJumpTableOffset :: Integer
78 minJumpTableOffset = 2
79
80
81 -----------------------------------------------------------------------------
82 -- Switch Targets
83
84 -- Note [SwitchTargets]
85 -- ~~~~~~~~~~~~~~~~~~~~
86 --
87 -- The branches of a switch are stored in a SwitchTargets, which consists of an
88 -- (optional) default jump target, and a map from values to jump targets.
89 --
90 -- If the default jump target is absent, the behaviour of the switch outside the
91 -- values of the map is undefined.
92 --
93 -- We use an Integer for the keys the map so that it can be used in switches on
94 -- unsigned as well as signed integers.
95 --
96 -- The map may be empty (we prune out-of-range branches here, so it could be us
97 -- emptying it).
98 --
99 -- Before code generation, the table needs to be brought into a form where all
100 -- entries are non-negative, so that it can be compiled into a jump table.
101 -- See switchTargetsToTable.
102
103
104 -- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
105 -- value, and knows whether the value is signed, the possible range, an
106 -- optional default value and a map from values to jump labels.
107 data SwitchTargets =
108 SwitchTargets
109 Bool -- Signed values
110 (Integer, Integer) -- Range
111 (Maybe Label) -- Default value
112 (M.Map Integer Label) -- The branches
113 deriving (Show, Eq)
114
115 -- | The smart constructor mkSwitchTargets normalises the map a bit:
116 -- * No entries outside the range
117 -- * No entries equal to the default
118 -- * No default if all elements have explicit values
119 mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
120 mkSwitchTargets signed range@(lo,hi) mbdef ids
121 = SwitchTargets signed range mbdef' ids'
122 where
123 ids' = dropDefault $ restrict ids
124 mbdef' | defaultNeeded = mbdef
125 | otherwise = Nothing
126
127 -- Drop entries outside the range, if there is a range
128 restrict = restrictMap (lo,hi)
129
130 -- Drop entries that equal the default, if there is a default
131 dropDefault | Just l <- mbdef = M.filter (/= l)
132 | otherwise = id
133
134 -- Check if the default is still needed
135 defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
136
137
138 -- | Changes all labels mentioned in the SwitchTargets value
139 mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
140 mapSwitchTargets f (SwitchTargets signed range mbdef branches)
141 = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
142
143 -- | Returns the list of non-default branches of the SwitchTargets value
144 switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
145 switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
146
147 -- | Return the default label of the SwitchTargets value
148 switchTargetsDefault :: SwitchTargets -> Maybe Label
149 switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
150
151 -- | Return the range of the SwitchTargets value
152 switchTargetsRange :: SwitchTargets -> (Integer, Integer)
153 switchTargetsRange (SwitchTargets _ range _ _) = range
154
155 -- | Return whether this is used for a signed value
156 switchTargetsSigned :: SwitchTargets -> Bool
157 switchTargetsSigned (SwitchTargets signed _ _ _) = signed
158
159 -- | switchTargetsToTable creates a dense jump table, usable for code generation.
160 --
161 -- Also returns an offset to add to the value; the list is 0-based on the
162 -- result of that addition.
163 --
164 -- The conversion from Integer to Int is a bit of a wart, as the actual
165 -- scrutinee might be an unsigned word, but it just works, due to wrap-around
166 -- arithmetic (as verified by the CmmSwitchTest test case).
167 switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
168 switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
169 = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
170 where
171 labelFor i = case M.lookup i branches of Just l -> Just l
172 Nothing -> mbdef
173 start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset]
174 | otherwise = lo
175
176 -- Note [Jump Table Offset]
177 -- ~~~~~~~~~~~~~~~~~~~~~~~~
178 --
179 -- Usually, the code for a jump table starting at x will first subtract x from
180 -- the value, to avoid a large amount of empty entries. But if x is very small,
181 -- the extra entries are no worse than the subtraction in terms of code size, and
182 -- not having to do the subtraction is quicker.
183 --
184 -- I.e. instead of
185 -- _u20N:
186 -- leaq -1(%r14),%rax
187 -- jmp *_n20R(,%rax,8)
188 -- _n20R:
189 -- .quad _c20p
190 -- .quad _c20q
191 -- do
192 -- _u20N:
193 -- jmp *_n20Q(,%r14,8)
194 --
195 -- _n20Q:
196 -- .quad 0
197 -- .quad _c20p
198 -- .quad _c20q
199 -- .quad _c20r
200
201 -- | The list of all labels occurring in the SwitchTargets value.
202 switchTargetsToList :: SwitchTargets -> [Label]
203 switchTargetsToList (SwitchTargets _ _ mbdef branches)
204 = maybeToList mbdef ++ M.elems branches
205
206 -- | Groups cases with equal targets, suitable for pretty-printing to a
207 -- c-like switch statement with fall-through semantics.
208 switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
209 switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
210 where
211 groups = map (\xs -> (map fst xs, snd (head xs))) $
212 groupBy ((==) `on` snd) $
213 M.toList branches
214
215 -- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim"
216 eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
217 eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
218 signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
219 where
220 goMB Nothing Nothing = True
221 goMB (Just l1) (Just l2) = l1 `eq` l2
222 goMB _ _ = False
223 goList [] [] = True
224 goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
225 goList _ _ = False
226
227 -----------------------------------------------------------------------------
228 -- Code generation for Switches
229
230
231 -- | A SwitchPlan abstractly describes how a Switch statement ought to be
232 -- implemented. See Note [createSwitchPlan]
233 data SwitchPlan
234 = Unconditionally Label
235 | IfEqual Integer Label SwitchPlan
236 | IfLT Bool Integer SwitchPlan SwitchPlan
237 | JumpTable SwitchTargets
238 deriving Show
239 --
240 -- Note [createSwitchPlan]
241 -- ~~~~~~~~~~~~~~~~~~~~~~~
242 --
243 -- A SwitchPlan describes how a Switch statement is to be broken down into
244 -- smaller pieces suitable for code generation.
245 --
246 -- createSwitchPlan creates such a switch plan, in these steps:
247 -- 1. It splits the switch statement at segments of non-default values that
248 -- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch]
249 -- 2. Too small jump tables should be avoided, so we break up smaller pieces
250 -- in breakTooSmall.
251 -- 3. We fill in the segments between those pieces with a jump to the default
252 -- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
253 -- 4. We find and replace two less-than branches by a single equal-to-test in
254 -- findSingleValues
255 -- 5. The thus collected pieces are assembled to a balanced binary tree.
256
257 {-
258 Note [Two alts + default]
259 ~~~~~~~~~~~~~~~~~~~~~~~~~
260
261 Discussion and a bit more info at #14644
262
263 When dealing with a switch of the form:
264 switch(e) {
265 case 1: goto l1;
266 case 3000: goto l2;
267 default: goto ldef;
268 }
269
270 If we treat it as a sparse jump table we would generate:
271
272 if (e > 3000) //Check if value is outside of the jump table.
273 goto ldef;
274 else {
275 if (e < 3000) { //Compare to upper value
276 if(e != 1) //Compare to remaining value
277 goto ldef;
278 else
279 goto l2;
280 }
281 else
282 goto l1;
283 }
284
285 Instead we special case this to :
286
287 if (e==1) goto l1;
288 else if (e==3000) goto l2;
289 else goto l3;
290
291 This means we have:
292 * Less comparisons for: 1,<3000
293 * Unchanged for 3000
294 * One more for >3000
295
296 This improves code in a few ways:
297 * One comparison less means smaller code which helps with cache.
298 * It exchanges a taken jump for two jumps no taken in the >range case.
299 Jumps not taken are cheaper (See Agner guides) making this about as fast.
300 * For all other cases the first range check is removed making it faster.
301
302 The end result is that the change is not measurably slower for the case
303 >3000 and faster for the other cases.
304
305 This makes running this kind of match in an inner loop cheaper by 10-20%
306 depending on the data.
307 In nofib this improves wheel-sieve1 by 4-9% depending on problem
308 size.
309
310 We could also add a second conditional jump after the comparison to
311 keep the range check like this:
312 cmp 3000, rArgument
313 jg <default>
314 je <branch 2>
315 While this is fairly cheap it made no big difference for the >3000 case
316 and slowed down all other cases making it not worthwhile.
317 -}
318
319
320 -- | Does the backend support switch out of the box? Then leave this to the
321 -- backend!
322 backendSupportsSwitch :: Backend -> Bool
323 backendSupportsSwitch ViaC = True
324 backendSupportsSwitch LLVM = True
325 backendSupportsSwitch _ = False
326
327 -- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
328 -- down into smaller pieces suitable for code generation.
329 createSwitchPlan :: SwitchTargets -> SwitchPlan
330 -- Lets do the common case of a singleton map quickly and efficiently (#10677)
331 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
332 | [(x, l)] <- M.toList m
333 = IfEqual x l (Unconditionally defLabel)
334 -- And another common case, matching "booleans"
335 createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
336 | [(x1, l1), (_x2,l2)] <- M.toAscList m
337 --Checking If |range| = 2 is enough if we have two unique literals
338 , hi - lo == 1
339 = IfEqual x1 l1 (Unconditionally l2)
340 -- See Note [Two alts + default]
341 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
342 | [(x1, l1), (x2,l2)] <- M.toAscList m
343 = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
344 createSwitchPlan (SwitchTargets signed range mbdef m) =
345 -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
346 plan
347 where
348 pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
349 flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
350 plan = buildTree signed $ flatPlan
351
352
353 ---
354 --- Step 1: Splitting at large holes
355 ---
356 splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
357 splitAtHoles _ m | M.null m = []
358 splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
359 where
360 holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
361 nonHoles = reassocTuples lo holes hi
362
363 (lo,_) = M.findMin m
364 (hi,_) = M.findMax m
365
366 ---
367 --- Step 2: Avoid small jump tables
368 ---
369 -- We do not want jump tables below a certain size. This breaks them up
370 -- (into singleton maps, for now).
371 breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
372 breakTooSmall m
373 | M.size m > minJumpTableSize = [m]
374 | otherwise = [M.singleton k v | (k,v) <- M.toList m]
375
376 ---
377 --- Step 3: Fill in the blanks
378 ---
379
380 -- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
381 -- two entries, dividing the range.
382 -- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
383 -- the expression is < n, and plan2 otherwise.
384
385 type FlatSwitchPlan = SeparatedList Integer SwitchPlan
386
387 mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
388
389 -- If we have no default (i.e. undefined where there is no entry), we can
390 -- branch at the minimum of each map
391 mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
392 mkFlatSwitchPlan signed Nothing _ (m:ms)
393 = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
394
395 -- If we have a default, we have to interleave segments that jump
396 -- to the default between the maps
397 mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
398 where
399 go (lo,hi) []
400 | lo > hi = []
401 | otherwise = [(lo, Unconditionally l)]
402 go (lo,hi) (m:ms)
403 | lo < min
404 = (lo, Unconditionally l) : go (min,hi) (m:ms)
405 | lo == min
406 = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
407 | otherwise
408 = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
409 where
410 min = fst (M.findMin m)
411 max = fst (M.findMax m)
412
413
414 mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
415 mkLeafPlan signed mbdef m
416 | [(_,l)] <- M.toList m -- singleton map
417 = Unconditionally l
418 | otherwise
419 = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
420 where
421 min = fst (M.findMin m)
422 max = fst (M.findMax m)
423
424 ---
425 --- Step 4: Reduce the number of branches using ==
426 ---
427
428 -- A sequence of three unconditional jumps, with the outer two pointing to the
429 -- same value and the bounds off by exactly one can be improved
430 findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
431 findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
432 | l == l3 && i + 1 == i'
433 = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
434 findSingleValues (p, (i,p'):xs)
435 = (p,i) `consSL` findSingleValues (p', xs)
436 findSingleValues (p, [])
437 = (p, [])
438
439 ---
440 --- Step 5: Actually build the tree
441 ---
442
443 -- Build a balanced tree from a separated list
444 buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
445 buildTree _ (p,[]) = p
446 buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
447 where
448 (sl1, m, sl2) = divideSL sl
449
450
451
452 --
453 -- Utility data type: Non-empty lists with extra markers in between each
454 -- element:
455 --
456
457 type SeparatedList b a = (a, [(b,a)])
458
459 consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
460 consSL (a, b) (a', xs) = (a, (b,a'):xs)
461
462 divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
463 divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
464 divideSL (p,xs) = ((p, xs1), m, (p', xs2))
465 where
466 (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
467
468 --
469 -- Other Utilities
470 --
471
472 restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
473 restrictMap (lo,hi) m = mid
474 where (_, mid_hi) = M.split (lo-1) m
475 (mid, _) = M.split (hi+1) mid_hi
476
477 -- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
478 reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
479 reassocTuples initial [] last
480 = [(initial,last)]
481 reassocTuples initial ((a,b):tuples) last
482 = (initial,a) : reassocTuples b tuples last
483
484 -- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement]
485 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
486 -- I (Joachim) separated the two somewhat closely related modules
487 --
488 -- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy
489 -- for implementing a Cmm switch (createSwitchPlan), and
490 -- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification,
491 --
492 -- for these reasons:
493 --
494 -- * GHC.Cmm.Switch is very low in the dependency tree, i.e. does not depend on any
495 -- GHC specific modules at all (with the exception of Output and
496 -- GHC.Cmm.Dataflow (Literal)).
497 -- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
498 -- the dependency tree.
499 -- * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but
500 -- used in GHC.Cmm.Node.
501 -- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
502 -- for more parallelism when building GHC.
503 -- * The interaction between the modules is very explicit and easy to
504 -- understand, due to the small and simple interface.