never executed always true always false
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 The simplifier utilities
5 -}
6
7
8
9 module GHC.Core.Opt.Simplify.Utils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..), StaticEnv,
21 isSimplified, contIsStop,
22 contIsDupable, contResultType, contHoleType, contHoleScaling,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32 isStrictArgInfo, lazyArgContext,
33
34 abstractFloats,
35
36 -- Utilities
37 isExitJoinId
38 ) where
39
40 import GHC.Prelude
41
42 import GHC.Driver.Session
43
44 import GHC.Core
45 import GHC.Types.Literal ( isLitRubbish )
46 import GHC.Core.Opt.Simplify.Env
47 import GHC.Core.Opt.Monad ( SimplMode(..), Tick(..) )
48 import qualified GHC.Core.Subst
49 import GHC.Core.Ppr
50 import GHC.Core.TyCo.Ppr ( pprParendType )
51 import GHC.Core.FVs
52 import GHC.Core.Utils
53 import GHC.Core.Opt.Arity
54 import GHC.Core.Unfold
55 import GHC.Core.Unfold.Make
56 import GHC.Core.Opt.Simplify.Monad
57 import GHC.Core.Type hiding( substTy )
58 import GHC.Core.Coercion hiding( substCo )
59 import GHC.Core.DataCon ( dataConWorkId, isNullaryRepDataCon )
60 import GHC.Core.Multiplicity
61 import GHC.Core.Opt.ConstantFold
62
63 import GHC.Types.Name
64 import GHC.Types.Id
65 import GHC.Types.Id.Info
66 import GHC.Types.Tickish
67 import GHC.Types.Demand
68 import GHC.Types.Var.Set
69 import GHC.Types.Basic
70
71 import GHC.Data.OrdList ( isNilOL )
72 import GHC.Data.FastString ( fsLit )
73
74 import GHC.Utils.Misc
75 import GHC.Utils.Monad
76 import GHC.Utils.Outputable
77 import GHC.Utils.Logger
78 import GHC.Utils.Panic
79 import GHC.Utils.Panic.Plain
80 import GHC.Utils.Trace
81
82 import Control.Monad ( when )
83 import Data.List ( sortBy )
84
85 {-
86 ************************************************************************
87 * *
88 The SimplCont and DupFlag types
89 * *
90 ************************************************************************
91
92 A SimplCont allows the simplifier to traverse the expression in a
93 zipper-like fashion. The SimplCont represents the rest of the expression,
94 "above" the point of interest.
95
96 You can also think of a SimplCont as an "evaluation context", using
97 that term in the way it is used for operational semantics. This is the
98 way I usually think of it, For example you'll often see a syntax for
99 evaluation context looking like
100 C ::= [] | C e | case C of alts | C `cast` co
101 That's the kind of thing we are doing here, and I use that syntax in
102 the comments.
103
104
105 Key points:
106 * A SimplCont describes a *strict* context (just like
107 evaluation contexts do). E.g. Just [] is not a SimplCont
108
109 * A SimplCont describes a context that *does not* bind
110 any variables. E.g. \x. [] is not a SimplCont
111 -}
112
113 data SimplCont
114 = Stop -- Stop[e] = e
115 OutType -- Type of the <hole>
116 CallCtxt -- Tells if there is something interesting about
117 -- the context, and hence the inliner
118 -- should be a bit keener (see interestingCallContext)
119 -- Specifically:
120 -- This is an argument of a function that has RULES
121 -- Inlining the call might allow the rule to fire
122 -- Never ValAppCxt (use ApplyToVal instead)
123 -- or CaseCtxt (use Select instead)
124
125 | CastIt -- (CastIt co K)[e] = K[ e `cast` co ]
126 OutCoercion -- The coercion simplified
127 -- Invariant: never an identity coercion
128 SimplCont
129
130 | ApplyToVal -- (ApplyToVal arg K)[e] = K[ e arg ]
131 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
132 , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah)
133 -- See Note [The hole type in ApplyToTy/Val]
134 , sc_arg :: InExpr -- The argument,
135 , sc_env :: StaticEnv -- see Note [StaticEnv invariant]
136 , sc_cont :: SimplCont }
137
138 | ApplyToTy -- (ApplyToTy ty K)[e] = K[ e ty ]
139 { sc_arg_ty :: OutType -- Argument type
140 , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah)
141 -- See Note [The hole type in ApplyToTy/Val]
142 , sc_cont :: SimplCont }
143
144 | Select -- (Select alts K)[e] = K[ case e of alts ]
145 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
146 , sc_bndr :: InId -- case binder
147 , sc_alts :: [InAlt] -- Alternatives
148 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
149 , sc_cont :: SimplCont }
150
151 -- The two strict forms have no DupFlag, because we never duplicate them
152 | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
153 -- or, equivalently, = K[ (\x xs.b) e ]
154 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
155 , sc_bndr :: InId
156 , sc_bndrs :: [InBndr]
157 , sc_body :: InExpr
158 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
159 , sc_cont :: SimplCont }
160
161 | StrictArg -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
162 { sc_dup :: DupFlag -- Always Simplified or OkToDup
163 , sc_fun :: ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
164 -- plus demands and discount flags for *this* arg
165 -- and further args
166 -- So ai_dmds and ai_discs are never empty
167 , sc_fun_ty :: OutType -- Type of the function (f e1 .. en),
168 -- presumably (arg_ty -> res_ty)
169 -- where res_ty is expected by sc_cont
170 , sc_cont :: SimplCont }
171
172 | TickIt -- (TickIt t K)[e] = K[ tick t e ]
173 CoreTickish -- Tick tickish <hole>
174 SimplCont
175
176 type StaticEnv = SimplEnv -- Just the static part is relevant
177
178 data DupFlag = NoDup -- Unsimplified, might be big
179 | Simplified -- Simplified
180 | OkToDup -- Simplified and small
181
182 isSimplified :: DupFlag -> Bool
183 isSimplified NoDup = False
184 isSimplified _ = True -- Invariant: the subst-env is empty
185
186 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
187 perhapsSubstTy dup env ty
188 | isSimplified dup = ty
189 | otherwise = substTy env ty
190
191 {- Note [StaticEnv invariant]
192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193 We pair up an InExpr or InAlts with a StaticEnv, which establishes the
194 lexical scope for that InExpr. When we simplify that InExpr/InAlts, we
195 use
196 - Its captured StaticEnv
197 - Overriding its InScopeSet with the larger one at the
198 simplification point.
199
200 Why override the InScopeSet? Example:
201 (let y = ey in f) ex
202 By the time we simplify ex, 'y' will be in scope.
203
204 However the InScopeSet in the StaticEnv is not irrelevant: it should
205 include all the free vars of applying the substitution to the InExpr.
206 Reason: contHoleType uses perhapsSubstTy to apply the substitution to
207 the expression, and that (rightly) gives ASSERT failures if the InScopeSet
208 isn't big enough.
209
210 Note [DupFlag invariants]
211 ~~~~~~~~~~~~~~~~~~~~~~~~~
212 In both (ApplyToVal dup _ env k)
213 and (Select dup _ _ env k)
214 the following invariants hold
215
216 (a) if dup = OkToDup, then continuation k is also ok-to-dup
217 (b) if dup = OkToDup or Simplified, the subst-env is empty
218 (and hence no need to re-simplify)
219 -}
220
221 instance Outputable DupFlag where
222 ppr OkToDup = text "ok"
223 ppr NoDup = text "nodup"
224 ppr Simplified = text "simpl"
225
226 instance Outputable SimplCont where
227 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
228 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
229 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
230 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
231 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
232 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont, sc_hole_ty = hole_ty })
233 = (hang (text "ApplyToVal" <+> ppr dup <+> text "hole" <+> ppr hole_ty)
234 2 (pprParendExpr arg))
235 $$ ppr cont
236 ppr (StrictBind { sc_bndr = b, sc_cont = cont })
237 = (text "StrictBind" <+> ppr b) $$ ppr cont
238 ppr (StrictArg { sc_fun = ai, sc_cont = cont })
239 = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
240 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
241 = (text "Select" <+> ppr dup <+> ppr bndr) $$
242 whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
243
244
245 {- Note [The hole type in ApplyToTy]
246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
247 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
248 continuation. It is absolutely necessary to compute contHoleType, but it is
249 not used for anything else (and hence may not be evaluated).
250
251 Why is it necessary for contHoleType? Consider the continuation
252 ApplyToType Int (Stop Int)
253 corresponding to
254 (<hole> @Int) :: Int
255 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
256 and there is no way to know which, so we must record it.
257
258 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
259 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
260 doesn't matter because we'll never compute them all.
261
262 ************************************************************************
263 * *
264 ArgInfo and ArgSpec
265 * *
266 ************************************************************************
267 -}
268
269 data ArgInfo
270 = ArgInfo {
271 ai_fun :: OutId, -- The function
272 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
273
274 ai_rules :: FunRules, -- Rules for this function
275
276 ai_encl :: Bool, -- Flag saying whether this function
277 -- or an enclosing one has rules (recursively)
278 -- True => be keener to inline in all args
279
280 ai_dmds :: [Demand], -- Demands on remaining value arguments (beyond ai_args)
281 -- Usually infinite, but if it is finite it guarantees
282 -- that the function diverges after being given
283 -- that number of args
284
285 ai_discs :: [Int] -- Discounts for remaining value arguments (beyong ai_args)
286 -- non-zero => be keener to inline
287 -- Always infinite
288 }
289
290 data ArgSpec
291 = ValArg { as_dmd :: Demand -- Demand placed on this argument
292 , as_arg :: OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
293 , as_hole_ty :: OutType } -- Type of the function (presumably t1 -> t2)
294
295 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
296 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
297
298 | CastBy OutCoercion -- Cast by this; c.f. CastIt
299
300 instance Outputable ArgInfo where
301 ppr (ArgInfo { ai_fun = fun, ai_args = args, ai_dmds = dmds })
302 = text "ArgInfo" <+> braces
303 (sep [ text "fun =" <+> ppr fun
304 , text "dmds =" <+> ppr dmds
305 , text "args =" <+> ppr args ])
306
307 instance Outputable ArgSpec where
308 ppr (ValArg { as_arg = arg }) = text "ValArg" <+> ppr arg
309 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
310 ppr (CastBy c) = text "CastBy" <+> ppr c
311
312 addValArgTo :: ArgInfo -> OutExpr -> OutType -> ArgInfo
313 addValArgTo ai arg hole_ty
314 | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rules = rules } <- ai
315 -- Pop the top demand and and discounts off
316 , let arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty, as_dmd = dmd }
317 = ai { ai_args = arg_spec : ai_args ai
318 , ai_dmds = dmds
319 , ai_discs = discs
320 , ai_rules = decRules rules }
321 | otherwise
322 = pprPanic "addValArgTo" (ppr ai $$ ppr arg)
323 -- There should always be enough demands and discounts
324
325 addTyArgTo :: ArgInfo -> OutType -> OutType -> ArgInfo
326 addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
327 , ai_rules = decRules (ai_rules ai) }
328 where
329 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
330
331 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
332 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai }
333
334 isStrictArgInfo :: ArgInfo -> Bool
335 -- True if the function is strict in the next argument
336 isStrictArgInfo (ArgInfo { ai_dmds = dmds })
337 | dmd:_ <- dmds = isStrUsedDmd dmd
338 | otherwise = False
339
340 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
341 argInfoAppArgs [] = []
342 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
343 argInfoAppArgs (ValArg { as_arg = arg } : as) = arg : argInfoAppArgs as
344 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
345
346 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
347 pushSimplifiedArgs _env [] k = k
348 pushSimplifiedArgs env (arg : args) k
349 = case arg of
350 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
351 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
352 ValArg { as_arg = arg, as_hole_ty = hole_ty }
353 -> ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified
354 , sc_hole_ty = hole_ty, sc_cont = rest }
355 CastBy c -> CastIt c rest
356 where
357 rest = pushSimplifiedArgs env args k
358 -- The env has an empty SubstEnv
359
360 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
361 -- NB: the [ArgSpec] is reversed so that the first arg
362 -- in the list is the last one in the application
363 argInfoExpr fun rev_args
364 = go rev_args
365 where
366 go [] = Var fun
367 go (ValArg { as_arg = arg } : as) = go as `App` arg
368 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
369 go (CastBy co : as) = mkCast (go as) co
370
371
372 type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
373 -- Nothing => No rules
374 -- Just (n, rules) => some rules, requiring at least n more type/value args
375
376 decRules :: FunRules -> FunRules
377 decRules (Just (n, rules)) = Just (n-1, rules)
378 decRules Nothing = Nothing
379
380 mkFunRules :: [CoreRule] -> FunRules
381 mkFunRules [] = Nothing
382 mkFunRules rs = Just (n_required, rs)
383 where
384 n_required = maximum (map ruleArity rs)
385
386 {-
387 ************************************************************************
388 * *
389 Functions on SimplCont
390 * *
391 ************************************************************************
392 -}
393
394 mkBoringStop :: OutType -> SimplCont
395 mkBoringStop ty = Stop ty BoringCtxt
396
397 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in GHC.Core.Unfold
398 mkRhsStop ty = Stop ty RhsCtxt
399
400 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
401 mkLazyArgStop ty cci = Stop ty cci
402
403 -------------------
404 contIsRhsOrArg :: SimplCont -> Bool
405 contIsRhsOrArg (Stop {}) = True
406 contIsRhsOrArg (StrictBind {}) = True
407 contIsRhsOrArg (StrictArg {}) = True
408 contIsRhsOrArg _ = False
409
410 contIsRhs :: SimplCont -> Bool
411 contIsRhs (Stop _ RhsCtxt) = True
412 contIsRhs (CastIt _ k) = contIsRhs k -- For f = e |> co, treat e as Rhs context
413 contIsRhs _ = False
414
415 -------------------
416 contIsStop :: SimplCont -> Bool
417 contIsStop (Stop {}) = True
418 contIsStop _ = False
419
420 contIsDupable :: SimplCont -> Bool
421 contIsDupable (Stop {}) = True
422 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
423 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
424 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
425 contIsDupable (StrictArg { sc_dup = OkToDup }) = True -- ...ditto...
426 contIsDupable (CastIt _ k) = contIsDupable k
427 contIsDupable _ = False
428
429 -------------------
430 contIsTrivial :: SimplCont -> Bool
431 contIsTrivial (Stop {}) = True
432 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
433 -- This one doesn't look right. A value application is not trivial
434 -- contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
435 contIsTrivial (CastIt _ k) = contIsTrivial k
436 contIsTrivial _ = False
437
438 -------------------
439 contResultType :: SimplCont -> OutType
440 contResultType (Stop ty _) = ty
441 contResultType (CastIt _ k) = contResultType k
442 contResultType (StrictBind { sc_cont = k }) = contResultType k
443 contResultType (StrictArg { sc_cont = k }) = contResultType k
444 contResultType (Select { sc_cont = k }) = contResultType k
445 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
446 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
447 contResultType (TickIt _ k) = contResultType k
448
449 contHoleType :: SimplCont -> OutType
450 contHoleType (Stop ty _) = ty
451 contHoleType (TickIt _ k) = contHoleType k
452 contHoleType (CastIt co _) = coercionLKind co
453 contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
454 = perhapsSubstTy dup se (idType b)
455 contHoleType (StrictArg { sc_fun_ty = ty }) = funArgTy ty
456 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
457 contHoleType (ApplyToVal { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy/Val]
458 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
459 = perhapsSubstTy d se (idType b)
460
461
462 -- Computes the multiplicity scaling factor at the hole. That is, in (case [] of
463 -- x ::(p) _ { … }) (respectively for arguments of functions), the scaling
464 -- factor is p. And in E[G[]], the scaling factor is the product of the scaling
465 -- factor of E and that of G.
466 --
467 -- The scaling factor at the hole of E[] is used to determine how a binder
468 -- should be scaled if it commutes with E. This appears, in particular, in the
469 -- case-of-case transformation.
470 contHoleScaling :: SimplCont -> Mult
471 contHoleScaling (Stop _ _) = One
472 contHoleScaling (CastIt _ k) = contHoleScaling k
473 contHoleScaling (StrictBind { sc_bndr = id, sc_cont = k })
474 = idMult id `mkMultMul` contHoleScaling k
475 contHoleScaling (Select { sc_bndr = id, sc_cont = k })
476 = idMult id `mkMultMul` contHoleScaling k
477 contHoleScaling (StrictArg { sc_fun_ty = fun_ty, sc_cont = k })
478 = w `mkMultMul` contHoleScaling k
479 where
480 (w, _, _) = splitFunTy fun_ty
481 contHoleScaling (ApplyToTy { sc_cont = k }) = contHoleScaling k
482 contHoleScaling (ApplyToVal { sc_cont = k }) = contHoleScaling k
483 contHoleScaling (TickIt _ k) = contHoleScaling k
484 -------------------
485 countArgs :: SimplCont -> Int
486 -- Count all arguments, including types, coercions, and other values
487 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
488 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
489 countArgs _ = 0
490
491 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
492 -- Summarises value args, discards type args and coercions
493 -- The returned continuation of the call is only used to
494 -- answer questions like "are you interesting?"
495 contArgs cont
496 | lone cont = (True, [], cont)
497 | otherwise = go [] cont
498 where
499 lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold
500 lone (ApplyToVal {}) = False -- NB: even a type application or cast
501 lone (CastIt {}) = False -- stops it being "lone"
502 lone _ = True
503
504 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
505 = go (is_interesting arg se : args) k
506 go args (ApplyToTy { sc_cont = k }) = go args k
507 go args (CastIt _ k) = go args k
508 go args k = (False, reverse args, k)
509
510 is_interesting arg se = interestingArg se arg
511 -- Do *not* use short-cutting substitution here
512 -- because we want to get as much IdInfo as possible
513
514
515 -------------------
516 mkArgInfo :: SimplEnv
517 -> Id
518 -> [CoreRule] -- Rules for function
519 -> Int -- Number of value args
520 -> SimplCont -- Context of the call
521 -> ArgInfo
522
523 mkArgInfo env fun rules n_val_args call_cont
524 | n_val_args < idArity fun -- Note [Unsaturated functions]
525 = ArgInfo { ai_fun = fun, ai_args = []
526 , ai_rules = fun_rules
527 , ai_encl = False
528 , ai_dmds = vanilla_dmds
529 , ai_discs = vanilla_discounts }
530 | otherwise
531 = ArgInfo { ai_fun = fun
532 , ai_args = []
533 , ai_rules = fun_rules
534 , ai_encl = interestingArgContext rules call_cont
535 , ai_dmds = add_type_strictness (idType fun) arg_dmds
536 , ai_discs = arg_discounts }
537 where
538 fun_rules = mkFunRules rules
539
540 vanilla_discounts, arg_discounts :: [Int]
541 vanilla_discounts = repeat 0
542 arg_discounts = case idUnfolding fun of
543 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
544 -> discounts ++ vanilla_discounts
545 _ -> vanilla_discounts
546
547 vanilla_dmds, arg_dmds :: [Demand]
548 vanilla_dmds = repeat topDmd
549
550 arg_dmds
551 | not (sm_inline (seMode env))
552 = vanilla_dmds -- See Note [Do not expose strictness if sm_inline=False]
553 | otherwise
554 = -- add_type_str fun_ty $
555 case splitDmdSig (idDmdSig fun) of
556 (demands, result_info)
557 | not (demands `lengthExceeds` n_val_args)
558 -> -- Enough args, use the strictness given.
559 -- For bottoming functions we used to pretend that the arg
560 -- is lazy, so that we don't treat the arg as an
561 -- interesting context. This avoids substituting
562 -- top-level bindings for (say) strings into
563 -- calls to error. But now we are more careful about
564 -- inlining lone variables, so its ok
565 -- (see GHC.Core.Op.Simplify.Utils.analyseCont)
566 if isDeadEndDiv result_info then
567 demands -- Finite => result is bottom
568 else
569 demands ++ vanilla_dmds
570 | otherwise
571 -> warnPprTrace True (text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
572 <+> ppr n_val_args <+> ppr demands) $
573 vanilla_dmds -- Not enough args, or no strictness
574
575 add_type_strictness :: Type -> [Demand] -> [Demand]
576 -- If the function arg types are strict, record that in the 'strictness bits'
577 -- No need to instantiate because unboxed types (which dominate the strict
578 -- types) can't instantiate type variables.
579 -- add_type_strictness is done repeatedly (for each call);
580 -- might be better once-for-all in the function
581 -- But beware primops/datacons with no strictness
582
583 add_type_strictness fun_ty dmds
584 | null dmds = []
585
586 | Just (_, fun_ty') <- splitForAllTyCoVar_maybe fun_ty
587 = add_type_strictness fun_ty' dmds -- Look through foralls
588
589 | Just (_, arg_ty, fun_ty') <- splitFunTy_maybe fun_ty -- Add strict-type info
590 , dmd : rest_dmds <- dmds
591 , let dmd' = case isLiftedType_maybe arg_ty of
592 Just False -> strictifyDmd dmd
593 _ -> dmd
594 = dmd' : add_type_strictness fun_ty' rest_dmds
595 -- If the type is representation-polymorphic, we can't know whether
596 -- it's strict. isLiftedType_maybe will return Just False only when
597 -- we're sure the type is unlifted.
598
599 | otherwise
600 = dmds
601
602 {- Note [Unsaturated functions]
603 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604 Consider (test eyeball/inline4)
605 x = a:as
606 y = f x
607 where f has arity 2. Then we do not want to inline 'x', because
608 it'll just be floated out again. Even if f has lots of discounts
609 on its first argument -- it must be saturated for these to kick in
610
611 Note [Do not expose strictness if sm_inline=False]
612 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
613 #15163 showed a case in which we had
614
615 {-# INLINE [1] zip #-}
616 zip = undefined
617
618 {-# RULES "foo" forall as bs. stream (zip as bs) = ..blah... #-}
619
620 If we expose zip's bottoming nature when simplifying the LHS of the
621 RULE we get
622 {-# RULES "foo" forall as bs.
623 stream (case zip of {}) = ..blah... #-}
624 discarding the arguments to zip. Usually this is fine, but on the
625 LHS of a rule it's not, because 'as' and 'bs' are now not bound on
626 the LHS.
627
628 This is a pretty pathological example, so I'm not losing sleep over
629 it, but the simplest solution was to check sm_inline; if it is False,
630 which it is on the LHS of a rule (see updModeForRules), then don't
631 make use of the strictness info for the function.
632 -}
633
634
635 {-
636 ************************************************************************
637 * *
638 Interesting arguments
639 * *
640 ************************************************************************
641
642 Note [Interesting call context]
643 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
644 We want to avoid inlining an expression where there can't possibly be
645 any gain, such as in an argument position. Hence, if the continuation
646 is interesting (eg. a case scrutinee, application etc.) then we
647 inline, otherwise we don't.
648
649 Previously some_benefit used to return True only if the variable was
650 applied to some value arguments. This didn't work:
651
652 let x = _coerce_ (T Int) Int (I# 3) in
653 case _coerce_ Int (T Int) x of
654 I# y -> ....
655
656 we want to inline x, but can't see that it's a constructor in a case
657 scrutinee position, and some_benefit is False.
658
659 Another example:
660
661 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
662
663 .... case dMonadST _@_ x0 of (a,b,c) -> ....
664
665 we'd really like to inline dMonadST here, but we *don't* want to
666 inline if the case expression is just
667
668 case x of y { DEFAULT -> ... }
669
670 since we can just eliminate this case instead (x is in WHNF). Similar
671 applies when x is bound to a lambda expression. Hence
672 contIsInteresting looks for case expressions with just a single
673 default case.
674
675 Note [No case of case is boring]
676 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
677 If we see
678 case f x of <alts>
679
680 we'd usually treat the context as interesting, to encourage 'f' to
681 inline. But if case-of-case is off, it's really not so interesting
682 after all, because we are unlikely to be able to push the case
683 expression into the branches of any case in f's unfolding. So, to
684 reduce unnecessary code expansion, we just make the context look boring.
685 This made a small compile-time perf improvement in perf/compiler/T6048,
686 and it looks plausible to me.
687 -}
688
689 lazyArgContext :: ArgInfo -> CallCtxt
690 -- Use this for lazy arguments
691 lazyArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
692 | encl_rules = RuleArgCtxt
693 | disc:_ <- discs, disc > 0 = DiscArgCtxt -- Be keener here
694 | otherwise = BoringCtxt -- Nothing interesting
695
696 strictArgContext :: ArgInfo -> CallCtxt
697 strictArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
698 -- Use this for strict arguments
699 | encl_rules = RuleArgCtxt
700 | disc:_ <- discs, disc > 0 = DiscArgCtxt -- Be keener here
701 | otherwise = RhsCtxt
702 -- Why RhsCtxt? if we see f (g x) (h x), and f is strict, we
703 -- want to be a bit more eager to inline g, because it may
704 -- expose an eval (on x perhaps) that can be eliminated or
705 -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1
706 -- It's worth an 18% improvement in allocation for this
707 -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier'
708
709 interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
710 -- See Note [Interesting call context]
711 interestingCallContext env cont
712 = interesting cont
713 where
714 interesting (Select {})
715 | sm_case_case (getMode env) = CaseCtxt
716 | otherwise = BoringCtxt
717 -- See Note [No case of case is boring]
718
719 interesting (ApplyToVal {}) = ValAppCtxt
720 -- Can happen if we have (f Int |> co) y
721 -- If f has an INLINE prag we need to give it some
722 -- motivation to inline. See Note [Cast then apply]
723 -- in GHC.Core.Unfold
724
725 interesting (StrictArg { sc_fun = fun }) = strictArgContext fun
726 interesting (StrictBind {}) = BoringCtxt
727 interesting (Stop _ cci) = cci
728 interesting (TickIt _ k) = interesting k
729 interesting (ApplyToTy { sc_cont = k }) = interesting k
730 interesting (CastIt _ k) = interesting k
731 -- If this call is the arg of a strict function, the context
732 -- is a bit interesting. If we inline here, we may get useful
733 -- evaluation information to avoid repeated evals: e.g.
734 -- x + (y * z)
735 -- Here the contIsInteresting makes the '*' keener to inline,
736 -- which in turn exposes a constructor which makes the '+' inline.
737 -- Assuming that +,* aren't small enough to inline regardless.
738 --
739 -- It's also very important to inline in a strict context for things
740 -- like
741 -- foldr k z (f x)
742 -- Here, the context of (f x) is strict, and if f's unfolding is
743 -- a build it's *great* to inline it here. So we must ensure that
744 -- the context for (f x) is not totally uninteresting.
745
746 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
747 -- If the argument has form (f x y), where x,y are boring,
748 -- and f is marked INLINE, then we don't want to inline f.
749 -- But if the context of the argument is
750 -- g (f x y)
751 -- where g has rules, then we *do* want to inline f, in case it
752 -- exposes a rule that might fire. Similarly, if the context is
753 -- h (g (f x x))
754 -- where h has rules, then we do want to inline f; hence the
755 -- call_cont argument to interestingArgContext
756 --
757 -- The ai-rules flag makes this happen; if it's
758 -- set, the inliner gets just enough keener to inline f
759 -- regardless of how boring f's arguments are, if it's marked INLINE
760 --
761 -- The alternative would be to *always* inline an INLINE function,
762 -- regardless of how boring its context is; but that seems overkill
763 -- For example, it'd mean that wrapper functions were always inlined
764 --
765 -- The call_cont passed to interestingArgContext is the context of
766 -- the call itself, e.g. g <hole> in the example above
767 interestingArgContext rules call_cont
768 = notNull rules || enclosing_fn_has_rules
769 where
770 enclosing_fn_has_rules = go call_cont
771
772 go (Select {}) = False
773 go (ApplyToVal {}) = False -- Shouldn't really happen
774 go (ApplyToTy {}) = False -- Ditto
775 go (StrictArg { sc_fun = fun }) = ai_encl fun
776 go (StrictBind {}) = False -- ??
777 go (CastIt _ c) = go c
778 go (Stop _ RuleArgCtxt) = True
779 go (Stop _ _) = False
780 go (TickIt _ c) = go c
781
782 {- Note [Interesting arguments]
783 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
784 An argument is interesting if it deserves a discount for unfoldings
785 with a discount in that argument position. The idea is to avoid
786 unfolding a function that is applied only to variables that have no
787 unfolding (i.e. they are probably lambda bound): f x y z There is
788 little point in inlining f here.
789
790 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
791 we must look through lets, eg (let x = e in C a b), because the let will
792 float, exposing the value, if we inline. That makes it different to
793 exprIsHNF.
794
795 Before 2009 we said it was interesting if the argument had *any* structure
796 at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016.
797
798 But we don't regard (f x y) as interesting, unless f is unsaturated.
799 If it's saturated and f hasn't inlined, then it's probably not going
800 to now!
801
802 Note [Conlike is interesting]
803 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804 Consider
805 f d = ...((*) d x y)...
806 ... f (df d')...
807 where df is con-like. Then we'd really like to inline 'f' so that the
808 rule for (*) (df d) can fire. To do this
809 a) we give a discount for being an argument of a class-op (eg (*) d)
810 b) we say that a con-like argument (eg (df d)) is interesting
811 -}
812
813 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
814 -- See Note [Interesting arguments]
815 interestingArg env e = go env 0 e
816 where
817 -- n is # value args to which the expression is applied
818 go env n (Var v)
819 = case substId env v of
820 DoneId v' -> go_var n v'
821 DoneEx e _ -> go (zapSubstEnv env) n e
822 ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
823
824 go _ _ (Lit l)
825 | isLitRubbish l = TrivArg -- Leads to unproductive inlining in WWRec, #20035
826 | otherwise = ValueArg
827 go _ _ (Type _) = TrivArg
828 go _ _ (Coercion _) = TrivArg
829 go env n (App fn (Type _)) = go env n fn
830 go env n (App fn _) = go env (n+1) fn
831 go env n (Tick _ a) = go env n a
832 go env n (Cast e _) = go env n e
833 go env n (Lam v e)
834 | isTyVar v = go env n e
835 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
836 | otherwise = ValueArg
837 go _ _ (Case {}) = NonTrivArg
838 go env n (Let b e) = case go env' n e of
839 ValueArg -> ValueArg
840 _ -> NonTrivArg
841 where
842 env' = env `addNewInScopeIds` bindersOf b
843
844 go_var n v
845 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
846 -- data constructors here
847 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
848 | n > 0 = NonTrivArg -- Saturated or unknown call
849 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
850 -- See Note [Conlike is interesting]
851 | otherwise = TrivArg -- n==0, no useful unfolding
852 where
853 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
854
855 {-
856 ************************************************************************
857 * *
858 SimplMode
859 * *
860 ************************************************************************
861
862 The SimplMode controls several switches; see its definition in
863 GHC.Core.Opt.Monad
864 sm_rules :: Bool -- Whether RULES are enabled
865 sm_inline :: Bool -- Whether inlining is enabled
866 sm_case_case :: Bool -- Whether case-of-case is enabled
867 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
868 -}
869
870 simplEnvForGHCi :: Logger -> DynFlags -> SimplEnv
871 simplEnvForGHCi logger dflags
872 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
873 , sm_phase = InitialPhase
874 , sm_logger = logger
875 , sm_dflags = dflags
876 , sm_uf_opts = uf_opts
877 , sm_rules = rules_on
878 , sm_inline = False
879 -- Do not do any inlining, in case we expose some
880 -- unboxed tuple stuff that confuses the bytecode
881 -- interpreter
882 , sm_eta_expand = eta_expand_on
883 , sm_case_case = True
884 , sm_pre_inline = pre_inline_on
885 }
886 where
887 rules_on = gopt Opt_EnableRewriteRules dflags
888 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
889 pre_inline_on = gopt Opt_SimplPreInlining dflags
890 uf_opts = unfoldingOpts dflags
891
892 updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
893 -- See Note [Simplifying inside stable unfoldings]
894 updModeForStableUnfoldings unf_act current_mode
895 = current_mode { sm_phase = phaseFromActivation unf_act
896 , sm_inline = True
897 , sm_eta_expand = False }
898 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
899 -- sm_rules: just inherit; sm_rules might be "off"
900 -- because of -fno-enable-rewrite-rules
901 where
902 phaseFromActivation (ActiveAfter _ n) = Phase n
903 phaseFromActivation _ = InitialPhase
904
905 updModeForRules :: SimplMode -> SimplMode
906 -- See Note [Simplifying rules]
907 updModeForRules current_mode
908 = current_mode { sm_phase = InitialPhase
909 , sm_inline = False -- See Note [Do not expose strictness if sm_inline=False]
910 , sm_rules = False
911 , sm_eta_expand = False }
912
913 {- Note [Simplifying rules]
914 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
915 When simplifying a rule LHS, refrain from /any/ inlining or applying
916 of other RULES.
917
918 Doing anything to the LHS is plain confusing, because it means that what the
919 rule matches is not what the user wrote. c.f. #10595, and #10528.
920 Moreover, inlining (or applying rules) on rule LHSs risks introducing
921 Ticks into the LHS, which makes matching trickier. #10665, #10745.
922
923 Doing this to either side confounds tools like HERMIT, which seek to reason
924 about and apply the RULES as originally written. See #10829.
925
926 There is, however, one case where we are pretty much /forced/ to transform the
927 LHS of a rule: postInlineUnconditionally. For instance, in the case of
928
929 let f = g @Int in f
930
931 We very much want to inline f into the body of the let. However, to do so (and
932 be able to safely drop f's binding) we must inline into all occurrences of f,
933 including those in the LHS of rules.
934
935 This can cause somewhat surprising results; for instance, in #18162 we found
936 that a rule template contained ticks in its arguments, because
937 postInlineUnconditionally substituted in a trivial expression that contains
938 ticks. See Note [Tick annotations in RULE matching] in GHC.Core.Rules for
939 details.
940
941 Note [No eta expansion in stable unfoldings]
942 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
943 If we have a stable unfolding
944
945 f :: Ord a => a -> IO ()
946 -- Unfolding template
947 -- = /\a \(d:Ord a) (x:a). bla
948
949 we do not want to eta-expand to
950
951 f :: Ord a => a -> IO ()
952 -- Unfolding template
953 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
954
955 because not specialisation of the overloading doesn't work properly
956 (see Note [Specialisation shape] in GHC.Core.Opt.Specialise), #9509.
957
958 So we disable eta-expansion in stable unfoldings.
959
960 Note [Inlining in gentle mode]
961 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
962 Something is inlined if
963 (i) the sm_inline flag is on, AND
964 (ii) the thing has an INLINE pragma, AND
965 (iii) the thing is inlinable in the earliest phase.
966
967 Example of why (iii) is important:
968 {-# INLINE [~1] g #-}
969 g = ...
970
971 {-# INLINE f #-}
972 f x = g (g x)
973
974 If we were to inline g into f's inlining, then an importing module would
975 never be able to do
976 f e --> g (g e) ---> RULE fires
977 because the stable unfolding for f has had g inlined into it.
978
979 On the other hand, it is bad not to do ANY inlining into an
980 stable unfolding, because then recursive knots in instance declarations
981 don't get unravelled.
982
983 However, *sometimes* SimplGently must do no call-site inlining at all
984 (hence sm_inline = False). Before full laziness we must be careful
985 not to inline wrappers, because doing so inhibits floating
986 e.g. ...(case f x of ...)...
987 ==> ...(case (case x of I# x# -> fw x#) of ...)...
988 ==> ...(case x of I# x# -> case fw x# of ...)...
989 and now the redex (f x) isn't floatable any more.
990
991 The no-inlining thing is also important for Template Haskell. You might be
992 compiling in one-shot mode with -O2; but when TH compiles a splice before
993 running it, we don't want to use -O2. Indeed, we don't want to inline
994 anything, because the byte-code interpreter might get confused about
995 unboxed tuples and suchlike.
996
997 Note [Simplifying inside stable unfoldings]
998 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
999 We must take care with simplification inside stable unfoldings (which come from
1000 INLINE pragmas).
1001
1002 First, consider the following example
1003 let f = \pq -> BIG
1004 in
1005 let g = \y -> f y y
1006 {-# INLINE g #-}
1007 in ...g...g...g...g...g...
1008 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
1009 and thence copied multiple times when g is inlined. HENCE we treat
1010 any occurrence in a stable unfolding as a multiple occurrence, not a single
1011 one; see OccurAnal.addRuleUsage.
1012
1013 Second, we do want *do* to some modest rules/inlining stuff in stable
1014 unfoldings, partly to eliminate senseless crap, and partly to break
1015 the recursive knots generated by instance declarations.
1016
1017 However, suppose we have
1018 {-# INLINE <act> f #-}
1019 f = <rhs>
1020 meaning "inline f in phases p where activation <act>(p) holds".
1021 Then what inlinings/rules can we apply to the copy of <rhs> captured in
1022 f's stable unfolding? Our model is that literally <rhs> is substituted for
1023 f when it is inlined. So our conservative plan (implemented by
1024 updModeForStableUnfoldings) is this:
1025
1026 -------------------------------------------------------------
1027 When simplifying the RHS of a stable unfolding, set the phase
1028 to the phase in which the stable unfolding first becomes active
1029 -------------------------------------------------------------
1030
1031 That ensures that
1032
1033 a) Rules/inlinings that *cease* being active before p will
1034 not apply to the stable unfolding, consistent with it being
1035 inlined in its *original* form in phase p.
1036
1037 b) Rules/inlinings that only become active *after* p will
1038 not apply to the stable unfolding, again to be consistent with
1039 inlining the *original* rhs in phase p.
1040
1041 For example,
1042 {-# INLINE f #-}
1043 f x = ...g...
1044
1045 {-# NOINLINE [1] g #-}
1046 g y = ...
1047
1048 {-# RULE h g = ... #-}
1049 Here we must not inline g into f's RHS, even when we get to phase 0,
1050 because when f is later inlined into some other module we want the
1051 rule for h to fire.
1052
1053 Similarly, consider
1054 {-# INLINE f #-}
1055 f x = ...g...
1056
1057 g y = ...
1058 and suppose that there are auto-generated specialisations and a strictness
1059 wrapper for g. The specialisations get activation AlwaysActive, and the
1060 strictness wrapper get activation (ActiveAfter 0). So the strictness
1061 wrepper fails the test and won't be inlined into f's stable unfolding. That
1062 means f can inline, expose the specialised call to g, so the specialisation
1063 rules can fire.
1064
1065 A note about wrappers
1066 ~~~~~~~~~~~~~~~~~~~~~
1067 It's also important not to inline a worker back into a wrapper.
1068 A wrapper looks like
1069 wraper = inline_me (\x -> ...worker... )
1070 Normally, the inline_me prevents the worker getting inlined into
1071 the wrapper (initially, the worker's only call site!). But,
1072 if the wrapper is sure to be called, the strictness analyser will
1073 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
1074 continuation.
1075 -}
1076
1077 activeUnfolding :: SimplMode -> Id -> Bool
1078 activeUnfolding mode id
1079 | isCompulsoryUnfolding (realIdUnfolding id)
1080 = True -- Even sm_inline can't override compulsory unfoldings
1081 | otherwise
1082 = isActive (sm_phase mode) (idInlineActivation id)
1083 && sm_inline mode
1084 -- `or` isStableUnfolding (realIdUnfolding id)
1085 -- Inline things when
1086 -- (a) they are active
1087 -- (b) sm_inline says so, except that for stable unfoldings
1088 -- (ie pragmas) we inline anyway
1089
1090 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
1091 -- When matching in RULE, we want to "look through" an unfolding
1092 -- (to see a constructor) if *rules* are on, even if *inlinings*
1093 -- are not. A notable example is DFuns, which really we want to
1094 -- match in rules like (op dfun) in gentle mode. Another example
1095 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
1096 -- see very early on
1097 getUnfoldingInRuleMatch env
1098 = (in_scope, id_unf)
1099 where
1100 in_scope = seInScope env
1101 mode = getMode env
1102 id_unf id | unf_is_active id = idUnfolding id
1103 | otherwise = NoUnfolding
1104 unf_is_active id
1105 | not (sm_rules mode) = -- active_unfolding_minimal id
1106 isStableUnfolding (realIdUnfolding id)
1107 -- Do we even need to test this? I think this InScopeEnv
1108 -- is only consulted if activeRule returns True, which
1109 -- never happens if sm_rules is False
1110 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
1111
1112 ----------------------
1113 activeRule :: SimplMode -> Activation -> Bool
1114 -- Nothing => No rules at all
1115 activeRule mode
1116 | not (sm_rules mode) = \_ -> False -- Rewriting is off
1117 | otherwise = isActive (sm_phase mode)
1118
1119 {-
1120 ************************************************************************
1121 * *
1122 preInlineUnconditionally
1123 * *
1124 ************************************************************************
1125
1126 preInlineUnconditionally
1127 ~~~~~~~~~~~~~~~~~~~~~~~~
1128 @preInlineUnconditionally@ examines a bndr to see if it is used just
1129 once in a completely safe way, so that it is safe to discard the
1130 binding inline its RHS at the (unique) usage site, REGARDLESS of how
1131 big the RHS might be. If this is the case we don't simplify the RHS
1132 first, but just inline it un-simplified.
1133
1134 This is much better than first simplifying a perhaps-huge RHS and then
1135 inlining and re-simplifying it. Indeed, it can be at least quadratically
1136 better. Consider
1137
1138 x1 = e1
1139 x2 = e2[x1]
1140 x3 = e3[x2]
1141 ...etc...
1142 xN = eN[xN-1]
1143
1144 We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
1145 This can happen with cascades of functions too:
1146
1147 f1 = \x1.e1
1148 f2 = \xs.e2[f1]
1149 f3 = \xs.e3[f3]
1150 ...etc...
1151
1152 THE MAIN INVARIANT is this:
1153
1154 ---- preInlineUnconditionally invariant -----
1155 IF preInlineUnconditionally chooses to inline x = <rhs>
1156 THEN doing the inlining should not change the occurrence
1157 info for the free vars of <rhs>
1158 ----------------------------------------------
1159
1160 For example, it's tempting to look at trivial binding like
1161 x = y
1162 and inline it unconditionally. But suppose x is used many times,
1163 but this is the unique occurrence of y. Then inlining x would change
1164 y's occurrence info, which breaks the invariant. It matters: y
1165 might have a BIG rhs, which will now be dup'd at every occurrence of x.
1166
1167
1168 Even RHSs labelled InlineMe aren't caught here, because there might be
1169 no benefit from inlining at the call site.
1170
1171 [Sept 01] Don't unconditionally inline a top-level thing, because that
1172 can simply make a static thing into something built dynamically. E.g.
1173 x = (a,b)
1174 main = \s -> h x
1175
1176 [Remember that we treat \s as a one-shot lambda.] No point in
1177 inlining x unless there is something interesting about the call site.
1178
1179 But watch out: if you aren't careful, some useful foldr/build fusion
1180 can be lost (most notably in spectral/hartel/parstof) because the
1181 foldr didn't see the build. Doing the dynamic allocation isn't a big
1182 deal, in fact, but losing the fusion can be. But the right thing here
1183 seems to be to do a callSiteInline based on the fact that there is
1184 something interesting about the call site (it's strict). Hmm. That
1185 seems a bit fragile.
1186
1187 Conclusion: inline top level things gaily until FinalPhase (the last
1188 phase), at which point don't.
1189
1190 Note [pre/postInlineUnconditionally in gentle mode]
1191 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1192 Even in gentle mode we want to do preInlineUnconditionally. The
1193 reason is that too little clean-up happens if you don't inline
1194 use-once things. Also a bit of inlining is *good* for full laziness;
1195 it can expose constant sub-expressions. Example in
1196 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1197 let-float if you inline windowToViewport
1198
1199 However, as usual for Gentle mode, do not inline things that are
1200 inactive in the initial stages. See Note [Gentle mode].
1201
1202 Note [Stable unfoldings and preInlineUnconditionally]
1203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1204 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1205 Example
1206
1207 {-# INLINE f #-}
1208 f :: Eq a => a -> a
1209 f x = ...
1210
1211 fInt :: Int -> Int
1212 fInt = f Int dEqInt
1213
1214 ...fInt...fInt...fInt...
1215
1216 Here f occurs just once, in the RHS of fInt. But if we inline it there
1217 it might make fInt look big, and we'll lose the opportunity to inline f
1218 at each of fInt's call sites. The INLINE pragma will only inline when
1219 the application is saturated for exactly this reason; and we don't
1220 want PreInlineUnconditionally to second-guess it. A live example is #3736.
1221 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1222
1223 NB: this only applies for INLINE things. Do /not/ switch off
1224 preInlineUnconditionally for
1225
1226 * INLINABLE. It just says to GHC "inline this if you like". If there
1227 is a unique occurrence, we want to inline the stable unfolding, not
1228 the RHS.
1229
1230 * NONLINE[n] just switches off inlining until phase n. We should
1231 respect that, but after phase n, just behave as usual.
1232
1233 * NoUserInlinePrag. There is no pragma at all. This ends up on wrappers.
1234 (See #18815.)
1235
1236 Note [Top-level bottoming Ids]
1237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1238 Don't inline top-level Ids that are bottoming, even if they are used just
1239 once, because FloatOut has gone to some trouble to extract them out.
1240 Inlining them won't make the program run faster!
1241
1242 Note [Do not inline CoVars unconditionally]
1243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1244 Coercion variables appear inside coercions, and the RHS of a let-binding
1245 is a term (not a coercion) so we can't necessarily inline the latter in
1246 the former.
1247 -}
1248
1249 preInlineUnconditionally
1250 :: SimplEnv -> TopLevelFlag -> InId
1251 -> InExpr -> StaticEnv -- These two go together
1252 -> Maybe SimplEnv -- Returned env has extended substitution
1253 -- Precondition: rhs satisfies the let/app invariant
1254 -- See Note [Core let/app invariant] in GHC.Core
1255 -- Reason: we don't want to inline single uses, or discard dead bindings,
1256 -- for unlifted, side-effect-ful bindings
1257 preInlineUnconditionally env top_lvl bndr rhs rhs_env
1258 | not pre_inline_unconditionally = Nothing
1259 | not active = Nothing
1260 | isTopLevel top_lvl && isDeadEndId bndr = Nothing -- Note [Top-level bottoming Ids]
1261 | isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally]
1262 | isExitJoinId bndr = Nothing -- Note [Do not inline exit join points]
1263 -- in module Exitify
1264 | not (one_occ (idOccInfo bndr)) = Nothing
1265 | not (isStableUnfolding unf) = Just $! (extend_subst_with rhs)
1266
1267 -- Note [Stable unfoldings and preInlineUnconditionally]
1268 | not (isInlinePragma inline_prag)
1269 , Just inl <- maybeUnfoldingTemplate unf = Just $! (extend_subst_with inl)
1270 | otherwise = Nothing
1271 where
1272 unf = idUnfolding bndr
1273 extend_subst_with inl_rhs = extendIdSubst env bndr $! (mkContEx rhs_env inl_rhs)
1274
1275 one_occ IAmDead = True -- Happens in ((\x.1) v)
1276 one_occ OneOcc{ occ_n_br = 1
1277 , occ_in_lam = NotInsideLam } = isNotTopLevel top_lvl || early_phase
1278 one_occ OneOcc{ occ_n_br = 1
1279 , occ_in_lam = IsInsideLam
1280 , occ_int_cxt = IsInteresting } = canInlineInLam rhs
1281 one_occ _ = False
1282
1283 pre_inline_unconditionally = sm_pre_inline mode
1284 mode = getMode env
1285 active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag)
1286 -- See Note [pre/postInlineUnconditionally in gentle mode]
1287 inline_prag = idInlinePragma bndr
1288
1289 -- Be very careful before inlining inside a lambda, because (a) we must not
1290 -- invalidate occurrence information, and (b) we want to avoid pushing a
1291 -- single allocation (here) into multiple allocations (inside lambda).
1292 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1293 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1294 -- where
1295 -- is_cheap = exprIsCheap rhs
1296 -- ok = is_cheap && int_cxt
1297
1298 -- int_cxt The context isn't totally boring
1299 -- E.g. let f = \ab.BIG in \y. map f xs
1300 -- Don't want to substitute for f, because then we allocate
1301 -- its closure every time the \y is called
1302 -- But: let f = \ab.BIG in \y. map (f y) xs
1303 -- Now we do want to substitute for f, even though it's not
1304 -- saturated, because we're going to allocate a closure for
1305 -- (f y) every time round the loop anyhow.
1306
1307 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1308 -- so substituting rhs inside a lambda doesn't change the occ info.
1309 -- Sadly, not quite the same as exprIsHNF.
1310 canInlineInLam (Lit _) = True
1311 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1312 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1313 canInlineInLam _ = False
1314 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1315 -- ticks around a Lam will disappear anyway.
1316
1317 early_phase = sm_phase mode /= FinalPhase
1318 -- If we don't have this early_phase test, consider
1319 -- x = length [1,2,3]
1320 -- The full laziness pass carefully floats all the cons cells to
1321 -- top level, and preInlineUnconditionally floats them all back in.
1322 -- Result is (a) static allocation replaced by dynamic allocation
1323 -- (b) many simplifier iterations because this tickles
1324 -- a related problem; only one inlining per pass
1325 --
1326 -- On the other hand, I have seen cases where top-level fusion is
1327 -- lost if we don't inline top level thing (e.g. string constants)
1328 -- Hence the test for phase zero (which is the phase for all the final
1329 -- simplifications). Until phase zero we take no special notice of
1330 -- top level things, but then we become more leery about inlining
1331 -- them.
1332
1333 {-
1334 ************************************************************************
1335 * *
1336 postInlineUnconditionally
1337 * *
1338 ************************************************************************
1339
1340 postInlineUnconditionally
1341 ~~~~~~~~~~~~~~~~~~~~~~~~~
1342 @postInlineUnconditionally@ decides whether to unconditionally inline
1343 a thing based on the form of its RHS; in particular if it has a
1344 trivial RHS. If so, we can inline and discard the binding altogether.
1345
1346 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1347 only have *forward* references. Hence, it's safe to discard the binding
1348
1349 NOTE: This isn't our last opportunity to inline. We're at the binding
1350 site right now, and we'll get another opportunity when we get to the
1351 occurrence(s)
1352
1353 Note that we do this unconditional inlining only for trivial RHSs.
1354 Don't inline even WHNFs inside lambdas; doing so may simply increase
1355 allocation when the function is called. This isn't the last chance; see
1356 NOTE above.
1357
1358 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1359 Because we don't even want to inline them into the RHS of constructor
1360 arguments. See NOTE above
1361
1362 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1363 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1364 with both a and b marked NOINLINE. But that seems incompatible with
1365 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1366 story for now.
1367
1368 NB: unconditional inlining of this sort can introduce ticks in places that
1369 may seem surprising; for instance, the LHS of rules. See Note [Simplfying
1370 rules] for details.
1371 -}
1372
1373 postInlineUnconditionally
1374 :: SimplEnv -> TopLevelFlag
1375 -> OutId -- The binder (*not* a CoVar), including its unfolding
1376 -> OccInfo -- From the InId
1377 -> OutExpr
1378 -> Bool
1379 -- Precondition: rhs satisfies the let/app invariant
1380 -- See Note [Core let/app invariant] in GHC.Core
1381 -- Reason: we don't want to inline single uses, or discard dead bindings,
1382 -- for unlifted, side-effect-ful bindings
1383 postInlineUnconditionally env top_lvl bndr occ_info rhs
1384 | not active = False
1385 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1386 -- because it might be referred to "earlier"
1387 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1388 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1389 | exprIsTrivial rhs = True
1390 | isJoinId bndr -- See point (1) of Note [Duplicating join points]
1391 , not (phase == FinalPhase) = False -- in Simplify.hs
1392 | otherwise
1393 = case occ_info of
1394 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
1395 -- See Note [Inline small things to avoid creating a thunk]
1396
1397 -> n_br < 100 -- See Note [Suppress exponential blowup]
1398
1399 && smallEnoughToInline uf_opts unfolding -- Small enough to dup
1400 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1401 --
1402 -- NB: Do NOT inline arbitrarily big things, even if occ_n_br=1
1403 -- Reason: doing so risks exponential behaviour. We simplify a big
1404 -- expression, inline it, and simplify it again. But if the
1405 -- very same thing happens in the big expression, we get
1406 -- exponential cost!
1407 -- PRINCIPLE: when we've already simplified an expression once,
1408 -- make sure that we only inline it if it's reasonably small.
1409
1410 && (in_lam == NotInsideLam ||
1411 -- Outside a lambda, we want to be reasonably aggressive
1412 -- about inlining into multiple branches of case
1413 -- e.g. let x = <non-value>
1414 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1415 -- Inlining can be a big win if C3 is the hot-spot, even if
1416 -- the uses in C1, C2 are not 'interesting'
1417 -- An example that gets worse if you add int_cxt here is 'clausify'
1418
1419 (isCheapUnfolding unfolding && int_cxt == IsInteresting))
1420 -- isCheap => acceptable work duplication; in_lam may be true
1421 -- int_cxt to prevent us inlining inside a lambda without some
1422 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1423
1424 IAmDead -> True -- This happens; for example, the case_bndr during case of
1425 -- known constructor: case (a,b) of x { (p,q) -> ... }
1426 -- Here x isn't mentioned in the RHS, so we don't want to
1427 -- create the (dead) let-binding let x = (a,b) in ...
1428
1429 _ -> False
1430
1431 -- Here's an example that we don't handle well:
1432 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1433 -- in \y. ....case f of {...} ....
1434 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1435 -- But
1436 -- - We can't preInlineUnconditionally because that would invalidate
1437 -- the occ info for b.
1438 -- - We can't postInlineUnconditionally because the RHS is big, and
1439 -- that risks exponential behaviour
1440 -- - We can't call-site inline, because the rhs is big
1441 -- Alas!
1442
1443 where
1444 unfolding = idUnfolding bndr
1445 uf_opts = seUnfoldingOpts env
1446 phase = sm_phase (getMode env)
1447 active = isActive phase (idInlineActivation bndr)
1448 -- See Note [pre/postInlineUnconditionally in gentle mode]
1449
1450 {- Note [Inline small things to avoid creating a thunk]
1451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1452 The point of examining occ_info here is that for *non-values* that
1453 occur outside a lambda, the call-site inliner won't have a chance
1454 (because it doesn't know that the thing only occurs once). The
1455 pre-inliner won't have gotten it either, if the thing occurs in more
1456 than one branch So the main target is things like
1457
1458 let x = f y in
1459 case v of
1460 True -> case x of ...
1461 False -> case x of ...
1462
1463 This is very important in practice; e.g. wheel-seive1 doubles
1464 in allocation if you miss this out. And bits of GHC itself start
1465 to allocate more. An egregious example is test perf/compiler/T14697,
1466 where GHC.Driver.CmdLine.$wprocessArgs allocated hugely more.
1467
1468 Note [Suppress exponential blowup]
1469 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1470 In #13253, and several related tickets, we got an exponential blowup
1471 in code size from postInlineUnconditionally. The trouble comes when
1472 we have
1473 let j1a = case f y of { True -> p; False -> q }
1474 j1b = case f y of { True -> q; False -> p }
1475 j2a = case f (y+1) of { True -> j1a; False -> j1b }
1476 j2b = case f (y+1) of { True -> j1b; False -> j1a }
1477 ...
1478 in case f (y+10) of { True -> j10a; False -> j10b }
1479
1480 when there are many branches. In pass 1, postInlineUnconditionally
1481 inlines j10a and j10b (they are both small). Now we have two calls
1482 to j9a and two to j9b. In pass 2, postInlineUnconditionally inlines
1483 all four of these calls, leaving four calls to j8a and j8b. Etc.
1484 Yikes! This is exponential!
1485
1486 A possible plan: stop doing postInlineUnconditionally
1487 for some fixed, smallish number of branches, say 4. But that turned
1488 out to be bad: see Note [Inline small things to avoid creating a thunk].
1489 And, as it happened, the problem with #13253 was solved in a
1490 different way (Note [Duplicating StrictArg] in Simplify).
1491
1492 So I just set an arbitrary, high limit of 100, to stop any
1493 totally exponential behaviour.
1494
1495 This still leaves the nasty possibility that /ordinary/ inlining (not
1496 postInlineUnconditionally) might inline these join points, each of
1497 which is individually quiet small. I'm still not sure what to do
1498 about this (e.g. see #15488).
1499
1500 Note [Top level and postInlineUnconditionally]
1501 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1502 We don't do postInlineUnconditionally for top-level things (even for
1503 ones that are trivial):
1504
1505 * Doing so will inline top-level error expressions that have been
1506 carefully floated out by FloatOut. More generally, it might
1507 replace static allocation with dynamic.
1508
1509 * Even for trivial expressions there's a problem. Consider
1510 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1511 blah xs = reverse xs
1512 ruggle = sort
1513 In one simplifier pass we might fire the rule, getting
1514 blah xs = ruggle xs
1515 but in *that* simplifier pass we must not do postInlineUnconditionally
1516 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1517
1518 If the rhs is trivial it'll be inlined by callSiteInline, and then
1519 the binding will be dead and discarded by the next use of OccurAnal
1520
1521 * There is less point, because the main goal is to get rid of local
1522 bindings used in multiple case branches.
1523
1524 * The inliner should inline trivial things at call sites anyway.
1525
1526 * The Id might be exported. We could check for that separately,
1527 but since we aren't going to postInlineUnconditionally /any/
1528 top-level bindings, we don't need to test.
1529
1530 Note [Stable unfoldings and postInlineUnconditionally]
1531 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1532 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1533 otherwise we lose the unfolding. Example
1534
1535 -- f has stable unfolding with rhs (e |> co)
1536 -- where 'e' is big
1537 f = e |> co
1538
1539 Then there's a danger we'll optimise to
1540
1541 f' = e
1542 f = f' |> co
1543
1544 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1545 won't inline because 'e' is too big.
1546
1547 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1548
1549
1550 ************************************************************************
1551 * *
1552 Rebuilding a lambda
1553 * *
1554 ************************************************************************
1555 -}
1556
1557 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1558 -- mkLam tries three things
1559 -- a) eta reduction, if that gives a trivial expression
1560 -- b) eta expansion [only if there are some value lambdas]
1561 --
1562 -- NB: the SimplEnv already includes the [OutBndr] in its in-scope set
1563 mkLam _env [] body _cont
1564 = return body
1565 mkLam env bndrs body cont
1566 = {-#SCC "mkLam" #-}
1567 do { dflags <- getDynFlags
1568 ; mkLam' dflags bndrs body }
1569 where
1570 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1571 mkLam' dflags bndrs (Cast body co)
1572 | not (any bad bndrs)
1573 -- Note [Casts and lambdas]
1574 = do { lam <- mkLam' dflags bndrs body
1575 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1576 where
1577 co_vars = tyCoVarsOfCo co
1578 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1579
1580 mkLam' dflags bndrs body@(Lam {})
1581 = mkLam' dflags (bndrs ++ bndrs1) body1
1582 where
1583 (bndrs1, body1) = collectBinders body
1584
1585 mkLam' dflags bndrs (Tick t expr)
1586 | tickishFloatable t
1587 = mkTick t <$> mkLam' dflags bndrs expr
1588
1589 mkLam' dflags bndrs body
1590 | gopt Opt_DoEtaReduction dflags
1591 , Just etad_lam <- tryEtaReduce bndrs body
1592 = do { tick (EtaReduction (head bndrs))
1593 ; return etad_lam }
1594
1595 | not (contIsRhs cont) -- See Note [Eta-expanding lambdas]
1596 , sm_eta_expand (getMode env)
1597 , any isRuntimeVar bndrs
1598 , let body_arity = exprEtaExpandArity dflags body
1599 , expandableArityType body_arity
1600 = do { tick (EtaExpansion (head bndrs))
1601 ; let res = mkLams bndrs $
1602 etaExpandAT in_scope body_arity body
1603 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1604 , text "after" <+> ppr res])
1605 ; return res }
1606
1607 | otherwise
1608 = return (mkLams bndrs body)
1609 where
1610 in_scope = getInScope env -- Includes 'bndrs'
1611
1612 {-
1613 Note [Eta expanding lambdas]
1614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1615 In general we *do* want to eta-expand lambdas. Consider
1616 f (\x -> case x of (a,b) -> \s -> blah)
1617 where 's' is a state token, and hence can be eta expanded. This
1618 showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather
1619 important function!
1620
1621 The eta-expansion will never happen unless we do it now. (Well, it's
1622 possible that CorePrep will do it, but CorePrep only has a half-baked
1623 eta-expander that can't deal with casts. So it's much better to do it
1624 here.)
1625
1626 However, when the lambda is let-bound, as the RHS of a let, we have a
1627 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1628 bother to try expansion in mkLam in that case; hence the contIsRhs
1629 guard.
1630
1631 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1632 See Note [No eta expansion in stable unfoldings]
1633
1634 Note [Casts and lambdas]
1635 ~~~~~~~~~~~~~~~~~~~~~~~~
1636 Consider
1637 (\x. (\y. e) `cast` g1) `cast` g2
1638 There is a danger here that the two lambdas look separated, and the
1639 full laziness pass might float an expression to between the two.
1640
1641 So this equation in mkLam' floats the g1 out, thus:
1642 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1643 where x:tx.
1644
1645 In general, this floats casts outside lambdas, where (I hope) they
1646 might meet and cancel with some other cast:
1647 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1648 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1649 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1650 (if not (g `in` co))
1651
1652 Notice that it works regardless of 'e'. Originally it worked only
1653 if 'e' was itself a lambda, but in some cases that resulted in
1654 fruitless iteration in the simplifier. A good example was when
1655 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1656 like (\x. Get `cast` g)
1657 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1658 the Get, and the next iteration eta-reduced it, and then eta-expanded
1659 it again.
1660
1661 Note also the side condition for the case of coercion binders.
1662 It does not make sense to transform
1663 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1664 because the latter is not well-kinded.
1665
1666 ************************************************************************
1667 * *
1668 Eta expansion
1669 * *
1670 ************************************************************************
1671 -}
1672
1673 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr
1674 -> SimplM (ArityType, OutExpr)
1675 -- See Note [Eta-expanding at let bindings]
1676 -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
1677 -- (a) rhs' has manifest arity n
1678 -- (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
1679 tryEtaExpandRhs env bndr rhs
1680 | Just join_arity <- isJoinId_maybe bndr
1681 = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs
1682 oss = [idOneShotInfo id | id <- join_bndrs, isId id]
1683 arity_type | exprIsDeadEnd join_body = mkBotArityType oss
1684 | otherwise = mkTopArityType oss
1685 ; return (arity_type, rhs) }
1686 -- Note [Do not eta-expand join points]
1687 -- But do return the correct arity and bottom-ness, because
1688 -- these are used to set the bndr's IdInfo (#15517)
1689 -- Note [Invariants on join points] invariant 2b, in GHC.Core
1690
1691 | sm_eta_expand mode -- Provided eta-expansion is on
1692 , new_arity > old_arity -- And the current manifest arity isn't enough
1693 , want_eta rhs
1694 = do { tick (EtaExpansion bndr)
1695 ; return (arity_type, etaExpandAT in_scope arity_type rhs) }
1696
1697 | otherwise
1698 = return (arity_type, rhs)
1699
1700 where
1701 mode = getMode env
1702 in_scope = getInScope env
1703 dflags = sm_dflags mode
1704 old_arity = exprArity rhs
1705
1706 arity_type = findRhsArity dflags bndr rhs old_arity
1707 `maxWithArity` idCallArity bndr
1708 new_arity = arityTypeArity arity_type
1709
1710 -- See Note [Which RHSs do we eta-expand?]
1711 want_eta (Cast e _) = want_eta e
1712 want_eta (Tick _ e) = want_eta e
1713 want_eta (Lam b e) | isTyVar b = want_eta e
1714 want_eta (App e a) | exprIsTrivial a = want_eta e
1715 want_eta (Var {}) = False
1716 want_eta (Lit {}) = False
1717 want_eta _ = True
1718 {-
1719 want_eta _ = case arity_type of
1720 ATop (os:_) -> isOneShotInfo os
1721 ATop [] -> False
1722 ABot {} -> True
1723 -}
1724
1725 {-
1726 Note [Eta-expanding at let bindings]
1727 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1728 We now eta expand at let-bindings, which is where the payoff comes.
1729 The most significant thing is that we can do a simple arity analysis
1730 (in GHC.Core.Opt.Arity.findRhsArity), which we can't do for free-floating lambdas
1731
1732 One useful consequence of not eta-expanding lambdas is this example:
1733 genMap :: C a => ...
1734 {-# INLINE genMap #-}
1735 genMap f xs = ...
1736
1737 myMap :: D a => ...
1738 {-# INLINE myMap #-}
1739 myMap = genMap
1740
1741 Notice that 'genMap' should only inline if applied to two arguments.
1742 In the stable unfolding for myMap we'll have the unfolding
1743 (\d -> genMap Int (..d..))
1744 We do not want to eta-expand to
1745 (\d f xs -> genMap Int (..d..) f xs)
1746 because then 'genMap' will inline, and it really shouldn't: at least
1747 as far as the programmer is concerned, it's not applied to two
1748 arguments!
1749
1750 Note [Which RHSs do we eta-expand?]
1751 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1752 We don't eta-expand:
1753
1754 * Trivial RHSs, e.g. f = g
1755 If we eta expand do
1756 f = \x. g x
1757 we'll just eta-reduce again, and so on; so the
1758 simplifier never terminates.
1759
1760 * PAPs: see Note [Do not eta-expand PAPs]
1761
1762 What about things like this?
1763 f = case y of p -> \x -> blah
1764
1765 Here we do eta-expand. This is a change (Jun 20), but if we have
1766 really decided that f has arity 1, then putting that lambda at the top
1767 seems like a Good idea.
1768
1769 Note [Do not eta-expand PAPs]
1770 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1771 We used to have old_arity = manifestArity rhs, which meant that we
1772 would eta-expand even PAPs. But this gives no particular advantage,
1773 and can lead to a massive blow-up in code size, exhibited by #9020.
1774 Suppose we have a PAP
1775 foo :: IO ()
1776 foo = returnIO ()
1777 Then we can eta-expand to
1778 foo = (\eta. (returnIO () |> sym g) eta) |> g
1779 where
1780 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1781
1782 But there is really no point in doing this, and it generates masses of
1783 coercions and whatnot that eventually disappear again. For T9020, GHC
1784 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1785 1.8G to 45M.
1786
1787 Moreover, if we eta expand
1788 f = g d ==> f = \x. g d x
1789 that might in turn make g inline (if it has an inline pragma), which
1790 we might not want. After all, INLINE pragmas say "inline only when
1791 saturated" so we don't want to be too gung-ho about saturating!
1792
1793 But note that this won't eta-expand, say
1794 f = \g -> map g
1795 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1796 strictness analysis will have less to bite on?
1797
1798 Note [Do not eta-expand join points]
1799 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1800 Similarly to CPR (see Note [Don't w/w join points for CPR] in
1801 GHC.Core.Opt.WorkWrap), a join point stands well to gain from its outer binding's
1802 eta-expansion, and eta-expanding a join point is fraught with issues like how to
1803 deal with a cast:
1804
1805 let join $j1 :: IO ()
1806 $j1 = ...
1807 $j2 :: Int -> IO ()
1808 $j2 n = if n > 0 then $j1
1809 else ...
1810
1811 =>
1812
1813 let join $j1 :: IO ()
1814 $j1 = (\eta -> ...)
1815 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1816 ~ IO ()
1817 $j2 :: Int -> IO ()
1818 $j2 n = (\eta -> if n > 0 then $j1
1819 else ...)
1820 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1821 ~ IO ()
1822
1823 The cast here can't be pushed inside the lambda (since it's not casting to a
1824 function type), so the lambda has to stay, but it can't because it contains a
1825 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
1826 than try and detect this situation (and whatever other situations crop up!), we
1827 don't bother; again, any surrounding eta-expansion will improve these join
1828 points anyway, since an outer cast can *always* be pushed inside. By the time
1829 CorePrep comes around, the code is very likely to look more like this:
1830
1831 let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
1832 $j1 = (...) eta
1833 $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
1834 $j2 = if n > 0 then $j1
1835 else (...) eta
1836
1837
1838 ************************************************************************
1839 * *
1840 \subsection{Floating lets out of big lambdas}
1841 * *
1842 ************************************************************************
1843
1844 Note [Floating and type abstraction]
1845 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1846 Consider this:
1847 x = /\a. C e1 e2
1848 We'd like to float this to
1849 y1 = /\a. e1
1850 y2 = /\a. e2
1851 x = /\a. C (y1 a) (y2 a)
1852 for the usual reasons: we want to inline x rather vigorously.
1853
1854 You may think that this kind of thing is rare. But in some programs it is
1855 common. For example, if you do closure conversion you might get:
1856
1857 data a :-> b = forall e. (e -> a -> b) :$ e
1858
1859 f_cc :: forall a. a :-> a
1860 f_cc = /\a. (\e. id a) :$ ()
1861
1862 Now we really want to inline that f_cc thing so that the
1863 construction of the closure goes away.
1864
1865 So I have elaborated simplLazyBind to understand right-hand sides that look
1866 like
1867 /\ a1..an. body
1868
1869 and treat them specially. The real work is done in
1870 GHC.Core.Opt.Simplify.Utils.abstractFloats, but there is quite a bit of plumbing
1871 in simplLazyBind as well.
1872
1873 The same transformation is good when there are lets in the body:
1874
1875 /\abc -> let(rec) x = e in b
1876 ==>
1877 let(rec) x' = /\abc -> let x = x' a b c in e
1878 in
1879 /\abc -> let x = x' a b c in b
1880
1881 This is good because it can turn things like:
1882
1883 let f = /\a -> letrec g = ... g ... in g
1884 into
1885 letrec g' = /\a -> ... g' a ...
1886 in
1887 let f = /\ a -> g' a
1888
1889 which is better. In effect, it means that big lambdas don't impede
1890 let-floating.
1891
1892 This optimisation is CRUCIAL in eliminating the junk introduced by
1893 desugaring mutually recursive definitions. Don't eliminate it lightly!
1894
1895 [May 1999] If we do this transformation *regardless* then we can
1896 end up with some pretty silly stuff. For example,
1897
1898 let
1899 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1900 in ..
1901 becomes
1902 let y1 = /\s -> r1
1903 y2 = /\s -> r2
1904 st = /\s -> ...[y1 s/x1, y2 s/x2]
1905 in ..
1906
1907 Unless the "..." is a WHNF there is really no point in doing this.
1908 Indeed it can make things worse. Suppose x1 is used strictly,
1909 and is of the form
1910
1911 x1* = case f y of { (a,b) -> e }
1912
1913 If we abstract this wrt the tyvar we then can't do the case inline
1914 as we would normally do.
1915
1916 That's why the whole transformation is part of the same process that
1917 floats let-bindings and constructor arguments out of RHSs. In particular,
1918 it is guarded by the doFloatFromRhs call in simplLazyBind.
1919
1920 Note [Which type variables to abstract over]
1921 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1922 Abstract only over the type variables free in the rhs wrt which the
1923 new binding is abstracted. Note that
1924
1925 * The naive approach of abstracting wrt the
1926 tyvars free in the Id's /type/ fails. Consider:
1927 /\ a b -> let t :: (a,b) = (e1, e2)
1928 x :: a = fst t
1929 in ...
1930 Here, b isn't free in x's type, but we must nevertheless
1931 abstract wrt b as well, because t's type mentions b.
1932 Since t is floated too, we'd end up with the bogus:
1933 poly_t = /\ a b -> (e1, e2)
1934 poly_x = /\ a -> fst (poly_t a *b*)
1935
1936 * We must do closeOverKinds. Example (#10934):
1937 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1938 Here we want to float 't', but we must remember to abstract over
1939 'k' as well, even though it is not explicitly mentioned in the RHS,
1940 otherwise we get
1941 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1942 which is obviously bogus.
1943
1944 * We get the variables to abstract over by filtering down the
1945 the main_tvs for the original function, picking only ones
1946 mentioned in the abstracted body. This means:
1947 - they are automatically in dependency order, because main_tvs is
1948 - there is no issue about non-determinism
1949 - we don't gratuitiously change order, which may help (in a tiny
1950 way) with CSE and/or the compiler-debugging experience
1951 -}
1952
1953 abstractFloats :: UnfoldingOpts -> TopLevelFlag -> [OutTyVar] -> SimplFloats
1954 -> OutExpr -> SimplM ([OutBind], OutExpr)
1955 abstractFloats uf_opts top_lvl main_tvs floats body
1956 = assert (notNull body_floats) $
1957 assert (isNilOL (sfJoinFloats floats)) $
1958 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1959 ; return (float_binds, GHC.Core.Subst.substExpr subst body) }
1960 where
1961 is_top_lvl = isTopLevel top_lvl
1962 body_floats = letFloatBinds (sfLetFloats floats)
1963 empty_subst = GHC.Core.Subst.mkEmptySubst (sfInScope floats)
1964
1965 abstract :: GHC.Core.Subst.Subst -> OutBind -> SimplM (GHC.Core.Subst.Subst, OutBind)
1966 abstract subst (NonRec id rhs)
1967 = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
1968 ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
1969 !subst' = GHC.Core.Subst.extendIdSubst subst id poly_app
1970 ; return (subst', NonRec poly_id2 poly_rhs) }
1971 where
1972 rhs' = GHC.Core.Subst.substExpr subst rhs
1973
1974 -- tvs_here: see Note [Which type variables to abstract over]
1975 tvs_here = filter (`elemVarSet` free_tvs) main_tvs
1976 free_tvs = closeOverKinds $
1977 exprSomeFreeVars isTyVar rhs'
1978
1979 abstract subst (Rec prs)
1980 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
1981 ; let subst' = GHC.Core.Subst.extendSubstList subst (ids `zip` poly_apps)
1982 poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
1983 | (poly_id, rhs) <- poly_ids `zip` rhss
1984 , let rhs' = GHC.Core.Subst.substExpr subst' rhs ]
1985 ; return (subst', Rec poly_pairs) }
1986 where
1987 (ids,rhss) = unzip prs
1988 -- For a recursive group, it's a bit of a pain to work out the minimal
1989 -- set of tyvars over which to abstract:
1990 -- /\ a b c. let x = ...a... in
1991 -- letrec { p = ...x...q...
1992 -- q = .....p...b... } in
1993 -- ...
1994 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1995 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1996 -- Since it's a pain, we just use the whole set, which is always safe
1997 --
1998 -- If you ever want to be more selective, remember this bizarre case too:
1999 -- x::a = x
2000 -- Here, we must abstract 'x' over 'a'.
2001 tvs_here = scopedSort main_tvs
2002
2003 mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
2004 mk_poly1 tvs_here var
2005 = do { uniq <- getUniqueM
2006 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
2007 poly_ty = mkInfForAllTys tvs_here (idType var) -- But new type of course
2008 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in GHC.Types.Id
2009 mkLocalId poly_name (idMult var) poly_ty
2010 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
2011 -- In the olden days, it was crucial to copy the occInfo of the original var,
2012 -- because we were looking at occurrence-analysed but as yet unsimplified code!
2013 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
2014 -- at already simplified code, so it doesn't matter
2015 --
2016 -- It's even right to retain single-occurrence or dead-var info:
2017 -- Suppose we started with /\a -> let x = E in B
2018 -- where x occurs once in B. Then we transform to:
2019 -- let x' = /\a -> E in /\a -> let x* = x' a in B
2020 -- where x* has an INLINE prag on it. Now, once x* is inlined,
2021 -- the occurrences of x' will be just the occurrences originally
2022 -- pinned on x.
2023
2024 mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
2025 mk_poly2 poly_id tvs_here rhs
2026 = (poly_id `setIdUnfolding` unf, poly_rhs)
2027 where
2028 poly_rhs = mkLams tvs_here rhs
2029 unf = mkUnfolding uf_opts InlineRhs is_top_lvl False poly_rhs
2030
2031 -- We want the unfolding. Consider
2032 -- let
2033 -- x = /\a. let y = ... in Just y
2034 -- in body
2035 -- Then we float the y-binding out (via abstractFloats and addPolyBind)
2036 -- but 'x' may well then be inlined in 'body' in which case we'd like the
2037 -- opportunity to inline 'y' too.
2038
2039 {-
2040 Note [Abstract over coercions]
2041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2042 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
2043 type variable a. Rather than sort this mess out, we simply bale out and abstract
2044 wrt all the type variables if any of them are coercion variables.
2045
2046
2047 Historical note: if you use let-bindings instead of a substitution, beware of this:
2048
2049 -- Suppose we start with:
2050 --
2051 -- x = /\ a -> let g = G in E
2052 --
2053 -- Then we'll float to get
2054 --
2055 -- x = let poly_g = /\ a -> G
2056 -- in /\ a -> let g = poly_g a in E
2057 --
2058 -- But now the occurrence analyser will see just one occurrence
2059 -- of poly_g, not inside a lambda, so the simplifier will
2060 -- PreInlineUnconditionally poly_g back into g! Badk to square 1!
2061 -- (I used to think that the "don't inline lone occurrences" stuff
2062 -- would stop this happening, but since it's the *only* occurrence,
2063 -- PreInlineUnconditionally kicks in first!)
2064 --
2065 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
2066 -- to appear many times. (NB: mkInlineMe eliminates
2067 -- such notes on trivial RHSs, so do it manually.)
2068
2069 ************************************************************************
2070 * *
2071 prepareAlts
2072 * *
2073 ************************************************************************
2074
2075 prepareAlts tries these things:
2076
2077 1. filterAlts: eliminate alternatives that cannot match, including
2078 the DEFAULT alternative. Here "cannot match" includes knowledge
2079 from GADTs
2080
2081 2. refineDefaultAlt: if the DEFAULT alternative can match only one
2082 possible constructor, then make that constructor explicit.
2083 e.g.
2084 case e of x { DEFAULT -> rhs }
2085 ===>
2086 case e of x { (a,b) -> rhs }
2087 where the type is a single constructor type. This gives better code
2088 when rhs also scrutinises x or e.
2089 See CoreUtils Note [Refine DEFAULT case alternatives]
2090
2091 3. combineIdenticalAlts: combine identical alternatives into a DEFAULT.
2092 See CoreUtils Note [Combine identical alternatives], which also
2093 says why we do this on InAlts not on OutAlts
2094
2095 4. Returns a list of the constructors that cannot holds in the
2096 DEFAULT alternative (if there is one)
2097
2098 It's a good idea to do this stuff before simplifying the alternatives, to
2099 avoid simplifying alternatives we know can't happen, and to come up with
2100 the list of constructors that are handled, to put into the IdInfo of the
2101 case binder, for use when simplifying the alternatives.
2102
2103 Eliminating the default alternative in (1) isn't so obvious, but it can
2104 happen:
2105
2106 data Colour = Red | Green | Blue
2107
2108 f x = case x of
2109 Red -> ..
2110 Green -> ..
2111 DEFAULT -> h x
2112
2113 h y = case y of
2114 Blue -> ..
2115 DEFAULT -> [ case y of ... ]
2116
2117 If we inline h into f, the default case of the inlined h can't happen.
2118 If we don't notice this, we may end up filtering out *all* the cases
2119 of the inner case y, which give us nowhere to go!
2120 -}
2121
2122 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
2123 -- The returned alternatives can be empty, none are possible
2124 prepareAlts scrut case_bndr' alts
2125 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
2126 -- Case binder is needed just for its type. Note that as an
2127 -- OutId, it has maximum information; this is important.
2128 -- Test simpl013 is an example
2129 = do { us <- getUniquesM
2130 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
2131 (yes2, alts2) = refineDefaultAlt us (idMult case_bndr') tc tys idcs1 alts1
2132 -- the multiplicity on case_bndr's is the multiplicity of the
2133 -- case expression The newly introduced patterns in
2134 -- refineDefaultAlt must be scaled by this multiplicity
2135 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
2136 -- "idcs" stands for "impossible default data constructors"
2137 -- i.e. the constructors that can't match the default case
2138 ; when yes2 $ tick (FillInCaseDefault case_bndr')
2139 ; when yes3 $ tick (AltMerge case_bndr')
2140 ; return (idcs3, alts3) }
2141
2142 | otherwise -- Not a data type, so nothing interesting happens
2143 = return ([], alts)
2144 where
2145 imposs_cons = case scrut of
2146 Var v -> otherCons (idUnfolding v)
2147 _ -> []
2148
2149
2150 {-
2151 ************************************************************************
2152 * *
2153 mkCase
2154 * *
2155 ************************************************************************
2156
2157 mkCase tries these things
2158
2159 * Note [Nerge nested cases]
2160 * Note [Eliminate identity case]
2161 * Note [Scrutinee constant folding]
2162
2163 Note [Merge Nested Cases]
2164 ~~~~~~~~~~~~~~~~~~~~~~~~~
2165 case e of b { ==> case e of b {
2166 p1 -> rhs1 p1 -> rhs1
2167 ... ...
2168 pm -> rhsm pm -> rhsm
2169 _ -> case b of b' { pn -> let b'=b in rhsn
2170 pn -> rhsn ...
2171 ... po -> let b'=b in rhso
2172 po -> rhso _ -> let b'=b in rhsd
2173 _ -> rhsd
2174 }
2175
2176 which merges two cases in one case when -- the default alternative of
2177 the outer case scrutises the same variable as the outer case. This
2178 transformation is called Case Merging. It avoids that the same
2179 variable is scrutinised multiple times.
2180
2181 Note [Eliminate Identity Case]
2182 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2183 case e of ===> e
2184 True -> True;
2185 False -> False
2186
2187 and similar friends.
2188
2189 Note [Scrutinee Constant Folding]
2190 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2191 case x op# k# of _ { ===> case x of _ {
2192 a1# -> e1 (a1# inv_op# k#) -> e1
2193 a2# -> e2 (a2# inv_op# k#) -> e2
2194 ... ...
2195 DEFAULT -> ed DEFAULT -> ed
2196
2197 where (x op# k#) inv_op# k# == x
2198
2199 And similarly for commuted arguments and for some unary operations.
2200
2201 The purpose of this transformation is not only to avoid an arithmetic
2202 operation at runtime but to allow other transformations to apply in cascade.
2203
2204 Example with the "Merge Nested Cases" optimization (from #12877):
2205
2206 main = case t of t0
2207 0## -> ...
2208 DEFAULT -> case t0 `minusWord#` 1## of t1
2209 0## -> ...
2210 DEFAULT -> case t1 `minusWord#` 1## of t2
2211 0## -> ...
2212 DEFAULT -> case t2 `minusWord#` 1## of _
2213 0## -> ...
2214 DEFAULT -> ...
2215
2216 becomes:
2217
2218 main = case t of _
2219 0## -> ...
2220 1## -> ...
2221 2## -> ...
2222 3## -> ...
2223 DEFAULT -> ...
2224
2225 There are some wrinkles
2226
2227 * Do not apply caseRules if there is just a single DEFAULT alternative
2228 case e +# 3# of b { DEFAULT -> rhs }
2229 If we applied the transformation here we would (stupidly) get
2230 case a of b' { DEFAULT -> let b = e +# 3# in rhs }
2231 and now the process may repeat, because that let will really
2232 be a case.
2233
2234 * The type of the scrutinee might change. E.g.
2235 case tagToEnum (x :: Int#) of (b::Bool)
2236 False -> e1
2237 True -> e2
2238 ==>
2239 case x of (b'::Int#)
2240 DEFAULT -> e1
2241 1# -> e2
2242
2243 * The case binder may be used in the right hand sides, so we need
2244 to make a local binding for it, if it is alive. e.g.
2245 case e +# 10# of b
2246 DEFAULT -> blah...b...
2247 44# -> blah2...b...
2248 ===>
2249 case e of b'
2250 DEFAULT -> let b = b' +# 10# in blah...b...
2251 34# -> let b = 44# in blah2...b...
2252
2253 Note that in the non-DEFAULT cases we know what to bind 'b' to,
2254 whereas in the DEFAULT case we must reconstruct the original value.
2255 But NB: we use b'; we do not duplicate 'e'.
2256
2257 * In dataToTag we might need to make up some fake binders;
2258 see Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold
2259 -}
2260
2261 mkCase, mkCase1, mkCase2, mkCase3
2262 :: DynFlags
2263 -> OutExpr -> OutId
2264 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
2265 -> SimplM OutExpr
2266
2267 --------------------------------------------------
2268 -- 1. Merge Nested Cases
2269 --------------------------------------------------
2270
2271 mkCase dflags scrut outer_bndr alts_ty (Alt DEFAULT _ deflt_rhs : outer_alts)
2272 | gopt Opt_CaseMerge dflags
2273 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
2274 <- stripTicksTop tickishFloatable deflt_rhs
2275 , inner_scrut_var == outer_bndr
2276 = do { tick (CaseMerge outer_bndr)
2277
2278 ; let wrap_alt (Alt con args rhs) = assert (outer_bndr `notElem` args)
2279 (Alt con args (wrap_rhs rhs))
2280 -- Simplifier's no-shadowing invariant should ensure
2281 -- that outer_bndr is not shadowed by the inner patterns
2282 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
2283 -- The let is OK even for unboxed binders,
2284
2285 wrapped_alts | isDeadBinder inner_bndr = inner_alts
2286 | otherwise = map wrap_alt inner_alts
2287
2288 merged_alts = mergeAlts outer_alts wrapped_alts
2289 -- NB: mergeAlts gives priority to the left
2290 -- case x of
2291 -- A -> e1
2292 -- DEFAULT -> case x of
2293 -- A -> e2
2294 -- B -> e3
2295 -- When we merge, we must ensure that e1 takes
2296 -- precedence over e2 as the value for A!
2297
2298 ; fmap (mkTicks ticks) $
2299 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
2300 }
2301 -- Warning: don't call mkCase recursively!
2302 -- Firstly, there's no point, because inner alts have already had
2303 -- mkCase applied to them, so they won't have a case in their default
2304 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
2305 -- in munge_rhs may put a case into the DEFAULT branch!
2306
2307 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
2308
2309 --------------------------------------------------
2310 -- 2. Eliminate Identity Case
2311 --------------------------------------------------
2312
2313 mkCase1 _dflags scrut case_bndr _ alts@(Alt _ _ rhs1 : _) -- Identity case
2314 | all identity_alt alts
2315 = do { tick (CaseIdentity case_bndr)
2316 ; return (mkTicks ticks $ re_cast scrut rhs1) }
2317 where
2318 ticks = concatMap (\(Alt _ _ rhs) -> stripTicksT tickishFloatable rhs) (tail alts)
2319 identity_alt (Alt con args rhs) = check_eq rhs con args
2320
2321 check_eq (Cast rhs co) con args -- See Note [RHS casts]
2322 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
2323 check_eq (Tick t e) alt args
2324 = tickishFloatable t && check_eq e alt args
2325
2326 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
2327 check_eq (Var v) _ _ | v == case_bndr = True
2328 check_eq (Var v) (DataAlt con) args
2329 | null arg_tys, null args = v == dataConWorkId con
2330 -- Optimisation only
2331 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
2332 mkConApp2 con arg_tys args
2333 check_eq _ _ _ = False
2334
2335 arg_tys = tyConAppArgs (idType case_bndr)
2336
2337 -- Note [RHS casts]
2338 -- ~~~~~~~~~~~~~~~~
2339 -- We've seen this:
2340 -- case e of x { _ -> x `cast` c }
2341 -- And we definitely want to eliminate this case, to give
2342 -- e `cast` c
2343 -- So we throw away the cast from the RHS, and reconstruct
2344 -- it at the other end. All the RHS casts must be the same
2345 -- if (all identity_alt alts) holds.
2346 --
2347 -- Don't worry about nested casts, because the simplifier combines them
2348
2349 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
2350 re_cast scrut _ = scrut
2351
2352 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
2353
2354 --------------------------------------------------
2355 -- 2. Scrutinee Constant Folding
2356 --------------------------------------------------
2357
2358 mkCase2 dflags scrut bndr alts_ty alts
2359 | -- See Note [Scrutinee Constant Folding]
2360 case alts of -- Not if there is just a DEFAULT alternative
2361 [Alt DEFAULT _ _] -> False
2362 _ -> True
2363 , gopt Opt_CaseFolding dflags
2364 , Just (scrut', tx_con, mk_orig) <- caseRules (targetPlatform dflags) scrut
2365 = do { bndr' <- newId (fsLit "lwild") Many (exprType scrut')
2366
2367 ; alts' <- mapMaybeM (tx_alt tx_con mk_orig bndr') alts
2368 -- mapMaybeM: discard unreachable alternatives
2369 -- See Note [Unreachable caseRules alternatives]
2370 -- in GHC.Core.Opt.ConstantFold
2371
2372 ; mkCase3 dflags scrut' bndr' alts_ty $
2373 add_default (re_sort alts')
2374 }
2375
2376 | otherwise
2377 = mkCase3 dflags scrut bndr alts_ty alts
2378 where
2379 -- We need to keep the correct association between the scrutinee and its
2380 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
2381 -- "let bndr = ... in":
2382 --
2383 -- case v + 10 of y =====> case v of y
2384 -- 20 -> e1 10 -> let y = 20 in e1
2385 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
2386 --
2387 -- Other transformations give: =====> case v of y'
2388 -- 10 -> let y = 20 in e1
2389 -- DEFAULT -> let y = y' + 10 in e2
2390 --
2391 -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
2392 -- to construct an expression equivalent to the original one, for use
2393 -- in the DEFAULT case
2394
2395 tx_alt :: (AltCon -> Maybe AltCon) -> (Id -> CoreExpr) -> Id
2396 -> CoreAlt -> SimplM (Maybe CoreAlt)
2397 tx_alt tx_con mk_orig new_bndr (Alt con bs rhs)
2398 = case tx_con con of
2399 Nothing -> return Nothing
2400 Just con' -> do { bs' <- mk_new_bndrs new_bndr con'
2401 ; return (Just (Alt con' bs' rhs')) }
2402 where
2403 rhs' | isDeadBinder bndr = rhs
2404 | otherwise = bindNonRec bndr orig_val rhs
2405
2406 orig_val = case con of
2407 DEFAULT -> mk_orig new_bndr
2408 LitAlt l -> Lit l
2409 DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
2410
2411 mk_new_bndrs new_bndr (DataAlt dc)
2412 | not (isNullaryRepDataCon dc)
2413 = -- For non-nullary data cons we must invent some fake binders
2414 -- See Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold
2415 do { us <- getUniquesM
2416 ; let (ex_tvs, arg_ids) = dataConRepInstPat us (idMult new_bndr) dc
2417 (tyConAppArgs (idType new_bndr))
2418 ; return (ex_tvs ++ arg_ids) }
2419 mk_new_bndrs _ _ = return []
2420
2421 re_sort :: [CoreAlt] -> [CoreAlt]
2422 -- Sort the alternatives to re-establish
2423 -- GHC.Core Note [Case expression invariants]
2424 re_sort alts = sortBy cmpAlt alts
2425
2426 add_default :: [CoreAlt] -> [CoreAlt]
2427 -- See Note [Literal cases]
2428 add_default (Alt (LitAlt {}) bs rhs : alts) = Alt DEFAULT bs rhs : alts
2429 add_default alts = alts
2430
2431 {- Note [Literal cases]
2432 ~~~~~~~~~~~~~~~~~~~~~~~
2433 If we have
2434 case tagToEnum (a ># b) of
2435 False -> e1
2436 True -> e2
2437
2438 then caseRules for TagToEnum will turn it into
2439 case tagToEnum (a ># b) of
2440 0# -> e1
2441 1# -> e2
2442
2443 Since the case is exhaustive (all cases are) we can convert it to
2444 case tagToEnum (a ># b) of
2445 DEFAULT -> e1
2446 1# -> e2
2447
2448 This may generate sligthtly better code (although it should not, since
2449 all cases are exhaustive) and/or optimise better. I'm not certain that
2450 it's necessary, but currently we do make this change. We do it here,
2451 NOT in the TagToEnum rules (see "Beware" in Note [caseRules for tagToEnum]
2452 in GHC.Core.Opt.ConstantFold)
2453 -}
2454
2455 --------------------------------------------------
2456 -- Catch-all
2457 --------------------------------------------------
2458 mkCase3 _dflags scrut bndr alts_ty alts
2459 = return (Case scrut bndr alts_ty alts)
2460
2461 -- See Note [Exitification] and Note [Do not inline exit join points] in
2462 -- GHC.Core.Opt.Exitify
2463 -- This lives here (and not in Id) because occurrence info is only valid on
2464 -- InIds, so it's crucial that isExitJoinId is only called on freshly
2465 -- occ-analysed code. It's not a generic function you can call anywhere.
2466 isExitJoinId :: Var -> Bool
2467 isExitJoinId id
2468 = isJoinId id
2469 && isOneOcc (idOccInfo id)
2470 && occ_in_lam (idOccInfo id) == IsInsideLam
2471
2472 {-
2473 Note [Dead binders]
2474 ~~~~~~~~~~~~~~~~~~~~
2475 Note that dead-ness is maintained by the simplifier, so that it is
2476 accurate after simplification as well as before.
2477
2478
2479 Note [Cascading case merge]
2480 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2481 Case merging should cascade in one sweep, because it
2482 happens bottom-up
2483
2484 case e of a {
2485 DEFAULT -> case a of b
2486 DEFAULT -> case b of c {
2487 DEFAULT -> e
2488 A -> ea
2489 B -> eb
2490 C -> ec
2491 ==>
2492 case e of a {
2493 DEFAULT -> case a of b
2494 DEFAULT -> let c = b in e
2495 A -> let c = b in ea
2496 B -> eb
2497 C -> ec
2498 ==>
2499 case e of a {
2500 DEFAULT -> let b = a in let c = b in e
2501 A -> let b = a in let c = b in ea
2502 B -> let b = a in eb
2503 C -> ec
2504
2505
2506 However here's a tricky case that we still don't catch, and I don't
2507 see how to catch it in one pass:
2508
2509 case x of c1 { I# a1 ->
2510 case a1 of c2 ->
2511 0 -> ...
2512 DEFAULT -> case x of c3 { I# a2 ->
2513 case a2 of ...
2514
2515 After occurrence analysis (and its binder-swap) we get this
2516
2517 case x of c1 { I# a1 ->
2518 let x = c1 in -- Binder-swap addition
2519 case a1 of c2 ->
2520 0 -> ...
2521 DEFAULT -> case x of c3 { I# a2 ->
2522 case a2 of ...
2523
2524 When we simplify the inner case x, we'll see that
2525 x=c1=I# a1. So we'll bind a2 to a1, and get
2526
2527 case x of c1 { I# a1 ->
2528 case a1 of c2 ->
2529 0 -> ...
2530 DEFAULT -> case a1 of ...
2531
2532 This is correct, but we can't do a case merge in this sweep
2533 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2534 without getting changed to c1=I# c2.
2535
2536 I don't think this is worth fixing, even if I knew how. It'll
2537 all come out in the next pass anyway.
2538 -}