never executed always true always false
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[Simplify]{The main module of the simplifier}
5 -}
6
7
8 {-# LANGUAGE TypeFamilies #-}
9
10 {-# OPTIONS_GHC -Wno-incomplete-record-updates -Wno-incomplete-uni-patterns #-}
11 module GHC.Core.Opt.Simplify ( simplTopBinds, simplExpr, simplRules ) where
12
13 import GHC.Prelude
14
15 import GHC.Platform
16
17 import GHC.Driver.Session
18
19 import GHC.Core
20 import GHC.Core.Opt.Simplify.Monad
21 import GHC.Core.Type hiding ( substTy, substTyVar, extendTvSubst, extendCvSubst )
22 import GHC.Core.Opt.Simplify.Env
23 import GHC.Core.Opt.Simplify.Utils
24 import GHC.Core.Opt.OccurAnal ( occurAnalyseExpr )
25 import GHC.Core.Make ( FloatBind, mkImpossibleExpr, castBottomExpr )
26 import qualified GHC.Core.Make
27 import GHC.Core.Coercion hiding ( substCo, substCoVar )
28 import GHC.Core.Reduction
29 import GHC.Core.Coercion.Opt ( optCoercion )
30 import GHC.Core.FamInstEnv ( FamInstEnv, topNormaliseType_maybe )
31 import GHC.Core.DataCon
32 ( DataCon, dataConWorkId, dataConRepStrictness
33 , dataConRepArgTys, isUnboxedTupleDataCon
34 , StrictnessMark (..) )
35 import GHC.Core.Opt.Monad ( Tick(..), SimplMode(..) )
36 import GHC.Core.Ppr ( pprCoreExpr )
37 import GHC.Core.Unfold
38 import GHC.Core.Unfold.Make
39 import GHC.Core.Utils
40 import GHC.Core.Opt.Arity ( ArityType(..)
41 , pushCoTyArg, pushCoValArg
42 , etaExpandAT )
43 import GHC.Core.SimpleOpt ( exprIsConApp_maybe, joinPointBinding_maybe, joinPointBindings_maybe )
44 import GHC.Core.FVs ( mkRuleInfo )
45 import GHC.Core.Rules ( lookupRule, getRules, initRuleOpts )
46 import GHC.Core.Multiplicity
47
48 import GHC.Types.Literal ( litIsLifted ) --, mkLitInt ) -- temporalily commented out. See #8326
49 import GHC.Types.SourceText
50 import GHC.Types.Id
51 import GHC.Types.Id.Make ( seqId )
52 import GHC.Types.Id.Info
53 import GHC.Types.Name ( mkSystemVarName, isExternalName, getOccFS )
54 import GHC.Types.Demand
55 import GHC.Types.Cpr ( mkCprSig, botCpr )
56 import GHC.Types.Unique ( hasKey )
57 import GHC.Types.Basic
58 import GHC.Types.Tickish
59 import GHC.Types.Var ( isTyCoVar )
60 import GHC.Builtin.PrimOps ( PrimOp (SeqOp) )
61 import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
62 import GHC.Builtin.Names( runRWKey )
63
64 import GHC.Data.Maybe ( isNothing, orElse )
65 import GHC.Data.FastString
66 import GHC.Unit.Module ( moduleName, pprModuleName )
67 import GHC.Utils.Outputable
68 import GHC.Utils.Panic
69 import GHC.Utils.Panic.Plain
70 import GHC.Utils.Constants (debugIsOn)
71 import GHC.Utils.Trace
72 import GHC.Utils.Monad ( mapAccumLM, liftIO )
73 import GHC.Utils.Logger
74
75 import Control.Monad
76
77
78 {-
79 The guts of the simplifier is in this module, but the driver loop for
80 the simplifier is in GHC.Core.Opt.Pipeline
81
82 Note [The big picture]
83 ~~~~~~~~~~~~~~~~~~~~~~
84 The general shape of the simplifier is this:
85
86 simplExpr :: SimplEnv -> InExpr -> SimplCont -> SimplM (SimplFloats, OutExpr)
87 simplBind :: SimplEnv -> InBind -> SimplM (SimplFloats, SimplEnv)
88
89 * SimplEnv contains
90 - Simplifier mode (which includes DynFlags for convenience)
91 - Ambient substitution
92 - InScopeSet
93
94 * SimplFloats contains
95 - Let-floats (which includes ok-for-spec case-floats)
96 - Join floats
97 - InScopeSet (including all the floats)
98
99 * Expressions
100 simplExpr :: SimplEnv -> InExpr -> SimplCont
101 -> SimplM (SimplFloats, OutExpr)
102 The result of simplifying an /expression/ is (floats, expr)
103 - A bunch of floats (let bindings, join bindings)
104 - A simplified expression.
105 The overall result is effectively (let floats in expr)
106
107 * Bindings
108 simplBind :: SimplEnv -> InBind -> SimplM (SimplFloats, SimplEnv)
109 The result of simplifying a binding is
110 - A bunch of floats, the last of which is the simplified binding
111 There may be auxiliary bindings too; see prepareRhs
112 - An environment suitable for simplifying the scope of the binding
113
114 The floats may also be empty, if the binding is inlined unconditionally;
115 in that case the returned SimplEnv will have an augmented substitution.
116
117 The returned floats and env both have an in-scope set, and they are
118 guaranteed to be the same.
119
120
121 Note [Shadowing]
122 ~~~~~~~~~~~~~~~~
123 The simplifier used to guarantee that the output had no shadowing, but
124 it does not do so any more. (Actually, it never did!) The reason is
125 documented with simplifyArgs.
126
127
128 Eta expansion
129 ~~~~~~~~~~~~~~
130 For eta expansion, we want to catch things like
131
132 case e of (a,b) -> \x -> case a of (p,q) -> \y -> r
133
134 If the \x was on the RHS of a let, we'd eta expand to bring the two
135 lambdas together. And in general that's a good thing to do. Perhaps
136 we should eta expand wherever we find a (value) lambda? Then the eta
137 expansion at a let RHS can concentrate solely on the PAP case.
138
139 Note [In-scope set as a substitution]
140 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
141 As per Note [Lookups in in-scope set], an in-scope set can act as
142 a substitution. Specifically, it acts as a substitution from variable to
143 variables /with the same unique/.
144
145 Why do we need this? Well, during the course of the simplifier, we may want to
146 adjust inessential properties of a variable. For instance, when performing a
147 beta-reduction, we change
148
149 (\x. e) u ==> let x = u in e
150
151 We typically want to add an unfolding to `x` so that it inlines to (the
152 simplification of) `u`.
153
154 We do that by adding the unfolding to the binder `x`, which is added to the
155 in-scope set. When simplifying occurrences of `x` (every occurrence!), they are
156 replaced by their “updated” version from the in-scope set, hence inherit the
157 unfolding. This happens in `SimplEnv.substId`.
158
159 Another example. Consider
160
161 case x of y { Node a b -> ...y...
162 ; Leaf v -> ...y... }
163
164 In the Node branch want y's unfolding to be (Node a b); in the Leaf branch we
165 want y's unfolding to be (Leaf v). We achieve this by adding the appropriate
166 unfolding to y, and re-adding it to the in-scope set. See the calls to
167 `addBinderUnfolding` in `Simplify.addAltUnfoldings` and elsewhere.
168
169 It's quite convenient. This way we don't need to manipulate the substitution all
170 the time: every update to a binder is automatically reflected to its bound
171 occurrences.
172
173 Note [Bangs in the Simplifier]
174 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175 Both SimplFloats and SimplEnv do *not* generally benefit from making
176 their fields strict. I don't know if this is because of good use of
177 laziness or unintended side effects like closures capturing more variables
178 after WW has run.
179
180 But the end result is that we keep these lazy, but force them in some places
181 where we know it's beneficial to the compiler.
182
183 Similarly environments returned from functions aren't *always* beneficial to
184 force. In some places they would never be demanded so forcing them early
185 increases allocation. In other places they almost always get demanded so
186 it's worthwhile to force them early.
187
188 Would it be better to through every allocation of e.g. SimplEnv and decide
189 wether or not to make this one strict? Absolutely! Would be a good use of
190 someones time? Absolutely not! I made these strict that showed up during
191 a profiled build or which I noticed while looking at core for one reason
192 or another.
193
194 The result sadly is that we end up with "random" bangs in the simplifier
195 where we sometimes force e.g. the returned environment from a function and
196 sometimes we don't for the same function. Depending on the context around
197 the call. The treatment is also not very consistent. I only added bangs
198 where I saw it making a difference either in the core or benchmarks. Some
199 patterns where it would be beneficial aren't convered as a consequence as
200 I neither have the time to go through all of the core and some cases are
201 too small to show up in benchmarks.
202
203
204
205 ************************************************************************
206 * *
207 \subsection{Bindings}
208 * *
209 ************************************************************************
210 -}
211
212 simplTopBinds :: SimplEnv -> [InBind] -> SimplM (SimplFloats, SimplEnv)
213 -- See Note [The big picture]
214 simplTopBinds env0 binds0
215 = do { -- Put all the top-level binders into scope at the start
216 -- so that if a rewrite rule has unexpectedly brought
217 -- anything into scope, then we don't get a complaint about that.
218 -- It's rather as if the top-level binders were imported.
219 -- See note [Glomming] in "GHC.Core.Opt.OccurAnal".
220 -- See Note [Bangs in the Simplifier]
221 ; !env1 <- {-#SCC "simplTopBinds-simplRecBndrs" #-} simplRecBndrs env0 (bindersOfBinds binds0)
222 ; (floats, env2) <- {-#SCC "simplTopBinds-simpl_binds" #-} simpl_binds env1 binds0
223 ; freeTick SimplifierDone
224 ; return (floats, env2) }
225 where
226 -- We need to track the zapped top-level binders, because
227 -- they should have their fragile IdInfo zapped (notably occurrence info)
228 -- That's why we run down binds and bndrs' simultaneously.
229 --
230 simpl_binds :: SimplEnv -> [InBind] -> SimplM (SimplFloats, SimplEnv)
231 simpl_binds env [] = return (emptyFloats env, env)
232 simpl_binds env (bind:binds) = do { (float, env1) <- simpl_bind env bind
233 ; (floats, env2) <- simpl_binds env1 binds
234 -- See Note [Bangs in the Simplifier]
235 ; let !floats1 = float `addFloats` floats
236 ; return (floats1, env2) }
237
238 simpl_bind env (Rec pairs)
239 = simplRecBind env TopLevel Nothing pairs
240 simpl_bind env (NonRec b r)
241 = do { (env', b') <- addBndrRules env b (lookupRecBndr env b) Nothing
242 ; simplRecOrTopPair env' TopLevel NonRecursive Nothing b b' r }
243
244 {-
245 ************************************************************************
246 * *
247 Lazy bindings
248 * *
249 ************************************************************************
250
251 simplRecBind is used for
252 * recursive bindings only
253 -}
254
255 simplRecBind :: SimplEnv -> TopLevelFlag -> MaybeJoinCont
256 -> [(InId, InExpr)]
257 -> SimplM (SimplFloats, SimplEnv)
258 simplRecBind env0 top_lvl mb_cont pairs0
259 = do { (env_with_info, triples) <- mapAccumLM add_rules env0 pairs0
260 ; (rec_floats, env1) <- go env_with_info triples
261 ; return (mkRecFloats rec_floats, env1) }
262 where
263 add_rules :: SimplEnv -> (InBndr,InExpr) -> SimplM (SimplEnv, (InBndr, OutBndr, InExpr))
264 -- Add the (substituted) rules to the binder
265 add_rules env (bndr, rhs)
266 = do { (env', bndr') <- addBndrRules env bndr (lookupRecBndr env bndr) mb_cont
267 ; return (env', (bndr, bndr', rhs)) }
268
269 go env [] = return (emptyFloats env, env)
270
271 go env ((old_bndr, new_bndr, rhs) : pairs)
272 = do { (float, env1) <- simplRecOrTopPair env top_lvl Recursive mb_cont
273 old_bndr new_bndr rhs
274 ; (floats, env2) <- go env1 pairs
275 ; return (float `addFloats` floats, env2) }
276
277 {-
278 simplOrTopPair is used for
279 * recursive bindings (whether top level or not)
280 * top-level non-recursive bindings
281
282 It assumes the binder has already been simplified, but not its IdInfo.
283 -}
284
285 simplRecOrTopPair :: SimplEnv
286 -> TopLevelFlag -> RecFlag -> MaybeJoinCont
287 -> InId -> OutBndr -> InExpr -- Binder and rhs
288 -> SimplM (SimplFloats, SimplEnv)
289
290 simplRecOrTopPair env top_lvl is_rec mb_cont old_bndr new_bndr rhs
291 | Just env' <- preInlineUnconditionally env top_lvl old_bndr rhs env
292 = {-#SCC "simplRecOrTopPair-pre-inline-uncond" #-}
293 simplTrace env "SimplBindr:inline-uncond" (ppr old_bndr) $
294 do { tick (PreInlineUnconditionally old_bndr)
295 ; return ( emptyFloats env, env' ) }
296
297 | Just cont <- mb_cont
298 = {-#SCC "simplRecOrTopPair-join" #-}
299 assert (isNotTopLevel top_lvl && isJoinId new_bndr )
300 simplTrace env "SimplBind:join" (ppr old_bndr) $
301 simplJoinBind env cont old_bndr new_bndr rhs env
302
303 | otherwise
304 = {-#SCC "simplRecOrTopPair-normal" #-}
305 simplTrace env "SimplBind:normal" (ppr old_bndr) $
306 simplLazyBind env top_lvl is_rec old_bndr new_bndr rhs env
307
308 simplTrace :: SimplEnv -> String -> SDoc -> a -> a
309 simplTrace env herald doc thing_inside
310 | not (logHasDumpFlag logger Opt_D_verbose_core2core)
311 = thing_inside
312 | otherwise
313 = logTraceMsg logger herald doc thing_inside
314 where
315 logger = seLogger env
316
317 --------------------------
318 simplLazyBind :: SimplEnv
319 -> TopLevelFlag -> RecFlag
320 -> InId -> OutId -- Binder, both pre-and post simpl
321 -- Not a JoinId
322 -- The OutId has IdInfo, except arity, unfolding
323 -- Ids only, no TyVars
324 -> InExpr -> SimplEnv -- The RHS and its environment
325 -> SimplM (SimplFloats, SimplEnv)
326 -- Precondition: not a JoinId
327 -- Precondition: rhs obeys the let/app invariant
328 -- NOT used for JoinIds
329 simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
330 = assert (isId bndr )
331 assertPpr (not (isJoinId bndr)) (ppr bndr) $
332 -- pprTrace "simplLazyBind" ((ppr bndr <+> ppr bndr1) $$ ppr rhs $$ ppr (seIdSubst rhs_se)) $
333 do { let !rhs_env = rhs_se `setInScopeFromE` env -- See Note [Bangs in the Simplifier]
334 (tvs, body) = case collectTyAndValBinders rhs of
335 (tvs, [], body)
336 | surely_not_lam body -> (tvs, body)
337 _ -> ([], rhs)
338
339 surely_not_lam (Lam {}) = False
340 surely_not_lam (Tick t e)
341 | not (tickishFloatable t) = surely_not_lam e
342 -- eta-reduction could float
343 surely_not_lam _ = True
344 -- Do not do the "abstract tyvar" thing if there's
345 -- a lambda inside, because it defeats eta-reduction
346 -- f = /\a. \x. g a x
347 -- should eta-reduce.
348
349
350 ; (body_env, tvs') <- {-#SCC "simplBinders" #-} simplBinders rhs_env tvs
351 -- See Note [Floating and type abstraction] in GHC.Core.Opt.Simplify.Utils
352
353 -- Simplify the RHS
354 ; let rhs_cont = mkRhsStop (substTy body_env (exprType body))
355 ; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont
356
357 -- Never float join-floats out of a non-join let-binding (which this is)
358 -- So wrap the body in the join-floats right now
359 -- Hence: body_floats1 consists only of let-floats
360 ; let (body_floats1, body1) = wrapJoinFloatsX body_floats0 body0
361
362 -- ANF-ise a constructor or PAP rhs
363 -- We get at most one float per argument here
364 ; let body_env1 = body_env `setInScopeFromF` body_floats1
365 -- body_env1: add to in-scope set the binders from body_floats1
366 -- so that prepareBinding knows what is in scope in body1
367 ; (let_floats, body2) <- {-#SCC "prepareBinding" #-}
368 prepareBinding body_env1 top_lvl bndr1 body1
369 ; let body_floats2 = body_floats1 `addLetFloats` let_floats
370
371 ; (rhs_floats, body3)
372 <- if not (doFloatFromRhs top_lvl is_rec False body_floats2 body2)
373 then -- No floating, revert to body1
374 return (emptyFloats env, wrapFloats body_floats2 body1)
375
376 else if null tvs then -- Simple floating
377 {-#SCC "simplLazyBind-simple-floating" #-}
378 do { tick LetFloatFromLet
379 ; return (body_floats2, body2) }
380
381 else -- Do type-abstraction first
382 {-#SCC "simplLazyBind-type-abstraction-first" #-}
383 do { tick LetFloatFromLet
384 ; (poly_binds, body3) <- abstractFloats (seUnfoldingOpts env) top_lvl
385 tvs' body_floats2 body2
386 ; let floats = foldl' extendFloats (emptyFloats env) poly_binds
387 ; return (floats, body3) }
388
389 ; let env' = env `setInScopeFromF` rhs_floats
390 ; rhs' <- mkLam env' tvs' body3 rhs_cont
391 ; (bind_float, env2) <- completeBind env' top_lvl Nothing bndr bndr1 rhs'
392 ; return (rhs_floats `addFloats` bind_float, env2) }
393
394 --------------------------
395 simplJoinBind :: SimplEnv
396 -> SimplCont
397 -> InId -> OutId -- Binder, both pre-and post simpl
398 -- The OutId has IdInfo, except arity,
399 -- unfolding
400 -> InExpr -> SimplEnv -- The right hand side and its env
401 -> SimplM (SimplFloats, SimplEnv)
402 simplJoinBind env cont old_bndr new_bndr rhs rhs_se
403 = do { let rhs_env = rhs_se `setInScopeFromE` env
404 ; rhs' <- simplJoinRhs rhs_env old_bndr rhs cont
405 ; completeBind env NotTopLevel (Just cont) old_bndr new_bndr rhs' }
406
407 --------------------------
408 simplNonRecX :: SimplEnv
409 -> InId -- Old binder; not a JoinId
410 -> OutExpr -- Simplified RHS
411 -> SimplM (SimplFloats, SimplEnv)
412 -- A specialised variant of simplNonRec used when the RHS is already
413 -- simplified, notably in knownCon. It uses case-binding where necessary.
414 --
415 -- Precondition: rhs satisfies the let/app invariant
416
417 simplNonRecX env bndr new_rhs
418 | assertPpr (not (isJoinId bndr)) (ppr bndr) $
419 isDeadBinder bndr -- Not uncommon; e.g. case (a,b) of c { (p,q) -> p }
420 = return (emptyFloats env, env) -- Here c is dead, and we avoid
421 -- creating the binding c = (a,b)
422
423 | Coercion co <- new_rhs
424 = return (emptyFloats env, extendCvSubst env bndr co)
425
426 | exprIsTrivial new_rhs -- Short-cut for let x = y in ...
427 -- This case would ultimately land in postInlineUnconditionally
428 -- but it seems not uncommon, and avoids a lot of faff to do it here
429 = return (emptyFloats env
430 , extendIdSubst env bndr (DoneEx new_rhs Nothing))
431
432 | otherwise
433 = do { (env', bndr') <- simplBinder env bndr
434 ; completeNonRecX NotTopLevel env' (isStrictId bndr') bndr bndr' new_rhs }
435 -- NotTopLevel: simplNonRecX is only used for NotTopLevel things
436 --
437 -- isStrictId: use bndr' because the InId bndr might not have
438 -- a fixed runtime representation, which isStrictId doesn't expect
439 -- c.f. Note [Dark corner with representation polymorphism]
440
441 --------------------------
442 completeNonRecX :: TopLevelFlag -> SimplEnv
443 -> Bool
444 -> InId -- Old binder; not a JoinId
445 -> OutId -- New binder
446 -> OutExpr -- Simplified RHS
447 -> SimplM (SimplFloats, SimplEnv) -- The new binding is in the floats
448 -- Precondition: rhs satisfies the let/app invariant
449 -- See Note [Core let/app invariant] in GHC.Core
450
451 completeNonRecX top_lvl env is_strict old_bndr new_bndr new_rhs
452 = assertPpr (not (isJoinId new_bndr)) (ppr new_bndr) $
453 do { (prepd_floats, new_rhs) <- prepareBinding env top_lvl new_bndr new_rhs
454 ; let floats = emptyFloats env `addLetFloats` prepd_floats
455 ; (rhs_floats, rhs2) <-
456 if doFloatFromRhs NotTopLevel NonRecursive is_strict floats new_rhs
457 then -- Add the floats to the main env
458 do { tick LetFloatFromLet
459 ; return (floats, new_rhs) }
460 else -- Do not float; wrap the floats around the RHS
461 return (emptyFloats env, wrapFloats floats new_rhs)
462
463 ; (bind_float, env2) <- completeBind (env `setInScopeFromF` rhs_floats)
464 NotTopLevel Nothing
465 old_bndr new_bndr rhs2
466 ; return (rhs_floats `addFloats` bind_float, env2) }
467
468
469 {- *********************************************************************
470 * *
471 Cast worker/wrapper
472 * *
473 ************************************************************************
474
475 Note [Cast worker/wrapper]
476 ~~~~~~~~~~~~~~~~~~~~~~~~~~
477 When we have a binding
478 x = e |> co
479 we want to do something very similar to worker/wrapper:
480 $wx = e
481 x = $wx |> co
482
483 We call this making a cast worker/wrapper in tryCastWorkerWrapper.
484
485 The main motivaiton is that x can be inlined freely. There's a chance
486 that e will be a constructor application or function, or something
487 like that, so moving the coercion to the usage site may well cancel
488 the coercions and lead to further optimisation. Example:
489
490 data family T a :: *
491 data instance T Int = T Int
492
493 foo :: Int -> Int -> Int
494 foo m n = ...
495 where
496 t = T m
497 go 0 = 0
498 go n = case t of { T m -> go (n-m) }
499 -- This case should optimise
500
501 A second reason for doing cast worker/wrapper is that the worker/wrapper
502 pass after strictness analysis can't deal with RHSs like
503 f = (\ a b c. blah) |> co
504 Instead, it relies on cast worker/wrapper to get rid of the cast,
505 leaving a simpler job for demand-analysis worker/wrapper. See #19874.
506
507 Wrinkles
508
509 1. We must /not/ do cast w/w on
510 f = g |> co
511 otherwise it'll just keep repeating forever! You might think this
512 is avoided because the call to tryCastWorkerWrapper is guarded by
513 preInlineUnconditinally, but I'm worried that a loop-breaker or an
514 exported Id might say False to preInlineUnonditionally.
515
516 2. We need to be careful with inline/noinline pragmas:
517 rec { {-# NOINLINE f #-}
518 f = (...g...) |> co
519 ; g = ...f... }
520 This is legitimate -- it tells GHC to use f as the loop breaker
521 rather than g. Now we do the cast thing, to get something like
522 rec { $wf = ...g...
523 ; f = $wf |> co
524 ; g = ...f... }
525 Where should the NOINLINE pragma go? If we leave it on f we'll get
526 rec { $wf = ...g...
527 ; {-# NOINLINE f #-}
528 f = $wf |> co
529 ; g = ...f... }
530 and that is bad: the whole point is that we want to inline that
531 cast! We want to transfer the pagma to $wf:
532 rec { {-# NOINLINE $wf #-}
533 $wf = ...g...
534 ; f = $wf |> co
535 ; g = ...f... }
536 c.f. Note [Worker/wrapper for NOINLINE functions] in GHC.Core.Opt.WorkWrap.
537
538 3. We should still do cast w/w even if `f` is INLINEABLE. E.g.
539 {- f: Stable unfolding = <stable-big> -}
540 f = (\xy. <big-body>) |> co
541 Then we want to w/w to
542 {- $wf: Stable unfolding = <stable-big> |> sym co -}
543 $wf = \xy. <big-body>
544 f = $wf |> co
545 Notice that the stable unfolding moves to the worker! Now demand analysis
546 will work fine on $wf, whereas it has trouble with the original f.
547 c.f. Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap.
548 This point also applies to strong loopbreakers with INLINE pragmas, see
549 wrinkle (4).
550
551 4. We should /not/ do cast w/w for non-loop-breaker INLINE functions (hence
552 hasInlineUnfolding in tryCastWorkerWrapper, which responds False to
553 loop-breakers) because they'll definitely be inlined anyway, cast and
554 all. And if we do cast w/w for an INLINE function with arity zero, we get
555 something really silly: we inline that "worker" right back into the wrapper!
556 Worse than a no-op, because we have then lost the stable unfolding.
557
558 All these wrinkles are exactly like worker/wrapper for strictness analysis:
559 f is the wrapper and must inline like crazy
560 $wf is the worker and must carry f's original pragma
561 See Note [Worker/wrapper for INLINABLE functions]
562 and Note [Worker/wrapper for NOINLINE functions] in GHC.Core.Opt.WorkWrap.
563
564 See #17673, #18093, #18078, #19890.
565
566 Note [Preserve strictness in cast w/w]
567 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
568 In the Note [Cast worker/wrapper] transformation, keep the strictness info.
569 Eg
570 f = e `cast` co -- f has strictness SSL
571 When we transform to
572 f' = e -- f' also has strictness SSL
573 f = f' `cast` co -- f still has strictness SSL
574
575 Its not wrong to drop it on the floor, but better to keep it.
576
577 Note [Cast w/w: unlifted]
578 ~~~~~~~~~~~~~~~~~~~~~~~~~
579 BUT don't do cast worker/wrapper if 'e' has an unlifted type.
580 This *can* happen:
581
582 foo :: Int = (error (# Int,Int #) "urk")
583 `cast` CoUnsafe (# Int,Int #) Int
584
585 If do the makeTrivial thing to the error call, we'll get
586 foo = case error (# Int,Int #) "urk" of v -> v `cast` ...
587 But 'v' isn't in scope!
588
589 These strange casts can happen as a result of case-of-case
590 bar = case (case x of { T -> (# 2,3 #); F -> error "urk" }) of
591 (# p,q #) -> p+q
592
593 NOTE: Nowadays we don't use casts for these error functions;
594 instead, we use (case erorr ... of {}). So I'm not sure
595 this Note makes much sense any more.
596 -}
597
598 tryCastWorkerWrapper :: SimplEnv -> TopLevelFlag
599 -> InId -> OccInfo
600 -> OutId -> OutExpr
601 -> SimplM (SimplFloats, SimplEnv)
602 -- See Note [Cast worker/wrapper]
603 tryCastWorkerWrapper env top_lvl old_bndr occ_info bndr (Cast rhs co)
604 | not (isJoinId bndr) -- Not for join points
605 , not (isDFunId bndr) -- nor DFuns; cast w/w is no help, and we can't transform
606 -- a DFunUnfolding in mk_worker_unfolding
607 , not (exprIsTrivial rhs) -- Not x = y |> co; Wrinkle 1
608 , not (hasInlineUnfolding info) -- Not INLINE things: Wrinkle 4
609 , not (isUnliftedType rhs_ty) -- Not if rhs has an unlifted type;
610 -- see Note [Cast w/w: unlifted]
611 = do { (rhs_floats, work_rhs) <- prepareRhs env top_lvl occ_fs rhs
612 ; uniq <- getUniqueM
613 ; let work_name = mkSystemVarName uniq occ_fs
614 work_id = mkLocalIdWithInfo work_name Many rhs_ty worker_info
615
616 ; work_unf <- mk_worker_unfolding work_id work_rhs
617 ; let work_id_w_unf = work_id `setIdUnfolding` work_unf
618 floats = emptyFloats env
619 `addLetFloats` rhs_floats
620 `addLetFloats` unitLetFloat (NonRec work_id_w_unf work_rhs)
621
622 triv_rhs = Cast (Var work_id_w_unf) co
623
624 ; if postInlineUnconditionally env top_lvl bndr occ_info triv_rhs
625 -- Almost always True, because the RHS is trivial
626 -- In that case we want to eliminate the binding fast
627 -- We conservatively use postInlineUnconditionally so that we
628 -- check all the right things
629 then do { tick (PostInlineUnconditionally bndr)
630 ; return ( floats
631 , extendIdSubst (setInScopeFromF env floats) old_bndr $
632 DoneEx triv_rhs Nothing ) }
633
634 else do { wrap_unf <- mkLetUnfolding (sm_uf_opts mode) top_lvl InlineRhs bndr triv_rhs
635 ; let bndr' = bndr `setInlinePragma` mkCastWrapperInlinePrag (idInlinePragma bndr)
636 `setIdUnfolding` wrap_unf
637 floats' = floats `extendFloats` NonRec bndr' triv_rhs
638 ; return ( floats', setInScopeFromF env floats' ) } }
639 where
640 mode = getMode env
641 occ_fs = getOccFS bndr
642 rhs_ty = coercionLKind co
643 info = idInfo bndr
644
645 worker_info = vanillaIdInfo `setDmdSigInfo` dmdSigInfo info
646 `setCprSigInfo` cprSigInfo info
647 `setDemandInfo` demandInfo info
648 `setInlinePragInfo` inlinePragInfo info
649 `setArityInfo` arityInfo info
650 -- We do /not/ want to transfer OccInfo, Rules
651 -- Note [Preserve strictness in cast w/w]
652 -- and Wrinkle 2 of Note [Cast worker/wrapper]
653
654 ----------- Worker unfolding -----------
655 -- Stable case: if there is a stable unfolding we have to compose with (Sym co);
656 -- the next round of simplification will do the job
657 -- Non-stable case: use work_rhs
658 -- Wrinkle 3 of Note [Cast worker/wrapper]
659 mk_worker_unfolding work_id work_rhs
660 = case realUnfoldingInfo info of -- NB: the real one, even for loop-breakers
661 unf@(CoreUnfolding { uf_tmpl = unf_rhs, uf_src = src })
662 | isStableSource src -> return (unf { uf_tmpl = mkCast unf_rhs (mkSymCo co) })
663 _ -> mkLetUnfolding (sm_uf_opts mode) top_lvl InlineRhs work_id work_rhs
664
665 tryCastWorkerWrapper env _ _ _ bndr rhs -- All other bindings
666 = return (mkFloatBind env (NonRec bndr rhs))
667
668 mkCastWrapperInlinePrag :: InlinePragma -> InlinePragma
669 -- See Note [Cast worker/wrapper]
670 mkCastWrapperInlinePrag (InlinePragma { inl_act = act, inl_rule = rule_info })
671 = InlinePragma { inl_src = SourceText "{-# INLINE"
672 , inl_inline = NoUserInlinePrag -- See Note [Wrapper NoUserInline]
673 , inl_sat = Nothing -- in GHC.Core.Opt.WorkWrap
674 , inl_act = wrap_act -- See Note [Wrapper activation]
675 , inl_rule = rule_info } -- in GHC.Core.Opt.WorkWrap
676 -- RuleMatchInfo is (and must be) unaffected
677 where
678 -- See Note [Wrapper activation] in GHC.Core.Opt.WorkWrap
679 -- But simpler, because we don't need to disable during InitialPhase
680 wrap_act | isNeverActive act = activateDuringFinal
681 | otherwise = act
682
683
684 {- *********************************************************************
685 * *
686 prepareBinding, prepareRhs, makeTrivial
687 * *
688 ********************************************************************* -}
689
690 prepareBinding :: SimplEnv -> TopLevelFlag
691 -> OutId -> OutExpr
692 -> SimplM (LetFloats, OutExpr)
693 prepareBinding env top_lvl bndr rhs
694 = prepareRhs env top_lvl (getOccFS bndr) rhs
695
696 {- Note [prepareRhs]
697 ~~~~~~~~~~~~~~~~~~~~
698 prepareRhs takes a putative RHS, checks whether it's a PAP or
699 constructor application and, if so, converts it to ANF, so that the
700 resulting thing can be inlined more easily. Thus
701 x = (f a, g b)
702 becomes
703 t1 = f a
704 t2 = g b
705 x = (t1,t2)
706
707 We also want to deal well cases like this
708 v = (f e1 `cast` co) e2
709 Here we want to make e1,e2 trivial and get
710 x1 = e1; x2 = e2; v = (f x1 `cast` co) v2
711 That's what the 'go' loop in prepareRhs does
712 -}
713
714 prepareRhs :: SimplEnv -> TopLevelFlag
715 -> FastString -- Base for any new variables
716 -> OutExpr
717 -> SimplM (LetFloats, OutExpr)
718 -- Transforms a RHS into a better RHS by ANF'ing args
719 -- for expandable RHSs: constructors and PAPs
720 -- e.g x = Just e
721 -- becomes a = e -- 'a' is fresh
722 -- x = Just a
723 -- See Note [prepareRhs]
724 prepareRhs env top_lvl occ rhs0
725 = do { (_is_exp, floats, rhs1) <- go 0 rhs0
726 ; return (floats, rhs1) }
727 where
728 go :: Int -> OutExpr -> SimplM (Bool, LetFloats, OutExpr)
729 go n_val_args (Cast rhs co)
730 = do { (is_exp, floats, rhs') <- go n_val_args rhs
731 ; return (is_exp, floats, Cast rhs' co) }
732 go n_val_args (App fun (Type ty))
733 = do { (is_exp, floats, rhs') <- go n_val_args fun
734 ; return (is_exp, floats, App rhs' (Type ty)) }
735 go n_val_args (App fun arg)
736 = do { (is_exp, floats1, fun') <- go (n_val_args+1) fun
737 ; case is_exp of
738 False -> return (False, emptyLetFloats, App fun arg)
739 True -> do { (floats2, arg') <- makeTrivial env top_lvl topDmd occ arg
740 ; return (True, floats1 `addLetFlts` floats2, App fun' arg') } }
741 go n_val_args (Var fun)
742 = return (is_exp, emptyLetFloats, Var fun)
743 where
744 is_exp = isExpandableApp fun n_val_args -- The fun a constructor or PAP
745 -- See Note [CONLIKE pragma] in GHC.Types.Basic
746 -- The definition of is_exp should match that in
747 -- 'GHC.Core.Opt.OccurAnal.occAnalApp'
748
749 go n_val_args (Tick t rhs)
750 -- We want to be able to float bindings past this
751 -- tick. Non-scoping ticks don't care.
752 | tickishScoped t == NoScope
753 = do { (is_exp, floats, rhs') <- go n_val_args rhs
754 ; return (is_exp, floats, Tick t rhs') }
755
756 -- On the other hand, for scoping ticks we need to be able to
757 -- copy them on the floats, which in turn is only allowed if
758 -- we can obtain non-counting ticks.
759 | (not (tickishCounts t) || tickishCanSplit t)
760 = do { (is_exp, floats, rhs') <- go n_val_args rhs
761 ; let tickIt (id, expr) = (id, mkTick (mkNoCount t) expr)
762 floats' = mapLetFloats floats tickIt
763 ; return (is_exp, floats', Tick t rhs') }
764
765 go _ other
766 = return (False, emptyLetFloats, other)
767
768 makeTrivialArg :: SimplEnv -> ArgSpec -> SimplM (LetFloats, ArgSpec)
769 makeTrivialArg env arg@(ValArg { as_arg = e, as_dmd = dmd })
770 = do { (floats, e') <- makeTrivial env NotTopLevel dmd (fsLit "arg") e
771 ; return (floats, arg { as_arg = e' }) }
772 makeTrivialArg _ arg
773 = return (emptyLetFloats, arg) -- CastBy, TyArg
774
775 makeTrivial :: SimplEnv -> TopLevelFlag -> Demand
776 -> FastString -- ^ A "friendly name" to build the new binder from
777 -> OutExpr -- ^ This expression satisfies the let/app invariant
778 -> SimplM (LetFloats, OutExpr)
779 -- Binds the expression to a variable, if it's not trivial, returning the variable
780 -- For the Demand argument, see Note [Keeping demand info in StrictArg Plan A]
781 makeTrivial env top_lvl dmd occ_fs expr
782 | exprIsTrivial expr -- Already trivial
783 || not (bindingOk top_lvl expr expr_ty) -- Cannot trivialise
784 -- See Note [Cannot trivialise]
785 = return (emptyLetFloats, expr)
786
787 | Cast expr' co <- expr
788 = do { (floats, triv_expr) <- makeTrivial env top_lvl dmd occ_fs expr'
789 ; return (floats, Cast triv_expr co) }
790
791 | otherwise
792 = do { (floats, new_id) <- makeTrivialBinding env top_lvl occ_fs
793 id_info expr expr_ty
794 ; return (floats, Var new_id) }
795 where
796 id_info = vanillaIdInfo `setDemandInfo` dmd
797 expr_ty = exprType expr
798
799 makeTrivialBinding :: SimplEnv -> TopLevelFlag
800 -> FastString -- ^ a "friendly name" to build the new binder from
801 -> IdInfo
802 -> OutExpr -- ^ This expression satisfies the let/app invariant
803 -> OutType -- Type of the expression
804 -> SimplM (LetFloats, OutId)
805 makeTrivialBinding env top_lvl occ_fs info expr expr_ty
806 = do { (floats, expr1) <- prepareRhs env top_lvl occ_fs expr
807 ; uniq <- getUniqueM
808 ; let name = mkSystemVarName uniq occ_fs
809 var = mkLocalIdWithInfo name Many expr_ty info
810
811 -- Now something very like completeBind,
812 -- but without the postInlineUnconditionally part
813 ; (arity_type, expr2) <- tryEtaExpandRhs env var expr1
814 -- Technically we should extend the in-scope set in 'env' with
815 -- the 'floats' from prepareRHS; but they are all fresh, so there is
816 -- no danger of introducing name shadowig in eta expansion
817
818 ; unf <- mkLetUnfolding (sm_uf_opts mode) top_lvl InlineRhs var expr2
819
820 ; let final_id = addLetBndrInfo var arity_type unf
821 bind = NonRec final_id expr2
822
823 ; return ( floats `addLetFlts` unitLetFloat bind, final_id ) }
824 where
825 mode = getMode env
826
827 bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool
828 -- True iff we can have a binding of this expression at this level
829 -- Precondition: the type is the type of the expression
830 bindingOk top_lvl expr expr_ty
831 | isTopLevel top_lvl = exprIsTopLevelBindable expr expr_ty
832 | otherwise = True
833
834 {- Note [Cannot trivialise]
835 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
836 Consider:
837 f :: Int -> Addr#
838
839 foo :: Bar
840 foo = Bar (f 3)
841
842 Then we can't ANF-ise foo, even though we'd like to, because
843 we can't make a top-level binding for the Addr# (f 3). And if
844 so we don't want to turn it into
845 foo = let x = f 3 in Bar x
846 because we'll just end up inlining x back, and that makes the
847 simplifier loop. Better not to ANF-ise it at all.
848
849 Literal strings are an exception.
850
851 foo = Ptr "blob"#
852
853 We want to turn this into:
854
855 foo1 = "blob"#
856 foo = Ptr foo1
857
858 See Note [Core top-level string literals] in GHC.Core.
859
860 ************************************************************************
861 * *
862 Completing a lazy binding
863 * *
864 ************************************************************************
865
866 completeBind
867 * deals only with Ids, not TyVars
868 * takes an already-simplified binder and RHS
869 * is used for both recursive and non-recursive bindings
870 * is used for both top-level and non-top-level bindings
871
872 It does the following:
873 - tries discarding a dead binding
874 - tries PostInlineUnconditionally
875 - add unfolding [this is the only place we add an unfolding]
876 - add arity
877
878 It does *not* attempt to do let-to-case. Why? Because it is used for
879 - top-level bindings (when let-to-case is impossible)
880 - many situations where the "rhs" is known to be a WHNF
881 (so let-to-case is inappropriate).
882
883 Nor does it do the atomic-argument thing
884 -}
885
886 completeBind :: SimplEnv
887 -> TopLevelFlag -- Flag stuck into unfolding
888 -> MaybeJoinCont -- Required only for join point
889 -> InId -- Old binder
890 -> OutId -> OutExpr -- New binder and RHS
891 -> SimplM (SimplFloats, SimplEnv)
892 -- completeBind may choose to do its work
893 -- * by extending the substitution (e.g. let x = y in ...)
894 -- * or by adding to the floats in the envt
895 --
896 -- Binder /can/ be a JoinId
897 -- Precondition: rhs obeys the let/app invariant
898 completeBind env top_lvl mb_cont old_bndr new_bndr new_rhs
899 | isCoVar old_bndr
900 = case new_rhs of
901 Coercion co -> return (emptyFloats env, extendCvSubst env old_bndr co)
902 _ -> return (mkFloatBind env (NonRec new_bndr new_rhs))
903
904 | otherwise
905 = assert (isId new_bndr) $
906 do { let old_info = idInfo old_bndr
907 old_unf = realUnfoldingInfo old_info
908 occ_info = occInfo old_info
909
910 -- Do eta-expansion on the RHS of the binding
911 -- See Note [Eta-expanding at let bindings] in GHC.Core.Opt.Simplify.Utils
912 ; (new_arity, eta_rhs) <- tryEtaExpandRhs env new_bndr new_rhs
913
914 -- Simplify the unfolding
915 ; new_unfolding <- simplLetUnfolding env top_lvl mb_cont old_bndr
916 eta_rhs (idType new_bndr) new_arity old_unf
917
918 ; let new_bndr_w_info = addLetBndrInfo new_bndr new_arity new_unfolding
919 -- See Note [In-scope set as a substitution]
920
921 ; if postInlineUnconditionally env top_lvl new_bndr_w_info occ_info eta_rhs
922
923 then -- Inline and discard the binding
924 do { tick (PostInlineUnconditionally old_bndr)
925 ; let unf_rhs = maybeUnfoldingTemplate new_unfolding `orElse` eta_rhs
926 -- See Note [Use occ-anald RHS in postInlineUnconditionally]
927 ; simplTrace env "PostInlineUnconditionally" (ppr new_bndr <+> ppr unf_rhs) $
928 return ( emptyFloats env
929 , extendIdSubst env old_bndr $
930 DoneEx unf_rhs (isJoinId_maybe new_bndr)) }
931 -- Use the substitution to make quite, quite sure that the
932 -- substitution will happen, since we are going to discard the binding
933
934 else -- Keep the binding; do cast worker/wrapper
935 -- pprTrace "Binding" (ppr final_bndr <+> ppr new_unfolding) $
936 tryCastWorkerWrapper env top_lvl old_bndr occ_info new_bndr_w_info eta_rhs }
937
938 addLetBndrInfo :: OutId -> ArityType -> Unfolding -> OutId
939 addLetBndrInfo new_bndr new_arity_type new_unf
940 = new_bndr `setIdInfo` info5
941 where
942 AT oss div = new_arity_type
943 new_arity = length oss
944
945 info1 = idInfo new_bndr `setArityInfo` new_arity
946
947 -- Unfolding info: Note [Setting the new unfolding]
948 info2 = info1 `setUnfoldingInfo` new_unf
949
950 -- Demand info: Note [Setting the demand info]
951 info3 | isEvaldUnfolding new_unf
952 = zapDemandInfo info2 `orElse` info2
953 | otherwise
954 = info2
955
956 -- Bottoming bindings: see Note [Bottoming bindings]
957 info4 | isDeadEndDiv div = info3 `setDmdSigInfo` bot_sig
958 `setCprSigInfo` bot_cpr
959 | otherwise = info3
960
961 bot_sig = mkClosedDmdSig (replicate new_arity topDmd) div
962 bot_cpr = mkCprSig new_arity botCpr
963
964 -- Zap call arity info. We have used it by now (via
965 -- `tryEtaExpandRhs`), and the simplifier can invalidate this
966 -- information, leading to broken code later (e.g. #13479)
967 info5 = zapCallArityInfo info4
968
969
970 {- Note [Bottoming bindings]
971 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
972 Suppose we have
973 let x = error "urk"
974 in ...(case x of <alts>)...
975 or
976 let f = \x. error (x ++ "urk")
977 in ...(case f "foo" of <alts>)...
978
979 Then we'd like to drop the dead <alts> immediately. So it's good to
980 propagate the info that x's RHS is bottom to x's IdInfo as rapidly as
981 possible.
982
983 We use tryEtaExpandRhs on every binding, and it turns out that the
984 arity computation it performs (via GHC.Core.Opt.Arity.findRhsArity) already
985 does a simple bottoming-expression analysis. So all we need to do
986 is propagate that info to the binder's IdInfo.
987
988 This showed up in #12150; see comment:16.
989
990 Note [Setting the demand info]
991 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
992 If the unfolding is a value, the demand info may
993 go pear-shaped, so we nuke it. Example:
994 let x = (a,b) in
995 case x of (p,q) -> h p q x
996 Here x is certainly demanded. But after we've nuked
997 the case, we'll get just
998 let x = (a,b) in h a b x
999 and now x is not demanded (I'm assuming h is lazy)
1000 This really happens. Similarly
1001 let f = \x -> e in ...f..f...
1002 After inlining f at some of its call sites the original binding may
1003 (for example) be no longer strictly demanded.
1004 The solution here is a bit ad hoc...
1005
1006 Note [Use occ-anald RHS in postInlineUnconditionally]
1007 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1008 Suppose we postInlineUnconditionally 'f in
1009 let f = \x -> x True in ...(f blah)...
1010 then we'd like to inline the /occ-anald/ RHS for 'f'. If we
1011 use the non-occ-anald version, we'll end up with a
1012 ...(let x = blah in x True)...
1013 and hence an extra Simplifier iteration.
1014
1015 We already /have/ the occ-anald version in the Unfolding for
1016 the Id. Well, maybe not /quite/ always. If the binder is Dead,
1017 postInlineUnconditionally will return True, but we may not have an
1018 unfolding because it's too big. Hence the belt-and-braces `orElse`
1019 in the defn of unf_rhs. The Nothing case probably never happens.
1020
1021
1022 ************************************************************************
1023 * *
1024 \subsection[Simplify-simplExpr]{The main function: simplExpr}
1025 * *
1026 ************************************************************************
1027
1028 The reason for this OutExprStuff stuff is that we want to float *after*
1029 simplifying a RHS, not before. If we do so naively we get quadratic
1030 behaviour as things float out.
1031
1032 To see why it's important to do it after, consider this (real) example:
1033
1034 let t = f x
1035 in fst t
1036 ==>
1037 let t = let a = e1
1038 b = e2
1039 in (a,b)
1040 in fst t
1041 ==>
1042 let a = e1
1043 b = e2
1044 t = (a,b)
1045 in
1046 a -- Can't inline a this round, cos it appears twice
1047 ==>
1048 e1
1049
1050 Each of the ==> steps is a round of simplification. We'd save a
1051 whole round if we float first. This can cascade. Consider
1052
1053 let f = g d
1054 in \x -> ...f...
1055 ==>
1056 let f = let d1 = ..d.. in \y -> e
1057 in \x -> ...f...
1058 ==>
1059 let d1 = ..d..
1060 in \x -> ...(\y ->e)...
1061
1062 Only in this second round can the \y be applied, and it
1063 might do the same again.
1064 -}
1065
1066 simplExpr :: SimplEnv -> CoreExpr -> SimplM CoreExpr
1067 simplExpr !env (Type ty) -- See Note [Bangs in the Simplifier]
1068 = do { ty' <- simplType env ty -- See Note [Avoiding space leaks in OutType]
1069 ; return (Type ty') }
1070
1071 simplExpr env expr
1072 = simplExprC env expr (mkBoringStop expr_out_ty)
1073 where
1074 expr_out_ty :: OutType
1075 expr_out_ty = substTy env (exprType expr)
1076 -- NB: Since 'expr' is term-valued, not (Type ty), this call
1077 -- to exprType will succeed. exprType fails on (Type ty).
1078
1079 simplExprC :: SimplEnv
1080 -> InExpr -- A term-valued expression, never (Type ty)
1081 -> SimplCont
1082 -> SimplM OutExpr
1083 -- Simplify an expression, given a continuation
1084 simplExprC env expr cont
1085 = -- pprTrace "simplExprC" (ppr expr $$ ppr cont {- $$ ppr (seIdSubst env) -} $$ ppr (seLetFloats env) ) $
1086 do { (floats, expr') <- simplExprF env expr cont
1087 ; -- pprTrace "simplExprC ret" (ppr expr $$ ppr expr') $
1088 -- pprTrace "simplExprC ret3" (ppr (seInScope env')) $
1089 -- pprTrace "simplExprC ret4" (ppr (seLetFloats env')) $
1090 return (wrapFloats floats expr') }
1091
1092 --------------------------------------------------
1093 simplExprF :: SimplEnv
1094 -> InExpr -- A term-valued expression, never (Type ty)
1095 -> SimplCont
1096 -> SimplM (SimplFloats, OutExpr)
1097
1098 simplExprF !env e !cont -- See Note [Bangs in the Simplifier]
1099 = {- pprTrace "simplExprF" (vcat
1100 [ ppr e
1101 , text "cont =" <+> ppr cont
1102 , text "inscope =" <+> ppr (seInScope env)
1103 , text "tvsubst =" <+> ppr (seTvSubst env)
1104 , text "idsubst =" <+> ppr (seIdSubst env)
1105 , text "cvsubst =" <+> ppr (seCvSubst env)
1106 ]) $ -}
1107 simplExprF1 env e cont
1108
1109 simplExprF1 :: SimplEnv -> InExpr -> SimplCont
1110 -> SimplM (SimplFloats, OutExpr)
1111
1112 simplExprF1 _ (Type ty) cont
1113 = pprPanic "simplExprF: type" (ppr ty <+> text"cont: " <+> ppr cont)
1114 -- simplExprF does only with term-valued expressions
1115 -- The (Type ty) case is handled separately by simplExpr
1116 -- and by the other callers of simplExprF
1117
1118 simplExprF1 env (Var v) cont = {-#SCC "simplIdF" #-} simplIdF env v cont
1119 simplExprF1 env (Lit lit) cont = {-#SCC "rebuild" #-} rebuild env (Lit lit) cont
1120 simplExprF1 env (Tick t expr) cont = {-#SCC "simplTick" #-} simplTick env t expr cont
1121 simplExprF1 env (Cast body co) cont = {-#SCC "simplCast" #-} simplCast env body co cont
1122 simplExprF1 env (Coercion co) cont = {-#SCC "simplCoercionF" #-} simplCoercionF env co cont
1123
1124 simplExprF1 env (App fun arg) cont
1125 = {-#SCC "simplExprF1-App" #-} case arg of
1126 Type ty -> do { -- The argument type will (almost) certainly be used
1127 -- in the output program, so just force it now.
1128 -- See Note [Avoiding space leaks in OutType]
1129 arg' <- simplType env ty
1130
1131 -- But use substTy, not simplType, to avoid forcing
1132 -- the hole type; it will likely not be needed.
1133 -- See Note [The hole type in ApplyToTy]
1134 ; let hole' = substTy env (exprType fun)
1135
1136 ; simplExprF env fun $
1137 ApplyToTy { sc_arg_ty = arg'
1138 , sc_hole_ty = hole'
1139 , sc_cont = cont } }
1140 _ ->
1141 -- Crucially, sc_hole_ty is a /lazy/ binding. It will
1142 -- be forced only if we need to run contHoleType.
1143 -- When these are forced, we might get quadratic behavior;
1144 -- this quadratic blowup could be avoided by drilling down
1145 -- to the function and getting its multiplicities all at once
1146 -- (instead of one-at-a-time). But in practice, we have not
1147 -- observed the quadratic behavior, so this extra entanglement
1148 -- seems not worthwhile.
1149 simplExprF env fun $
1150 ApplyToVal { sc_arg = arg, sc_env = env
1151 , sc_hole_ty = substTy env (exprType fun)
1152 , sc_dup = NoDup, sc_cont = cont }
1153
1154 simplExprF1 env expr@(Lam {}) cont
1155 = {-#SCC "simplExprF1-Lam" #-}
1156 simplLam env zapped_bndrs body cont
1157 -- The main issue here is under-saturated lambdas
1158 -- (\x1. \x2. e) arg1
1159 -- Here x1 might have "occurs-once" occ-info, because occ-info
1160 -- is computed assuming that a group of lambdas is applied
1161 -- all at once. If there are too few args, we must zap the
1162 -- occ-info, UNLESS the remaining binders are one-shot
1163 where
1164 (bndrs, body) = collectBinders expr
1165 zapped_bndrs = zapLamBndrs n_args bndrs
1166 n_args = countArgs cont
1167 -- NB: countArgs counts all the args (incl type args)
1168 -- and likewise drop counts all binders (incl type lambdas)
1169
1170 simplExprF1 env (Case scrut bndr _ alts) cont
1171 = {-#SCC "simplExprF1-Case" #-}
1172 simplExprF env scrut (Select { sc_dup = NoDup, sc_bndr = bndr
1173 , sc_alts = alts
1174 , sc_env = env, sc_cont = cont })
1175
1176 simplExprF1 env (Let (Rec pairs) body) cont
1177 | Just pairs' <- joinPointBindings_maybe pairs
1178 = {-#SCC "simplRecJoinPoin" #-} simplRecJoinPoint env pairs' body cont
1179
1180 | otherwise
1181 = {-#SCC "simplRecE" #-} simplRecE env pairs body cont
1182
1183 simplExprF1 env (Let (NonRec bndr rhs) body) cont
1184 | Type ty <- rhs -- First deal with type lets (let a = Type ty in e)
1185 = {-#SCC "simplExprF1-NonRecLet-Type" #-}
1186 assert (isTyVar bndr) $
1187 do { ty' <- simplType env ty
1188 ; simplExprF (extendTvSubst env bndr ty') body cont }
1189
1190 | Just (bndr', rhs') <- joinPointBinding_maybe bndr rhs
1191 = {-#SCC "simplNonRecJoinPoint" #-} simplNonRecJoinPoint env bndr' rhs' body cont
1192
1193 | otherwise
1194 = {-#SCC "simplNonRecE" #-} simplNonRecE env bndr (rhs, env) ([], body) cont
1195
1196 {- Note [Avoiding space leaks in OutType]
1197 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1198 Since the simplifier is run for multiple iterations, we need to ensure
1199 that any thunks in the output of one simplifier iteration are forced
1200 by the evaluation of the next simplifier iteration. Otherwise we may
1201 retain multiple copies of the Core program and leak a terrible amount
1202 of memory (as in #13426).
1203
1204 The simplifier is naturally strict in the entire "Expr part" of the
1205 input Core program, because any expression may contain binders, which
1206 we must find in order to extend the SimplEnv accordingly. But types
1207 do not contain binders and so it is tempting to write things like
1208
1209 simplExpr env (Type ty) = return (Type (substTy env ty)) -- Bad!
1210
1211 This is Bad because the result includes a thunk (substTy env ty) which
1212 retains a reference to the whole simplifier environment; and the next
1213 simplifier iteration will not force this thunk either, because the
1214 line above is not strict in ty.
1215
1216 So instead our strategy is for the simplifier to fully evaluate
1217 OutTypes when it emits them into the output Core program, for example
1218
1219 simplExpr env (Type ty) = do { ty' <- simplType env ty -- Good
1220 ; return (Type ty') }
1221
1222 where the only difference from above is that simplType calls seqType
1223 on the result of substTy.
1224
1225 However, SimplCont can also contain OutTypes and it's not necessarily
1226 a good idea to force types on the way in to SimplCont, because they
1227 may end up not being used and forcing them could be a lot of wasted
1228 work. T5631 is a good example of this.
1229
1230 - For ApplyToTy's sc_arg_ty, we force the type on the way in because
1231 the type will almost certainly appear as a type argument in the
1232 output program.
1233
1234 - For the hole types in Stop and ApplyToTy, we force the type when we
1235 emit it into the output program, after obtaining it from
1236 contResultType. (The hole type in ApplyToTy is only directly used
1237 to form the result type in a new Stop continuation.)
1238 -}
1239
1240 ---------------------------------
1241 -- Simplify a join point, adding the context.
1242 -- Context goes *inside* the lambdas. IOW, if the join point has arity n, we do:
1243 -- \x1 .. xn -> e => \x1 .. xn -> E[e]
1244 -- Note that we need the arity of the join point, since e may be a lambda
1245 -- (though this is unlikely). See Note [Join points and case-of-case].
1246 simplJoinRhs :: SimplEnv -> InId -> InExpr -> SimplCont
1247 -> SimplM OutExpr
1248 simplJoinRhs env bndr expr cont
1249 | Just arity <- isJoinId_maybe bndr
1250 = do { let (join_bndrs, join_body) = collectNBinders arity expr
1251 mult = contHoleScaling cont
1252 ; (env', join_bndrs') <- simplLamBndrs env (map (scaleVarBy mult) join_bndrs)
1253 ; join_body' <- simplExprC env' join_body cont
1254 ; return $ mkLams join_bndrs' join_body' }
1255
1256 | otherwise
1257 = pprPanic "simplJoinRhs" (ppr bndr)
1258
1259 ---------------------------------
1260 simplType :: SimplEnv -> InType -> SimplM OutType
1261 -- Kept monadic just so we can do the seqType
1262 -- See Note [Avoiding space leaks in OutType]
1263 simplType env ty
1264 = -- pprTrace "simplType" (ppr ty $$ ppr (seTvSubst env)) $
1265 seqType new_ty `seq` return new_ty
1266 where
1267 new_ty = substTy env ty
1268
1269 ---------------------------------
1270 simplCoercionF :: SimplEnv -> InCoercion -> SimplCont
1271 -> SimplM (SimplFloats, OutExpr)
1272 simplCoercionF env co cont
1273 = do { co' <- simplCoercion env co
1274 ; rebuild env (Coercion co') cont }
1275
1276 simplCoercion :: SimplEnv -> InCoercion -> SimplM OutCoercion
1277 simplCoercion env co
1278 = do { opts <- getOptCoercionOpts
1279 ; let opt_co = optCoercion opts (getTCvSubst env) co
1280 ; seqCo opt_co `seq` return opt_co }
1281
1282 -----------------------------------
1283 -- | Push a TickIt context outwards past applications and cases, as
1284 -- long as this is a non-scoping tick, to let case and application
1285 -- optimisations apply.
1286
1287 simplTick :: SimplEnv -> CoreTickish -> InExpr -> SimplCont
1288 -> SimplM (SimplFloats, OutExpr)
1289 simplTick env tickish expr cont
1290 -- A scoped tick turns into a continuation, so that we can spot
1291 -- (scc t (\x . e)) in simplLam and eliminate the scc. If we didn't do
1292 -- it this way, then it would take two passes of the simplifier to
1293 -- reduce ((scc t (\x . e)) e').
1294 -- NB, don't do this with counting ticks, because if the expr is
1295 -- bottom, then rebuildCall will discard the continuation.
1296
1297 -- XXX: we cannot do this, because the simplifier assumes that
1298 -- the context can be pushed into a case with a single branch. e.g.
1299 -- scc<f> case expensive of p -> e
1300 -- becomes
1301 -- case expensive of p -> scc<f> e
1302 --
1303 -- So I'm disabling this for now. It just means we will do more
1304 -- simplifier iterations that necessary in some cases.
1305
1306 -- | tickishScoped tickish && not (tickishCounts tickish)
1307 -- = simplExprF env expr (TickIt tickish cont)
1308
1309 -- For unscoped or soft-scoped ticks, we are allowed to float in new
1310 -- cost, so we simply push the continuation inside the tick. This
1311 -- has the effect of moving the tick to the outside of a case or
1312 -- application context, allowing the normal case and application
1313 -- optimisations to fire.
1314 | tickish `tickishScopesLike` SoftScope
1315 = do { (floats, expr') <- simplExprF env expr cont
1316 ; return (floats, mkTick tickish expr')
1317 }
1318
1319 -- Push tick inside if the context looks like this will allow us to
1320 -- do a case-of-case - see Note [case-of-scc-of-case]
1321 | Select {} <- cont, Just expr' <- push_tick_inside
1322 = simplExprF env expr' cont
1323
1324 -- We don't want to move the tick, but we might still want to allow
1325 -- floats to pass through with appropriate wrapping (or not, see
1326 -- wrap_floats below)
1327 --- | not (tickishCounts tickish) || tickishCanSplit tickish
1328 -- = wrap_floats
1329
1330 | otherwise
1331 = no_floating_past_tick
1332
1333 where
1334
1335 -- Try to push tick inside a case, see Note [case-of-scc-of-case].
1336 push_tick_inside =
1337 case expr0 of
1338 Case scrut bndr ty alts
1339 -> Just $ Case (tickScrut scrut) bndr ty (map tickAlt alts)
1340 _other -> Nothing
1341 where (ticks, expr0) = stripTicksTop movable (Tick tickish expr)
1342 movable t = not (tickishCounts t) ||
1343 t `tickishScopesLike` NoScope ||
1344 tickishCanSplit t
1345 tickScrut e = foldr mkTick e ticks
1346 -- Alternatives get annotated with all ticks that scope in some way,
1347 -- but we don't want to count entries.
1348 tickAlt (Alt c bs e) = Alt c bs (foldr mkTick e ts_scope)
1349 ts_scope = map mkNoCount $
1350 filter (not . (`tickishScopesLike` NoScope)) ticks
1351
1352 no_floating_past_tick =
1353 do { let (inc,outc) = splitCont cont
1354 ; (floats, expr1) <- simplExprF env expr inc
1355 ; let expr2 = wrapFloats floats expr1
1356 tickish' = simplTickish env tickish
1357 ; rebuild env (mkTick tickish' expr2) outc
1358 }
1359
1360 -- Alternative version that wraps outgoing floats with the tick. This
1361 -- results in ticks being duplicated, as we don't make any attempt to
1362 -- eliminate the tick if we re-inline the binding (because the tick
1363 -- semantics allows unrestricted inlining of HNFs), so I'm not doing
1364 -- this any more. FloatOut will catch any real opportunities for
1365 -- floating.
1366 --
1367 -- wrap_floats =
1368 -- do { let (inc,outc) = splitCont cont
1369 -- ; (env', expr') <- simplExprF (zapFloats env) expr inc
1370 -- ; let tickish' = simplTickish env tickish
1371 -- ; let wrap_float (b,rhs) = (zapIdDmdSig (setIdArity b 0),
1372 -- mkTick (mkNoCount tickish') rhs)
1373 -- -- when wrapping a float with mkTick, we better zap the Id's
1374 -- -- strictness info and arity, because it might be wrong now.
1375 -- ; let env'' = addFloats env (mapFloats env' wrap_float)
1376 -- ; rebuild env'' expr' (TickIt tickish' outc)
1377 -- }
1378
1379
1380 simplTickish env tickish
1381 | Breakpoint ext n ids <- tickish
1382 = Breakpoint ext n (map (getDoneId . substId env) ids)
1383 | otherwise = tickish
1384
1385 -- Push type application and coercion inside a tick
1386 splitCont :: SimplCont -> (SimplCont, SimplCont)
1387 splitCont cont@(ApplyToTy { sc_cont = tail }) = (cont { sc_cont = inc }, outc)
1388 where (inc,outc) = splitCont tail
1389 splitCont (CastIt co c) = (CastIt co inc, outc)
1390 where (inc,outc) = splitCont c
1391 splitCont other = (mkBoringStop (contHoleType other), other)
1392
1393 getDoneId (DoneId id) = id
1394 getDoneId (DoneEx e _) = getIdFromTrivialExpr e -- Note [substTickish] in GHC.Core.Subst
1395 getDoneId other = pprPanic "getDoneId" (ppr other)
1396
1397 -- Note [case-of-scc-of-case]
1398 -- It's pretty important to be able to transform case-of-case when
1399 -- there's an SCC in the way. For example, the following comes up
1400 -- in nofib/real/compress/Encode.hs:
1401 --
1402 -- case scctick<code_string.r1>
1403 -- case $wcode_string_r13s wild_XC w1_s137 w2_s138 l_aje
1404 -- of _ { (# ww1_s13f, ww2_s13g, ww3_s13h #) ->
1405 -- (ww1_s13f, ww2_s13g, ww3_s13h)
1406 -- }
1407 -- of _ { (ww_s12Y, ww1_s12Z, ww2_s130) ->
1408 -- tick<code_string.f1>
1409 -- (ww_s12Y,
1410 -- ww1_s12Z,
1411 -- PTTrees.PT
1412 -- @ GHC.Types.Char @ GHC.Types.Int wild2_Xj ww2_s130 r_ajf)
1413 -- }
1414 --
1415 -- We really want this case-of-case to fire, because then the 3-tuple
1416 -- will go away (indeed, the CPR optimisation is relying on this
1417 -- happening). But the scctick is in the way - we need to push it
1418 -- inside to expose the case-of-case. So we perform this
1419 -- transformation on the inner case:
1420 --
1421 -- scctick c (case e of { p1 -> e1; ...; pn -> en })
1422 -- ==>
1423 -- case (scctick c e) of { p1 -> scc c e1; ...; pn -> scc c en }
1424 --
1425 -- So we've moved a constant amount of work out of the scc to expose
1426 -- the case. We only do this when the continuation is interesting: in
1427 -- for now, it has to be another Case (maybe generalise this later).
1428
1429 {-
1430 ************************************************************************
1431 * *
1432 \subsection{The main rebuilder}
1433 * *
1434 ************************************************************************
1435 -}
1436
1437 rebuild :: SimplEnv -> OutExpr -> SimplCont -> SimplM (SimplFloats, OutExpr)
1438 -- At this point the substitution in the SimplEnv should be irrelevant;
1439 -- only the in-scope set matters
1440 rebuild env expr cont
1441 = case cont of
1442 Stop {} -> return (emptyFloats env, expr)
1443 TickIt t cont -> rebuild env (mkTick t expr) cont
1444 CastIt co cont -> rebuild env (mkCast expr co) cont
1445 -- NB: mkCast implements the (Coercion co |> g) optimisation
1446
1447 Select { sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont }
1448 -> rebuildCase (se `setInScopeFromE` env) expr bndr alts cont
1449
1450 StrictArg { sc_fun = fun, sc_cont = cont, sc_fun_ty = fun_ty }
1451 -> rebuildCall env (addValArgTo fun expr fun_ty ) cont
1452 StrictBind { sc_bndr = b, sc_bndrs = bs, sc_body = body
1453 , sc_env = se, sc_cont = cont }
1454 -> do { (floats1, env') <- simplNonRecX (se `setInScopeFromE` env) b expr
1455 -- expr satisfies let/app since it started life
1456 -- in a call to simplNonRecE
1457 ; (floats2, expr') <- simplLam env' bs body cont
1458 ; return (floats1 `addFloats` floats2, expr') }
1459
1460 ApplyToTy { sc_arg_ty = ty, sc_cont = cont}
1461 -> rebuild env (App expr (Type ty)) cont
1462
1463 ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag, sc_cont = cont}
1464 -- See Note [Avoid redundant simplification]
1465 -> do { (_, _, arg') <- simplArg env dup_flag se arg
1466 ; rebuild env (App expr arg') cont }
1467
1468 {-
1469 ************************************************************************
1470 * *
1471 \subsection{Lambdas}
1472 * *
1473 ************************************************************************
1474 -}
1475
1476 {- Note [Optimising reflexivity]
1477 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1478 It's important (for compiler performance) to get rid of reflexivity as soon
1479 as it appears. See #11735, #14737, and #15019.
1480
1481 In particular, we want to behave well on
1482
1483 * e |> co1 |> co2
1484 where the two happen to cancel out entirely. That is quite common;
1485 e.g. a newtype wrapping and unwrapping cancel.
1486
1487
1488 * (f |> co) @t1 @t2 ... @tn x1 .. xm
1489 Here we will use pushCoTyArg and pushCoValArg successively, which
1490 build up NthCo stacks. Silly to do that if co is reflexive.
1491
1492 However, we don't want to call isReflexiveCo too much, because it uses
1493 type equality which is expensive on big types (#14737 comment:7).
1494
1495 A good compromise (determined experimentally) seems to be to call
1496 isReflexiveCo
1497 * when composing casts, and
1498 * at the end
1499
1500 In investigating this I saw missed opportunities for on-the-fly
1501 coercion shrinkage. See #15090.
1502 -}
1503
1504
1505 simplCast :: SimplEnv -> InExpr -> Coercion -> SimplCont
1506 -> SimplM (SimplFloats, OutExpr)
1507 simplCast env body co0 cont0
1508 = do { co1 <- {-#SCC "simplCast-simplCoercion" #-} simplCoercion env co0
1509 ; cont1 <- {-#SCC "simplCast-addCoerce" #-}
1510 if isReflCo co1
1511 then return cont0 -- See Note [Optimising reflexivity]
1512 else addCoerce co1 cont0
1513 ; {-#SCC "simplCast-simplExprF" #-} simplExprF env body cont1 }
1514 where
1515 -- If the first parameter is MRefl, then simplifying revealed a
1516 -- reflexive coercion. Omit.
1517 addCoerceM :: MOutCoercion -> SimplCont -> SimplM SimplCont
1518 addCoerceM MRefl cont = return cont
1519 addCoerceM (MCo co) cont = addCoerce co cont
1520
1521 addCoerce :: OutCoercion -> SimplCont -> SimplM SimplCont
1522 addCoerce co1 (CastIt co2 cont) -- See Note [Optimising reflexivity]
1523 | isReflexiveCo co' = return cont
1524 | otherwise = addCoerce co' cont
1525 where
1526 co' = mkTransCo co1 co2
1527
1528 addCoerce co (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = tail })
1529 | Just (arg_ty', m_co') <- pushCoTyArg co arg_ty
1530 = {-#SCC "addCoerce-pushCoTyArg" #-}
1531 do { tail' <- addCoerceM m_co' tail
1532 ; return (ApplyToTy { sc_arg_ty = arg_ty'
1533 , sc_cont = tail'
1534 , sc_hole_ty = coercionLKind co }) }
1535 -- NB! As the cast goes past, the
1536 -- type of the hole changes (#16312)
1537
1538 -- (f |> co) e ===> (f (e |> co1)) |> co2
1539 -- where co :: (s1->s2) ~ (t1->t2)
1540 -- co1 :: t1 ~ s1
1541 -- co2 :: s2 ~ t2
1542 addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
1543 , sc_dup = dup, sc_cont = tail })
1544 | Just (m_co1, m_co2) <- pushCoValArg co
1545 , fixed_rep m_co1
1546 = {-#SCC "addCoerce-pushCoValArg" #-}
1547 do { tail' <- addCoerceM m_co2 tail
1548 ; case m_co1 of {
1549 MRefl -> return (cont { sc_cont = tail'
1550 , sc_hole_ty = coercionLKind co }) ;
1551 -- Avoid simplifying if possible;
1552 -- See Note [Avoiding exponential behaviour]
1553
1554 MCo co1 ->
1555 do { (dup', arg_se', arg') <- simplArg env dup arg_se arg
1556 -- When we build the ApplyTo we can't mix the OutCoercion
1557 -- 'co' with the InExpr 'arg', so we simplify
1558 -- to make it all consistent. It's a bit messy.
1559 -- But it isn't a common case.
1560 -- Example of use: #995
1561 ; return (ApplyToVal { sc_arg = mkCast arg' co1
1562 , sc_env = arg_se'
1563 , sc_dup = dup'
1564 , sc_cont = tail'
1565 , sc_hole_ty = coercionLKind co }) } } }
1566
1567 addCoerce co cont
1568 | isReflexiveCo co = return cont -- Having this at the end makes a huge
1569 -- difference in T12227, for some reason
1570 -- See Note [Optimising reflexivity]
1571 | otherwise = return (CastIt co cont)
1572
1573 fixed_rep :: MCoercionR -> Bool
1574 fixed_rep MRefl = True
1575 fixed_rep (MCo co) = typeHasFixedRuntimeRep $ coercionRKind co
1576 -- Without this check, we can get an argument which does not
1577 -- have a fixed runtime representation.
1578 -- See Note [Representation polymorphism invariants] in GHC.Core
1579 -- test: typecheck/should_run/EtaExpandLevPoly
1580
1581 simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
1582 -> SimplM (DupFlag, StaticEnv, OutExpr)
1583 simplArg env dup_flag arg_env arg
1584 | isSimplified dup_flag
1585 = return (dup_flag, arg_env, arg)
1586 | otherwise
1587 = do { arg' <- simplExpr (arg_env `setInScopeFromE` env) arg
1588 ; return (Simplified, zapSubstEnv arg_env, arg') }
1589
1590 {-
1591 ************************************************************************
1592 * *
1593 \subsection{Lambdas}
1594 * *
1595 ************************************************************************
1596 -}
1597
1598 simplLam :: SimplEnv -> [InId] -> InExpr -> SimplCont
1599 -> SimplM (SimplFloats, OutExpr)
1600
1601 simplLam env [] body cont
1602 = simplExprF env body cont
1603
1604 simplLam env (bndr:bndrs) body (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont })
1605 = do { tick (BetaReduction bndr)
1606 ; simplLam (extendTvSubst env bndr arg_ty) bndrs body cont }
1607
1608 simplLam env (bndr:bndrs) body (ApplyToVal { sc_arg = arg, sc_env = arg_se
1609 , sc_cont = cont, sc_dup = dup })
1610 | isSimplified dup -- Don't re-simplify if we've simplified it once
1611 -- See Note [Avoiding exponential behaviour]
1612 = do { tick (BetaReduction bndr)
1613 ; (floats1, env') <- simplNonRecX env bndr arg
1614 ; (floats2, expr') <- simplLam env' bndrs body cont
1615 ; return (floats1 `addFloats` floats2, expr') }
1616
1617 | otherwise
1618 = do { tick (BetaReduction bndr)
1619 ; simplNonRecE env bndr (arg, arg_se) (bndrs, body) cont }
1620
1621 -- Discard a non-counting tick on a lambda. This may change the
1622 -- cost attribution slightly (moving the allocation of the
1623 -- lambda elsewhere), but we don't care: optimisation changes
1624 -- cost attribution all the time.
1625 simplLam env bndrs body (TickIt tickish cont)
1626 | not (tickishCounts tickish)
1627 = simplLam env bndrs body cont
1628
1629 -- Not enough args, so there are real lambdas left to put in the result
1630 simplLam env bndrs body cont
1631 = do { (env', bndrs') <- simplLamBndrs env bndrs
1632 ; body' <- simplExpr env' body
1633 ; new_lam <- mkLam env' bndrs' body' cont
1634 ; rebuild env' new_lam cont }
1635
1636 -------------
1637 simplLamBndr :: SimplEnv -> InBndr -> SimplM (SimplEnv, OutBndr)
1638 -- Historically this had a special case for when a lambda-binder
1639 -- could have a stable unfolding;
1640 -- see Historical Note [Case binders and join points]
1641 -- But now it is much simpler!
1642 simplLamBndr env bndr = simplBinder env bndr
1643
1644 simplLamBndrs :: SimplEnv -> [InBndr] -> SimplM (SimplEnv, [OutBndr])
1645 simplLamBndrs env bndrs = mapAccumLM simplLamBndr env bndrs
1646
1647 ------------------
1648 simplNonRecE :: SimplEnv
1649 -> InId -- The binder, always an Id
1650 -- Never a join point
1651 -> (InExpr, SimplEnv) -- Rhs of binding (or arg of lambda)
1652 -> ([InBndr], InExpr) -- Body of the let/lambda
1653 -- \xs.e
1654 -> SimplCont
1655 -> SimplM (SimplFloats, OutExpr)
1656
1657 -- simplNonRecE is used for
1658 -- * non-top-level non-recursive non-join-point lets in expressions
1659 -- * beta reduction
1660 --
1661 -- simplNonRec env b (rhs, rhs_se) (bs, body) k
1662 -- = let env in
1663 -- cont< let b = rhs_se(rhs) in \bs.body >
1664 --
1665 -- It deals with strict bindings, via the StrictBind continuation,
1666 -- which may abort the whole process
1667 --
1668 -- Precondition: rhs satisfies the let/app invariant
1669 -- Note [Core let/app invariant] in GHC.Core
1670 --
1671 -- The "body" of the binding comes as a pair of ([InId],InExpr)
1672 -- representing a lambda; so we recurse back to simplLam
1673 -- Why? Because of the binder-occ-info-zapping done before
1674 -- the call to simplLam in simplExprF (Lam ...)
1675
1676 simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
1677 | assert (isId bndr && not (isJoinId bndr) ) True
1678 , Just env' <- preInlineUnconditionally env NotTopLevel bndr rhs rhs_se
1679 = do { tick (PreInlineUnconditionally bndr)
1680 ; -- pprTrace "preInlineUncond" (ppr bndr <+> ppr rhs) $
1681 simplLam env' bndrs body cont }
1682
1683 | otherwise
1684 = do { (env1, bndr1) <- simplNonRecBndr env bndr
1685
1686 -- Deal with strict bindings
1687 -- See Note [Dark corner with representation polymorphism]
1688 ; if isStrictId bndr1 && sm_case_case (getMode env)
1689 then simplExprF (rhs_se `setInScopeFromE` env) rhs
1690 (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body
1691 , sc_env = env, sc_cont = cont, sc_dup = NoDup })
1692
1693 -- Deal with lazy bindings
1694 else do
1695 { (env2, bndr2) <- addBndrRules env1 bndr bndr1 Nothing
1696 ; (floats1, env3) <- simplLazyBind env2 NotTopLevel NonRecursive bndr bndr2 rhs rhs_se
1697 ; (floats2, expr') <- simplLam env3 bndrs body cont
1698 ; return (floats1 `addFloats` floats2, expr') } }
1699
1700 ------------------
1701 simplRecE :: SimplEnv
1702 -> [(InId, InExpr)]
1703 -> InExpr
1704 -> SimplCont
1705 -> SimplM (SimplFloats, OutExpr)
1706
1707 -- simplRecE is used for
1708 -- * non-top-level recursive lets in expressions
1709 simplRecE env pairs body cont
1710 = do { let bndrs = map fst pairs
1711 ; massert (all (not . isJoinId) bndrs)
1712 ; env1 <- simplRecBndrs env bndrs
1713 -- NB: bndrs' don't have unfoldings or rules
1714 -- We add them as we go down
1715 ; (floats1, env2) <- simplRecBind env1 NotTopLevel Nothing pairs
1716 ; (floats2, expr') <- simplExprF env2 body cont
1717 ; return (floats1 `addFloats` floats2, expr') }
1718
1719 {- Note [Dark corner with representation polymorphism]
1720 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1721 In `simplNonRecE`, the call to `isStrictId` will fail if the binder
1722 does not have a fixed runtime representation, e.g. if it is of kind (TYPE r).
1723 So we are careful to call `isStrictId` on the OutId, not the InId, in case we have
1724 ((\(r::RuntimeRep) \(x::TYPE r). blah) Lifted arg)
1725 That will lead to `simplNonRecE env (x::TYPE r) arg`, and we can't tell
1726 if x is lifted or unlifted from that.
1727
1728 We only get such redexes from the compulsory inlining of a wired-in,
1729 representation-polymorphic function like `rightSection` (see
1730 GHC.Types.Id.Make). Mind you, SimpleOpt should probably have inlined
1731 such compulsory inlinings already, but belt and braces does no harm.
1732
1733 Plus, it turns out that GHC.Driver.Main.hscCompileCoreExpr calls the
1734 Simplifier without first calling SimpleOpt, so anything involving
1735 GHCi or TH and operator sections will fall over if we don't take
1736 care here.
1737
1738 Note [Avoiding exponential behaviour]
1739 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1740 One way in which we can get exponential behaviour is if we simplify a
1741 big expression, and the re-simplify it -- and then this happens in a
1742 deeply-nested way. So we must be jolly careful about re-simplifying
1743 an expression. That is why completeNonRecX does not try
1744 preInlineUnconditionally.
1745
1746 Example:
1747 f BIG, where f has a RULE
1748 Then
1749 * We simplify BIG before trying the rule; but the rule does not fire
1750 * We inline f = \x. x True
1751 * So if we did preInlineUnconditionally we'd re-simplify (BIG True)
1752
1753 However, if BIG has /not/ already been simplified, we'd /like/ to
1754 simplify BIG True; maybe good things happen. That is why
1755
1756 * simplLam has
1757 - a case for (isSimplified dup), which goes via simplNonRecX, and
1758 - a case for the un-simplified case, which goes via simplNonRecE
1759
1760 * We go to some efforts to avoid unnecessarily simplifying ApplyToVal,
1761 in at least two places
1762 - In simplCast/addCoerce, where we check for isReflCo
1763 - In rebuildCall we avoid simplifying arguments before we have to
1764 (see Note [Trying rewrite rules])
1765
1766
1767 ************************************************************************
1768 * *
1769 Join points
1770 * *
1771 ********************************************************************* -}
1772
1773 {- Note [Rules and unfolding for join points]
1774 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1775 Suppose we have
1776
1777 simplExpr (join j x = rhs ) cont
1778 ( {- RULE j (p:ps) = blah -} )
1779 ( {- StableUnfolding j = blah -} )
1780 (in blah )
1781
1782 Then we will push 'cont' into the rhs of 'j'. But we should *also* push
1783 'cont' into the RHS of
1784 * Any RULEs for j, e.g. generated by SpecConstr
1785 * Any stable unfolding for j, e.g. the result of an INLINE pragma
1786
1787 Simplifying rules and stable-unfoldings happens a bit after
1788 simplifying the right-hand side, so we remember whether or not it
1789 is a join point, and what 'cont' is, in a value of type MaybeJoinCont
1790
1791 #13900 was caused by forgetting to push 'cont' into the RHS
1792 of a SpecConstr-generated RULE for a join point.
1793 -}
1794
1795 type MaybeJoinCont = Maybe SimplCont
1796 -- Nothing => Not a join point
1797 -- Just k => This is a join binding with continuation k
1798 -- See Note [Rules and unfolding for join points]
1799
1800 simplNonRecJoinPoint :: SimplEnv -> InId -> InExpr
1801 -> InExpr -> SimplCont
1802 -> SimplM (SimplFloats, OutExpr)
1803 simplNonRecJoinPoint env bndr rhs body cont
1804 | assert (isJoinId bndr ) True
1805 , Just env' <- preInlineUnconditionally env NotTopLevel bndr rhs env
1806 = do { tick (PreInlineUnconditionally bndr)
1807 ; simplExprF env' body cont }
1808
1809 | otherwise
1810 = wrapJoinCont env cont $ \ env cont ->
1811 do { -- We push join_cont into the join RHS and the body;
1812 -- and wrap wrap_cont around the whole thing
1813 ; let mult = contHoleScaling cont
1814 res_ty = contResultType cont
1815 ; (env1, bndr1) <- simplNonRecJoinBndr env bndr mult res_ty
1816 ; (env2, bndr2) <- addBndrRules env1 bndr bndr1 (Just cont)
1817 ; (floats1, env3) <- simplJoinBind env2 cont bndr bndr2 rhs env
1818 ; (floats2, body') <- simplExprF env3 body cont
1819 ; return (floats1 `addFloats` floats2, body') }
1820
1821
1822 ------------------
1823 simplRecJoinPoint :: SimplEnv -> [(InId, InExpr)]
1824 -> InExpr -> SimplCont
1825 -> SimplM (SimplFloats, OutExpr)
1826 simplRecJoinPoint env pairs body cont
1827 = wrapJoinCont env cont $ \ env cont ->
1828 do { let bndrs = map fst pairs
1829 mult = contHoleScaling cont
1830 res_ty = contResultType cont
1831 ; env1 <- simplRecJoinBndrs env bndrs mult res_ty
1832 -- NB: bndrs' don't have unfoldings or rules
1833 -- We add them as we go down
1834 ; (floats1, env2) <- simplRecBind env1 NotTopLevel (Just cont) pairs
1835 ; (floats2, body') <- simplExprF env2 body cont
1836 ; return (floats1 `addFloats` floats2, body') }
1837
1838 --------------------
1839 wrapJoinCont :: SimplEnv -> SimplCont
1840 -> (SimplEnv -> SimplCont -> SimplM (SimplFloats, OutExpr))
1841 -> SimplM (SimplFloats, OutExpr)
1842 -- Deal with making the continuation duplicable if necessary,
1843 -- and with the no-case-of-case situation.
1844 wrapJoinCont env cont thing_inside
1845 | contIsStop cont -- Common case; no need for fancy footwork
1846 = thing_inside env cont
1847
1848 | not (sm_case_case (getMode env))
1849 -- See Note [Join points with -fno-case-of-case]
1850 = do { (floats1, expr1) <- thing_inside env (mkBoringStop (contHoleType cont))
1851 ; let (floats2, expr2) = wrapJoinFloatsX floats1 expr1
1852 ; (floats3, expr3) <- rebuild (env `setInScopeFromF` floats2) expr2 cont
1853 ; return (floats2 `addFloats` floats3, expr3) }
1854
1855 | otherwise
1856 -- Normal case; see Note [Join points and case-of-case]
1857 = do { (floats1, cont') <- mkDupableCont env cont
1858 ; (floats2, result) <- thing_inside (env `setInScopeFromF` floats1) cont'
1859 ; return (floats1 `addFloats` floats2, result) }
1860
1861
1862 --------------------
1863 trimJoinCont :: Id -> Maybe JoinArity -> SimplCont -> SimplCont
1864 -- Drop outer context from join point invocation (jump)
1865 -- See Note [Join points and case-of-case]
1866
1867 trimJoinCont _ Nothing cont
1868 = cont -- Not a jump
1869 trimJoinCont var (Just arity) cont
1870 = trim arity cont
1871 where
1872 trim 0 cont@(Stop {})
1873 = cont
1874 trim 0 cont
1875 = mkBoringStop (contResultType cont)
1876 trim n cont@(ApplyToVal { sc_cont = k })
1877 = cont { sc_cont = trim (n-1) k }
1878 trim n cont@(ApplyToTy { sc_cont = k })
1879 = cont { sc_cont = trim (n-1) k } -- join arity counts types!
1880 trim _ cont
1881 = pprPanic "completeCall" $ ppr var $$ ppr cont
1882
1883
1884 {- Note [Join points and case-of-case]
1885 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1886 When we perform the case-of-case transform (or otherwise push continuations
1887 inward), we want to treat join points specially. Since they're always
1888 tail-called and we want to maintain this invariant, we can do this (for any
1889 evaluation context E):
1890
1891 E[join j = e
1892 in case ... of
1893 A -> jump j 1
1894 B -> jump j 2
1895 C -> f 3]
1896
1897 -->
1898
1899 join j = E[e]
1900 in case ... of
1901 A -> jump j 1
1902 B -> jump j 2
1903 C -> E[f 3]
1904
1905 As is evident from the example, there are two components to this behavior:
1906
1907 1. When entering the RHS of a join point, copy the context inside.
1908 2. When a join point is invoked, discard the outer context.
1909
1910 We need to be very careful here to remain consistent---neither part is
1911 optional!
1912
1913 We need do make the continuation E duplicable (since we are duplicating it)
1914 with mkDupableCont.
1915
1916
1917 Note [Join points with -fno-case-of-case]
1918 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1919 Supose case-of-case is switched off, and we are simplifying
1920
1921 case (join j x = <j-rhs> in
1922 case y of
1923 A -> j 1
1924 B -> j 2
1925 C -> e) of <outer-alts>
1926
1927 Usually, we'd push the outer continuation (case . of <outer-alts>) into
1928 both the RHS and the body of the join point j. But since we aren't doing
1929 case-of-case we may then end up with this totally bogus result
1930
1931 join x = case <j-rhs> of <outer-alts> in
1932 case (case y of
1933 A -> j 1
1934 B -> j 2
1935 C -> e) of <outer-alts>
1936
1937 This would be OK in the language of the paper, but not in GHC: j is no longer
1938 a join point. We can only do the "push continuation into the RHS of the
1939 join point j" if we also push the continuation right down to the /jumps/ to
1940 j, so that it can evaporate there. If we are doing case-of-case, we'll get to
1941
1942 join x = case <j-rhs> of <outer-alts> in
1943 case y of
1944 A -> j 1
1945 B -> j 2
1946 C -> case e of <outer-alts>
1947
1948 which is great.
1949
1950 Bottom line: if case-of-case is off, we must stop pushing the continuation
1951 inwards altogether at any join point. Instead simplify the (join ... in ...)
1952 with a Stop continuation, and wrap the original continuation around the
1953 outside. Surprisingly tricky!
1954
1955
1956 ************************************************************************
1957 * *
1958 Variables
1959 * *
1960 ************************************************************************
1961 -}
1962
1963 simplVar :: SimplEnv -> InVar -> SimplM OutExpr
1964 -- Look up an InVar in the environment
1965 simplVar env var
1966 -- Why $! ? See Note [Bangs in the Simplifier]
1967 | isTyVar var = return $! Type $! (substTyVar env var)
1968 | isCoVar var = return $! Coercion $! (substCoVar env var)
1969 | otherwise
1970 = case substId env var of
1971 ContEx tvs cvs ids e -> let env' = setSubstEnv env tvs cvs ids
1972 in simplExpr env' e
1973 DoneId var1 -> return (Var var1)
1974 DoneEx e _ -> return e
1975
1976 simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr)
1977 simplIdF env var cont
1978 = case substId env var of
1979 ContEx tvs cvs ids e ->
1980 let env' = setSubstEnv env tvs cvs ids
1981 in simplExprF env' e cont
1982 -- Don't trim; haven't already simplified e,
1983 -- so the cont is not embodied in e
1984
1985 DoneId var1 ->
1986 let cont' = trimJoinCont var (isJoinId_maybe var1) cont
1987 in completeCall env var1 cont'
1988
1989 DoneEx e mb_join ->
1990 let env' = zapSubstEnv env
1991 cont' = trimJoinCont var mb_join cont
1992 in simplExprF env' e cont'
1993 -- Note [zapSubstEnv]
1994 -- The template is already simplified, so don't re-substitute.
1995 -- This is VITAL. Consider
1996 -- let x = e in
1997 -- let y = \z -> ...x... in
1998 -- \ x -> ...y...
1999 -- We'll clone the inner \x, adding x->x' in the id_subst
2000 -- Then when we inline y, we must *not* replace x by x' in
2001 -- the inlined copy!!
2002
2003 ---------------------------------------------------------
2004 -- Dealing with a call site
2005
2006 completeCall :: SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr)
2007 completeCall env var cont
2008 | Just expr <- callSiteInline logger uf_opts case_depth var active_unf
2009 lone_variable arg_infos interesting_cont
2010 -- Inline the variable's RHS
2011 = do { checkedTick (UnfoldingDone var)
2012 ; dump_inline expr cont
2013 ; let env1 = zapSubstEnv env
2014 ; simplExprF env1 expr cont }
2015
2016 | otherwise
2017 -- Don't inline; instead rebuild the call
2018 = do { rule_base <- getSimplRules
2019 ; let rules = getRules rule_base var
2020 info = mkArgInfo env var rules
2021 n_val_args call_cont
2022 ; rebuildCall env info cont }
2023
2024 where
2025 uf_opts = seUnfoldingOpts env
2026 case_depth = seCaseDepth env
2027 logger = seLogger env
2028 (lone_variable, arg_infos, call_cont) = contArgs cont
2029 n_val_args = length arg_infos
2030 interesting_cont = interestingCallContext env call_cont
2031 active_unf = activeUnfolding (getMode env) var
2032
2033 log_inlining doc
2034 = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify)
2035 Opt_D_dump_inlinings
2036 "" FormatText doc
2037
2038 dump_inline unfolding cont
2039 | not (logHasDumpFlag logger Opt_D_dump_inlinings) = return ()
2040 | not (logHasDumpFlag logger Opt_D_verbose_core2core)
2041 = when (isExternalName (idName var)) $
2042 log_inlining $
2043 sep [text "Inlining done:", nest 4 (ppr var)]
2044 | otherwise
2045 = log_inlining $
2046 sep [text "Inlining done: " <> ppr var,
2047 nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding),
2048 text "Cont: " <+> ppr cont])]
2049
2050 rebuildCall :: SimplEnv
2051 -> ArgInfo
2052 -> SimplCont
2053 -> SimplM (SimplFloats, OutExpr)
2054 -- We decided not to inline, so
2055 -- - simplify the arguments
2056 -- - try rewrite rules
2057 -- - and rebuild
2058
2059 ---------- Bottoming applications --------------
2060 rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) cont
2061 -- When we run out of strictness args, it means
2062 -- that the call is definitely bottom; see GHC.Core.Opt.Simplify.Utils.mkArgInfo
2063 -- Then we want to discard the entire strict continuation. E.g.
2064 -- * case (error "hello") of { ... }
2065 -- * (error "Hello") arg
2066 -- * f (error "Hello") where f is strict
2067 -- etc
2068 -- Then, especially in the first of these cases, we'd like to discard
2069 -- the continuation, leaving just the bottoming expression. But the
2070 -- type might not be right, so we may have to add a coerce.
2071 | not (contIsTrivial cont) -- Only do this if there is a non-trivial
2072 -- continuation to discard, else we do it
2073 -- again and again!
2074 = seqType cont_ty `seq` -- See Note [Avoiding space leaks in OutType]
2075 return (emptyFloats env, castBottomExpr res cont_ty)
2076 where
2077 res = argInfoExpr fun rev_args
2078 cont_ty = contResultType cont
2079
2080 ---------- Try rewrite RULES --------------
2081 -- See Note [Trying rewrite rules]
2082 rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args
2083 , ai_rules = Just (nr_wanted, rules) }) cont
2084 | nr_wanted == 0 || no_more_args
2085 , let info' = info { ai_rules = Nothing }
2086 = -- We've accumulated a simplified call in <fun,rev_args>
2087 -- so try rewrite rules; see Note [RULEs apply to simplified arguments]
2088 -- See also Note [Rules for recursive functions]
2089 do { mb_match <- tryRules env rules fun (reverse rev_args) cont
2090 ; case mb_match of
2091 Just (env', rhs, cont') -> simplExprF env' rhs cont'
2092 Nothing -> rebuildCall env info' cont }
2093 where
2094 no_more_args = case cont of
2095 ApplyToTy {} -> False
2096 ApplyToVal {} -> False
2097 _ -> True
2098
2099
2100 ---------- Simplify applications and casts --------------
2101 rebuildCall env info (CastIt co cont)
2102 = rebuildCall env (addCastTo info co) cont
2103
2104 rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = cont })
2105 = rebuildCall env (addTyArgTo info arg_ty hole_ty) cont
2106
2107 ---------- The runRW# rule. Do this after absorbing all arguments ------
2108 -- See Note [Simplification of runRW#] in GHC.CoreToSTG.Prep.
2109 --
2110 -- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
2111 -- K[ runRW# rr ty body ] --> runRW rr' ty' (\s. K[ body s ])
2112 rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
2113 (ApplyToVal { sc_arg = arg, sc_env = arg_se
2114 , sc_cont = cont, sc_hole_ty = fun_ty })
2115 | fun_id `hasKey` runRWKey
2116 , not (contIsStop cont) -- Don't fiddle around if the continuation is boring
2117 , [ TyArg {}, TyArg {} ] <- rev_args
2118 = do { s <- newId (fsLit "s") Many realWorldStatePrimTy
2119 ; let (m,_,_) = splitFunTy fun_ty
2120 env' = (arg_se `setInScopeFromE` env) `addNewInScopeIds` [s]
2121 ty' = contResultType cont
2122 cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
2123 , sc_env = env', sc_cont = cont
2124 , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy ty' }
2125 -- cont' applies to s, then K
2126 ; body' <- simplExprC env' arg cont'
2127 ; let arg' = Lam s body'
2128 rr' = getRuntimeRep ty'
2129 call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
2130 ; return (emptyFloats env, call') }
2131
2132 rebuildCall env fun_info
2133 (ApplyToVal { sc_arg = arg, sc_env = arg_se
2134 , sc_dup = dup_flag, sc_hole_ty = fun_ty
2135 , sc_cont = cont })
2136 -- Argument is already simplified
2137 | isSimplified dup_flag -- See Note [Avoid redundant simplification]
2138 = rebuildCall env (addValArgTo fun_info arg fun_ty) cont
2139
2140 -- Strict arguments
2141 | isStrictArgInfo fun_info
2142 , sm_case_case (getMode env)
2143 = -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
2144 simplExprF (arg_se `setInScopeFromE` env) arg
2145 (StrictArg { sc_fun = fun_info, sc_fun_ty = fun_ty
2146 , sc_dup = Simplified
2147 , sc_cont = cont })
2148 -- Note [Shadowing]
2149
2150 -- Lazy arguments
2151 | otherwise
2152 -- DO NOT float anything outside, hence simplExprC
2153 -- There is no benefit (unlike in a let-binding), and we'd
2154 -- have to be very careful about bogus strictness through
2155 -- floating a demanded let.
2156 = do { arg' <- simplExprC (arg_se `setInScopeFromE` env) arg
2157 (mkLazyArgStop arg_ty (lazyArgContext fun_info))
2158 ; rebuildCall env (addValArgTo fun_info arg' fun_ty) cont }
2159 where
2160 arg_ty = funArgTy fun_ty
2161
2162
2163 ---------- No further useful info, revert to generic rebuild ------------
2164 rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
2165 = rebuild env (argInfoExpr fun rev_args) cont
2166
2167 {- Note [Trying rewrite rules]
2168 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2169 Consider an application (f e1 e2 e3) where the e1,e2,e3 are not yet
2170 simplified. We want to simplify enough arguments to allow the rules
2171 to apply, but it's more efficient to avoid simplifying e2,e3 if e1 alone
2172 is sufficient. Example: class ops
2173 (+) dNumInt e2 e3
2174 If we rewrite ((+) dNumInt) to plusInt, we can take advantage of the
2175 latter's strictness when simplifying e2, e3. Moreover, suppose we have
2176 RULE f Int = \x. x True
2177
2178 Then given (f Int e1) we rewrite to
2179 (\x. x True) e1
2180 without simplifying e1. Now we can inline x into its unique call site,
2181 and absorb the True into it all in the same pass. If we simplified
2182 e1 first, we couldn't do that; see Note [Avoiding exponential behaviour].
2183
2184 So we try to apply rules if either
2185 (a) no_more_args: we've run out of argument that the rules can "see"
2186 (b) nr_wanted: none of the rules wants any more arguments
2187
2188
2189 Note [RULES apply to simplified arguments]
2190 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2191 It's very desirable to try RULES once the arguments have been simplified, because
2192 doing so ensures that rule cascades work in one pass. Consider
2193 {-# RULES g (h x) = k x
2194 f (k x) = x #-}
2195 ...f (g (h x))...
2196 Then we want to rewrite (g (h x)) to (k x) and only then try f's rules. If
2197 we match f's rules against the un-simplified RHS, it won't match. This
2198 makes a particularly big difference when superclass selectors are involved:
2199 op ($p1 ($p2 (df d)))
2200 We want all this to unravel in one sweep.
2201
2202 Note [Avoid redundant simplification]
2203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2204 Because RULES apply to simplified arguments, there's a danger of repeatedly
2205 simplifying already-simplified arguments. An important example is that of
2206 (>>=) d e1 e2
2207 Here e1, e2 are simplified before the rule is applied, but don't really
2208 participate in the rule firing. So we mark them as Simplified to avoid
2209 re-simplifying them.
2210
2211 Note [Shadowing]
2212 ~~~~~~~~~~~~~~~~
2213 This part of the simplifier may break the no-shadowing invariant
2214 Consider
2215 f (...(\a -> e)...) (case y of (a,b) -> e')
2216 where f is strict in its second arg
2217 If we simplify the innermost one first we get (...(\a -> e)...)
2218 Simplifying the second arg makes us float the case out, so we end up with
2219 case y of (a,b) -> f (...(\a -> e)...) e'
2220 So the output does not have the no-shadowing invariant. However, there is
2221 no danger of getting name-capture, because when the first arg was simplified
2222 we used an in-scope set that at least mentioned all the variables free in its
2223 static environment, and that is enough.
2224
2225 We can't just do innermost first, or we'd end up with a dual problem:
2226 case x of (a,b) -> f e (...(\a -> e')...)
2227
2228 I spent hours trying to recover the no-shadowing invariant, but I just could
2229 not think of an elegant way to do it. The simplifier is already knee-deep in
2230 continuations. We have to keep the right in-scope set around; AND we have
2231 to get the effect that finding (error "foo") in a strict arg position will
2232 discard the entire application and replace it with (error "foo"). Getting
2233 all this at once is TOO HARD!
2234
2235
2236 ************************************************************************
2237 * *
2238 Rewrite rules
2239 * *
2240 ************************************************************************
2241 -}
2242
2243 tryRules :: SimplEnv -> [CoreRule]
2244 -> Id -> [ArgSpec]
2245 -> SimplCont
2246 -> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont))
2247
2248 tryRules env rules fn args call_cont
2249 | null rules
2250 = return Nothing
2251
2252 {- Disabled until we fix #8326
2253 | fn `hasKey` tagToEnumKey -- See Note [Optimising tagToEnum#]
2254 , [_type_arg, val_arg] <- args
2255 , Select dup bndr ((_,[],rhs1) : rest_alts) se cont <- call_cont
2256 , isDeadBinder bndr
2257 = do { let enum_to_tag :: CoreAlt -> CoreAlt
2258 -- Takes K -> e into tagK# -> e
2259 -- where tagK# is the tag of constructor K
2260 enum_to_tag (DataAlt con, [], rhs)
2261 = assert (isEnumerationTyCon (dataConTyCon con) )
2262 (LitAlt tag, [], rhs)
2263 where
2264 tag = mkLitInt dflags (toInteger (dataConTag con - fIRST_TAG))
2265 enum_to_tag alt = pprPanic "tryRules: tagToEnum" (ppr alt)
2266
2267 new_alts = (DEFAULT, [], rhs1) : map enum_to_tag rest_alts
2268 new_bndr = setIdType bndr intPrimTy
2269 -- The binder is dead, but should have the right type
2270 ; return (Just (val_arg, Select dup new_bndr new_alts se cont)) }
2271 -}
2272
2273 | Just (rule, rule_rhs) <- lookupRule ropts (getUnfoldingInRuleMatch env)
2274 (activeRule (getMode env)) fn
2275 (argInfoAppArgs args) rules
2276 -- Fire a rule for the function
2277 = do { checkedTick (RuleFired (ruleName rule))
2278 ; let cont' = pushSimplifiedArgs zapped_env
2279 (drop (ruleArity rule) args)
2280 call_cont
2281 -- (ruleArity rule) says how
2282 -- many args the rule consumed
2283
2284 occ_anald_rhs = occurAnalyseExpr rule_rhs
2285 -- See Note [Occurrence-analyse after rule firing]
2286 ; dump rule rule_rhs
2287 ; return (Just (zapped_env, occ_anald_rhs, cont')) }
2288 -- The occ_anald_rhs and cont' are all Out things
2289 -- hence zapping the environment
2290
2291 | otherwise -- No rule fires
2292 = do { nodump -- This ensures that an empty file is written
2293 ; return Nothing }
2294
2295 where
2296 ropts = initRuleOpts dflags
2297 dflags = seDynFlags env
2298 logger = seLogger env
2299 zapped_env = zapSubstEnv env -- See Note [zapSubstEnv]
2300
2301 printRuleModule rule
2302 = parens (maybe (text "BUILTIN")
2303 (pprModuleName . moduleName)
2304 (ruleModule rule))
2305
2306 dump rule rule_rhs
2307 | logHasDumpFlag logger Opt_D_dump_rule_rewrites
2308 = log_rule Opt_D_dump_rule_rewrites "Rule fired" $ vcat
2309 [ text "Rule:" <+> ftext (ruleName rule)
2310 , text "Module:" <+> printRuleModule rule
2311 , text "Before:" <+> hang (ppr fn) 2 (sep (map ppr args))
2312 , text "After: " <+> hang (pprCoreExpr rule_rhs) 2
2313 (sep $ map ppr $ drop (ruleArity rule) args)
2314 , text "Cont: " <+> ppr call_cont ]
2315
2316 | logHasDumpFlag logger Opt_D_dump_rule_firings
2317 = log_rule Opt_D_dump_rule_firings "Rule fired:" $
2318 ftext (ruleName rule)
2319 <+> printRuleModule rule
2320
2321 | otherwise
2322 = return ()
2323
2324 nodump
2325 | logHasDumpFlag logger Opt_D_dump_rule_rewrites
2326 = liftIO $
2327 touchDumpFile logger Opt_D_dump_rule_rewrites
2328
2329 | logHasDumpFlag logger Opt_D_dump_rule_firings
2330 = liftIO $
2331 touchDumpFile logger Opt_D_dump_rule_firings
2332
2333 | otherwise
2334 = return ()
2335
2336 log_rule flag hdr details
2337 = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify) flag "" FormatText
2338 $ sep [text hdr, nest 4 details]
2339
2340 trySeqRules :: SimplEnv
2341 -> OutExpr -> InExpr -- Scrutinee and RHS
2342 -> SimplCont
2343 -> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont))
2344 -- See Note [User-defined RULES for seq]
2345 trySeqRules in_env scrut rhs cont
2346 = do { rule_base <- getSimplRules
2347 ; tryRules in_env (getRules rule_base seqId) seqId out_args rule_cont }
2348 where
2349 no_cast_scrut = drop_casts scrut
2350 scrut_ty = exprType no_cast_scrut
2351 seq_id_ty = idType seqId -- forall r a (b::TYPE r). a -> b -> b
2352 res1_ty = piResultTy seq_id_ty rhs_rep -- forall a (b::TYPE rhs_rep). a -> b -> b
2353 res2_ty = piResultTy res1_ty scrut_ty -- forall (b::TYPE rhs_rep). scrut_ty -> b -> b
2354 res3_ty = piResultTy res2_ty rhs_ty -- scrut_ty -> rhs_ty -> rhs_ty
2355 res4_ty = funResultTy res3_ty -- rhs_ty -> rhs_ty
2356 rhs_ty = substTy in_env (exprType rhs)
2357 rhs_rep = getRuntimeRep rhs_ty
2358 out_args = [ TyArg { as_arg_ty = rhs_rep
2359 , as_hole_ty = seq_id_ty }
2360 , TyArg { as_arg_ty = scrut_ty
2361 , as_hole_ty = res1_ty }
2362 , TyArg { as_arg_ty = rhs_ty
2363 , as_hole_ty = res2_ty }
2364 , ValArg { as_arg = no_cast_scrut
2365 , as_dmd = seqDmd
2366 , as_hole_ty = res3_ty } ]
2367 rule_cont = ApplyToVal { sc_dup = NoDup, sc_arg = rhs
2368 , sc_env = in_env, sc_cont = cont
2369 , sc_hole_ty = res4_ty }
2370
2371 -- Lazily evaluated, so we don't do most of this
2372
2373 drop_casts (Cast e _) = drop_casts e
2374 drop_casts e = e
2375
2376 {- Note [User-defined RULES for seq]
2377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2378 Given
2379 case (scrut |> co) of _ -> rhs
2380 look for rules that match the expression
2381 seq @t1 @t2 scrut
2382 where scrut :: t1
2383 rhs :: t2
2384
2385 If you find a match, rewrite it, and apply to 'rhs'.
2386
2387 Notice that we can simply drop casts on the fly here, which
2388 makes it more likely that a rule will match.
2389
2390 See Note [User-defined RULES for seq] in GHC.Types.Id.Make.
2391
2392 Note [Occurrence-analyse after rule firing]
2393 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2394 After firing a rule, we occurrence-analyse the instantiated RHS before
2395 simplifying it. Usually this doesn't make much difference, but it can
2396 be huge. Here's an example (simplCore/should_compile/T7785)
2397
2398 map f (map f (map f xs)
2399
2400 = -- Use build/fold form of map, twice
2401 map f (build (\cn. foldr (mapFB c f) n
2402 (build (\cn. foldr (mapFB c f) n xs))))
2403
2404 = -- Apply fold/build rule
2405 map f (build (\cn. (\cn. foldr (mapFB c f) n xs) (mapFB c f) n))
2406
2407 = -- Beta-reduce
2408 -- Alas we have no occurrence-analysed, so we don't know
2409 -- that c is used exactly once
2410 map f (build (\cn. let c1 = mapFB c f in
2411 foldr (mapFB c1 f) n xs))
2412
2413 = -- Use mapFB rule: mapFB (mapFB c f) g = mapFB c (f.g)
2414 -- We can do this because (mapFB c n) is a PAP and hence expandable
2415 map f (build (\cn. let c1 = mapFB c n in
2416 foldr (mapFB c (f.f)) n x))
2417
2418 This is not too bad. But now do the same with the outer map, and
2419 we get another use of mapFB, and t can interact with /both/ remaining
2420 mapFB calls in the above expression. This is stupid because actually
2421 that 'c1' binding is dead. The outer map introduces another c2. If
2422 there is a deep stack of maps we get lots of dead bindings, and lots
2423 of redundant work as we repeatedly simplify the result of firing rules.
2424
2425 The easy thing to do is simply to occurrence analyse the result of
2426 the rule firing. Note that this occ-anals not only the RHS of the
2427 rule, but also the function arguments, which by now are OutExprs.
2428 E.g.
2429 RULE f (g x) = x+1
2430
2431 Call f (g BIG) --> (\x. x+1) BIG
2432
2433 The rule binders are lambda-bound and applied to the OutExpr arguments
2434 (here BIG) which lack all internal occurrence info.
2435
2436 Is this inefficient? Not really: we are about to walk over the result
2437 of the rule firing to simplify it, so occurrence analysis is at most
2438 a constant factor.
2439
2440 Possible improvement: occ-anal the rules when putting them in the
2441 database; and in the simplifier just occ-anal the OutExpr arguments.
2442 But that's more complicated and the rule RHS is usually tiny; so I'm
2443 just doing the simple thing.
2444
2445 Historical note: previously we did occ-anal the rules in Rule.hs,
2446 but failed to occ-anal the OutExpr arguments, which led to the
2447 nasty performance problem described above.
2448
2449
2450 Note [Optimising tagToEnum#]
2451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2452 If we have an enumeration data type:
2453
2454 data Foo = A | B | C
2455
2456 Then we want to transform
2457
2458 case tagToEnum# x of ==> case x of
2459 A -> e1 DEFAULT -> e1
2460 B -> e2 1# -> e2
2461 C -> e3 2# -> e3
2462
2463 thereby getting rid of the tagToEnum# altogether. If there was a DEFAULT
2464 alternative we retain it (remember it comes first). If not the case must
2465 be exhaustive, and we reflect that in the transformed version by adding
2466 a DEFAULT. Otherwise Lint complains that the new case is not exhaustive.
2467 See #8317.
2468
2469 Note [Rules for recursive functions]
2470 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2471 You might think that we shouldn't apply rules for a loop breaker:
2472 doing so might give rise to an infinite loop, because a RULE is
2473 rather like an extra equation for the function:
2474 RULE: f (g x) y = x+y
2475 Eqn: f a y = a-y
2476
2477 But it's too drastic to disable rules for loop breakers.
2478 Even the foldr/build rule would be disabled, because foldr
2479 is recursive, and hence a loop breaker:
2480 foldr k z (build g) = g k z
2481 So it's up to the programmer: rules can cause divergence
2482
2483
2484 ************************************************************************
2485 * *
2486 Rebuilding a case expression
2487 * *
2488 ************************************************************************
2489
2490 Note [Case elimination]
2491 ~~~~~~~~~~~~~~~~~~~~~~~
2492 The case-elimination transformation discards redundant case expressions.
2493 Start with a simple situation:
2494
2495 case x# of ===> let y# = x# in e
2496 y# -> e
2497
2498 (when x#, y# are of primitive type, of course). We can't (in general)
2499 do this for algebraic cases, because we might turn bottom into
2500 non-bottom!
2501
2502 The code in GHC.Core.Opt.Simplify.Utils.prepareAlts has the effect of generalise
2503 this idea to look for a case where we're scrutinising a variable, and we know
2504 that only the default case can match. For example:
2505
2506 case x of
2507 0# -> ...
2508 DEFAULT -> ...(case x of
2509 0# -> ...
2510 DEFAULT -> ...) ...
2511
2512 Here the inner case is first trimmed to have only one alternative, the
2513 DEFAULT, after which it's an instance of the previous case. This
2514 really only shows up in eliminating error-checking code.
2515
2516 Note that GHC.Core.Opt.Simplify.Utils.mkCase combines identical RHSs. So
2517
2518 case e of ===> case e of DEFAULT -> r
2519 True -> r
2520 False -> r
2521
2522 Now again the case may be eliminated by the CaseElim transformation.
2523 This includes things like (==# a# b#)::Bool so that we simplify
2524 case ==# a# b# of { True -> x; False -> x }
2525 to just
2526 x
2527 This particular example shows up in default methods for
2528 comparison operations (e.g. in (>=) for Int.Int32)
2529
2530 Note [Case to let transformation]
2531 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2532 If a case over a lifted type has a single alternative, and is being
2533 used as a strict 'let' (all isDeadBinder bndrs), we may want to do
2534 this transformation:
2535
2536 case e of r ===> let r = e in ...r...
2537 _ -> ...r...
2538
2539 We treat the unlifted and lifted cases separately:
2540
2541 * Unlifted case: 'e' satisfies exprOkForSpeculation
2542 (ok-for-spec is needed to satisfy the let/app invariant).
2543 This turns case a +# b of r -> ...r...
2544 into let r = a +# b in ...r...
2545 and thence .....(a +# b)....
2546
2547 However, if we have
2548 case indexArray# a i of r -> ...r...
2549 we might like to do the same, and inline the (indexArray# a i).
2550 But indexArray# is not okForSpeculation, so we don't build a let
2551 in rebuildCase (lest it get floated *out*), so the inlining doesn't
2552 happen either. Annoying.
2553
2554 * Lifted case: we need to be sure that the expression is already
2555 evaluated (exprIsHNF). If it's not already evaluated
2556 - we risk losing exceptions, divergence or
2557 user-specified thunk-forcing
2558 - even if 'e' is guaranteed to converge, we don't want to
2559 create a thunk (call by need) instead of evaluating it
2560 right away (call by value)
2561
2562 However, we can turn the case into a /strict/ let if the 'r' is
2563 used strictly in the body. Then we won't lose divergence; and
2564 we won't build a thunk because the let is strict.
2565 See also Note [Case-to-let for strictly-used binders]
2566
2567 Note [Case-to-let for strictly-used binders]
2568 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2569 If we have this:
2570 case <scrut> of r { _ -> ..r.. }
2571
2572 where 'r' is used strictly in (..r..), we can safely transform to
2573 let r = <scrut> in ...r...
2574
2575 This is a Good Thing, because 'r' might be dead (if the body just
2576 calls error), or might be used just once (in which case it can be
2577 inlined); or we might be able to float the let-binding up or down.
2578 E.g. #15631 has an example.
2579
2580 Note that this can change the error behaviour. For example, we might
2581 transform
2582 case x of { _ -> error "bad" }
2583 --> error "bad"
2584 which is might be puzzling if 'x' currently lambda-bound, but later gets
2585 let-bound to (error "good").
2586
2587 Nevertheless, the paper "A semantics for imprecise exceptions" allows
2588 this transformation. If you want to fix the evaluation order, use
2589 'pseq'. See #8900 for an example where the loss of this
2590 transformation bit us in practice.
2591
2592 See also Note [Empty case alternatives] in GHC.Core.
2593
2594 Historical notes
2595
2596 There have been various earlier versions of this patch:
2597
2598 * By Sept 18 the code looked like this:
2599 || scrut_is_demanded_var scrut
2600
2601 scrut_is_demanded_var :: CoreExpr -> Bool
2602 scrut_is_demanded_var (Cast s _) = scrut_is_demanded_var s
2603 scrut_is_demanded_var (Var _) = isStrUsedDmd (idDemandInfo case_bndr)
2604 scrut_is_demanded_var _ = False
2605
2606 This only fired if the scrutinee was a /variable/, which seems
2607 an unnecessary restriction. So in #15631 I relaxed it to allow
2608 arbitrary scrutinees. Less code, less to explain -- but the change
2609 had 0.00% effect on nofib.
2610
2611 * Previously, in Jan 13 the code looked like this:
2612 || case_bndr_evald_next rhs
2613
2614 case_bndr_evald_next :: CoreExpr -> Bool
2615 -- See Note [Case binder next]
2616 case_bndr_evald_next (Var v) = v == case_bndr
2617 case_bndr_evald_next (Cast e _) = case_bndr_evald_next e
2618 case_bndr_evald_next (App e _) = case_bndr_evald_next e
2619 case_bndr_evald_next (Case e _ _ _) = case_bndr_evald_next e
2620 case_bndr_evald_next _ = False
2621
2622 This patch was part of fixing #7542. See also
2623 Note [Eta reduction of an eval'd function] in GHC.Core.Utils.)
2624
2625
2626 Further notes about case elimination
2627 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2628 Consider: test :: Integer -> IO ()
2629 test = print
2630
2631 Turns out that this compiles to:
2632 Print.test
2633 = \ eta :: Integer
2634 eta1 :: Void# ->
2635 case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
2636 case hPutStr stdout
2637 (PrelNum.jtos eta ($w[] @ Char))
2638 eta1
2639 of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s }}
2640
2641 Notice the strange '<' which has no effect at all. This is a funny one.
2642 It started like this:
2643
2644 f x y = if x < 0 then jtos x
2645 else if y==0 then "" else jtos x
2646
2647 At a particular call site we have (f v 1). So we inline to get
2648
2649 if v < 0 then jtos x
2650 else if 1==0 then "" else jtos x
2651
2652 Now simplify the 1==0 conditional:
2653
2654 if v<0 then jtos v else jtos v
2655
2656 Now common-up the two branches of the case:
2657
2658 case (v<0) of DEFAULT -> jtos v
2659
2660 Why don't we drop the case? Because it's strict in v. It's technically
2661 wrong to drop even unnecessary evaluations, and in practice they
2662 may be a result of 'seq' so we *definitely* don't want to drop those.
2663 I don't really know how to improve this situation.
2664
2665
2666 Note [FloatBinds from constructor wrappers]
2667 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2668 If we have FloatBinds coming from the constructor wrapper
2669 (as in Note [exprIsConApp_maybe on data constructors with wrappers]),
2670 we cannot float past them. We'd need to float the FloatBind
2671 together with the simplify floats, unfortunately the
2672 simplifier doesn't have case-floats. The simplest thing we can
2673 do is to wrap all the floats here. The next iteration of the
2674 simplifier will take care of all these cases and lets.
2675
2676 Given data T = MkT !Bool, this allows us to simplify
2677 case $WMkT b of { MkT x -> f x }
2678 to
2679 case b of { b' -> f b' }.
2680
2681 We could try and be more clever (like maybe wfloats only contain
2682 let binders, so we could float them). But the need for the
2683 extra complication is not clear.
2684
2685 Note [Do not duplicate constructor applications]
2686 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2687 Consider this (#20125)
2688 let x = (a,b)
2689 in ...(case x of x' -> blah)...x...x...
2690
2691 We want that `case` to vanish (since `x` is bound to a data con) leaving
2692 let x = (a,b)
2693 in ...(let x'=x in blah)...x..x...
2694
2695 In rebuildCase, `exprIsConApp_maybe` will succeed on the scrutinee `x`,
2696 since is bound to (a,b). But in eliminating the case, if the scrutinee
2697 is trivial, we want to bind the case-binder to the scrutinee, /not/ to
2698 the constructor application. Hence the case_bndr_rhs in rebuildCase.
2699
2700 This applies equally to a non-DEFAULT case alternative, say
2701 let x = (a,b) in ...(case x of x' { (p,q) -> blah })...
2702 This variant is handled by bind_case_bndr in knownCon.
2703
2704 We want to bind x' to x, and not to a duplicated (a,b)).
2705 -}
2706
2707 ---------------------------------------------------------
2708 -- Eliminate the case if possible
2709
2710 rebuildCase, reallyRebuildCase
2711 :: SimplEnv
2712 -> OutExpr -- Scrutinee
2713 -> InId -- Case binder
2714 -> [InAlt] -- Alternatives (increasing order)
2715 -> SimplCont
2716 -> SimplM (SimplFloats, OutExpr)
2717
2718 --------------------------------------------------
2719 -- 1. Eliminate the case if there's a known constructor
2720 --------------------------------------------------
2721
2722 rebuildCase env scrut case_bndr alts cont
2723 | Lit lit <- scrut -- No need for same treatment as constructors
2724 -- because literals are inlined more vigorously
2725 , not (litIsLifted lit)
2726 = do { tick (KnownBranch case_bndr)
2727 ; case findAlt (LitAlt lit) alts of
2728 Nothing -> missingAlt env case_bndr alts cont
2729 Just (Alt _ bs rhs) -> simple_rhs env [] scrut bs rhs }
2730
2731 | Just (in_scope', wfloats, con, ty_args, other_args)
2732 <- exprIsConApp_maybe (getUnfoldingInRuleMatch env) scrut
2733 -- Works when the scrutinee is a variable with a known unfolding
2734 -- as well as when it's an explicit constructor application
2735 , let env0 = setInScopeSet env in_scope'
2736 = do { tick (KnownBranch case_bndr)
2737 ; let scaled_wfloats = map scale_float wfloats
2738 -- case_bndr_unf: see Note [Do not duplicate constructor applications]
2739 case_bndr_rhs | exprIsTrivial scrut = scrut
2740 | otherwise = con_app
2741 con_app = Var (dataConWorkId con) `mkTyApps` ty_args
2742 `mkApps` other_args
2743 ; case findAlt (DataAlt con) alts of
2744 Nothing -> missingAlt env0 case_bndr alts cont
2745 Just (Alt DEFAULT bs rhs) -> simple_rhs env0 scaled_wfloats case_bndr_rhs bs rhs
2746 Just (Alt _ bs rhs) -> knownCon env0 scrut scaled_wfloats con ty_args
2747 other_args case_bndr bs rhs cont
2748 }
2749 where
2750 simple_rhs env wfloats case_bndr_rhs bs rhs =
2751 assert (null bs) $
2752 do { (floats1, env') <- simplNonRecX env case_bndr case_bndr_rhs
2753 -- scrut is a constructor application,
2754 -- hence satisfies let/app invariant
2755 ; (floats2, expr') <- simplExprF env' rhs cont
2756 ; case wfloats of
2757 [] -> return (floats1 `addFloats` floats2, expr')
2758 _ -> return
2759 -- See Note [FloatBinds from constructor wrappers]
2760 ( emptyFloats env,
2761 GHC.Core.Make.wrapFloats wfloats $
2762 wrapFloats (floats1 `addFloats` floats2) expr' )}
2763
2764 -- This scales case floats by the multiplicity of the continuation hole (see
2765 -- Note [Scaling in case-of-case]). Let floats are _not_ scaled, because
2766 -- they are aliases anyway.
2767 scale_float (GHC.Core.Make.FloatCase scrut case_bndr con vars) =
2768 let
2769 scale_id id = scaleVarBy holeScaling id
2770 in
2771 GHC.Core.Make.FloatCase scrut (scale_id case_bndr) con (map scale_id vars)
2772 scale_float f = f
2773
2774 holeScaling = contHoleScaling cont `mkMultMul` idMult case_bndr
2775 -- We are in the following situation
2776 -- case[p] case[q] u of { D x -> C v } of { C x -> w }
2777 -- And we are producing case[??] u of { D x -> w[x\v]}
2778 --
2779 -- What should the multiplicity `??` be? In order to preserve the usage of
2780 -- variables in `u`, it needs to be `pq`.
2781 --
2782 -- As an illustration, consider the following
2783 -- case[Many] case[1] of { C x -> C x } of { C x -> (x, x) }
2784 -- Where C :: A %1 -> T is linear
2785 -- If we were to produce a case[1], like the inner case, we would get
2786 -- case[1] of { C x -> (x, x) }
2787 -- Which is ill-typed with respect to linearity. So it needs to be a
2788 -- case[Many].
2789
2790 --------------------------------------------------
2791 -- 2. Eliminate the case if scrutinee is evaluated
2792 --------------------------------------------------
2793
2794 rebuildCase env scrut case_bndr alts@[Alt _ bndrs rhs] cont
2795 -- See if we can get rid of the case altogether
2796 -- See Note [Case elimination]
2797 -- mkCase made sure that if all the alternatives are equal,
2798 -- then there is now only one (DEFAULT) rhs
2799
2800 -- 2a. Dropping the case altogether, if
2801 -- a) it binds nothing (so it's really just a 'seq')
2802 -- b) evaluating the scrutinee has no side effects
2803 | is_plain_seq
2804 , exprOkForSideEffects scrut
2805 -- The entire case is dead, so we can drop it
2806 -- if the scrutinee converges without having imperative
2807 -- side effects or raising a Haskell exception
2808 -- See Note [PrimOp can_fail and has_side_effects] in GHC.Builtin.PrimOps
2809 = simplExprF env rhs cont
2810
2811 -- 2b. Turn the case into a let, if
2812 -- a) it binds only the case-binder
2813 -- b) unlifted case: the scrutinee is ok-for-speculation
2814 -- lifted case: the scrutinee is in HNF (or will later be demanded)
2815 -- See Note [Case to let transformation]
2816 | all_dead_bndrs
2817 , doCaseToLet scrut case_bndr
2818 = do { tick (CaseElim case_bndr)
2819 ; (floats1, env') <- simplNonRecX env case_bndr scrut
2820 ; (floats2, expr') <- simplExprF env' rhs cont
2821 ; return (floats1 `addFloats` floats2, expr') }
2822
2823 -- 2c. Try the seq rules if
2824 -- a) it binds only the case binder
2825 -- b) a rule for seq applies
2826 -- See Note [User-defined RULES for seq] in GHC.Types.Id.Make
2827 | is_plain_seq
2828 = do { mb_rule <- trySeqRules env scrut rhs cont
2829 ; case mb_rule of
2830 Just (env', rule_rhs, cont') -> simplExprF env' rule_rhs cont'
2831 Nothing -> reallyRebuildCase env scrut case_bndr alts cont }
2832 where
2833 all_dead_bndrs = all isDeadBinder bndrs -- bndrs are [InId]
2834 is_plain_seq = all_dead_bndrs && isDeadBinder case_bndr -- Evaluation *only* for effect
2835
2836 rebuildCase env scrut case_bndr alts cont
2837 = reallyRebuildCase env scrut case_bndr alts cont
2838
2839
2840 doCaseToLet :: OutExpr -- Scrutinee
2841 -> InId -- Case binder
2842 -> Bool
2843 -- The situation is case scrut of b { DEFAULT -> body }
2844 -- Can we transform thus? let { b = scrut } in body
2845 doCaseToLet scrut case_bndr
2846 | isTyCoVar case_bndr -- Respect GHC.Core
2847 = isTyCoArg scrut -- Note [Core type and coercion invariant]
2848
2849 | isUnliftedType (idType case_bndr)
2850 = exprOkForSpeculation scrut
2851
2852 | otherwise -- Scrut has a lifted type
2853 = exprIsHNF scrut
2854 || isStrUsedDmd (idDemandInfo case_bndr)
2855 -- See Note [Case-to-let for strictly-used binders]
2856
2857 --------------------------------------------------
2858 -- 3. Catch-all case
2859 --------------------------------------------------
2860
2861 reallyRebuildCase env scrut case_bndr alts cont
2862 | not (sm_case_case (getMode env))
2863 = do { case_expr <- simplAlts env scrut case_bndr alts
2864 (mkBoringStop (contHoleType cont))
2865 ; rebuild env case_expr cont }
2866
2867 | otherwise
2868 = do { (floats, env', cont') <- mkDupableCaseCont env alts cont
2869 ; case_expr <- simplAlts env' scrut
2870 (scaleIdBy holeScaling case_bndr)
2871 (scaleAltsBy holeScaling alts)
2872 cont'
2873 ; return (floats, case_expr) }
2874 where
2875 holeScaling = contHoleScaling cont
2876 -- Note [Scaling in case-of-case]
2877
2878 {-
2879 simplCaseBinder checks whether the scrutinee is a variable, v. If so,
2880 try to eliminate uses of v in the RHSs in favour of case_bndr; that
2881 way, there's a chance that v will now only be used once, and hence
2882 inlined.
2883
2884 Historical note: we use to do the "case binder swap" in the Simplifier
2885 so there were additional complications if the scrutinee was a variable.
2886 Now the binder-swap stuff is done in the occurrence analyser; see
2887 "GHC.Core.Opt.OccurAnal" Note [Binder swap].
2888
2889 Note [knownCon occ info]
2890 ~~~~~~~~~~~~~~~~~~~~~~~~
2891 If the case binder is not dead, then neither are the pattern bound
2892 variables:
2893 case <any> of x { (a,b) ->
2894 case x of { (p,q) -> p } }
2895 Here (a,b) both look dead, but come alive after the inner case is eliminated.
2896 The point is that we bring into the envt a binding
2897 let x = (a,b)
2898 after the outer case, and that makes (a,b) alive. At least we do unless
2899 the case binder is guaranteed dead.
2900
2901 Note [Case alternative occ info]
2902 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2903 When we are simply reconstructing a case (the common case), we always
2904 zap the occurrence info on the binders in the alternatives. Even
2905 if the case binder is dead, the scrutinee is usually a variable, and *that*
2906 can bring the case-alternative binders back to life.
2907 See Note [Add unfolding for scrutinee]
2908
2909 Note [Improving seq]
2910 ~~~~~~~~~~~~~~~~~~~
2911 Consider
2912 type family F :: * -> *
2913 type instance F Int = Int
2914
2915 We'd like to transform
2916 case e of (x :: F Int) { DEFAULT -> rhs }
2917 ===>
2918 case e `cast` co of (x'::Int)
2919 I# x# -> let x = x' `cast` sym co
2920 in rhs
2921
2922 so that 'rhs' can take advantage of the form of x'. Notice that Note
2923 [Case of cast] (in OccurAnal) may then apply to the result.
2924
2925 We'd also like to eliminate empty types (#13468). So if
2926
2927 data Void
2928 type instance F Bool = Void
2929
2930 then we'd like to transform
2931 case (x :: F Bool) of { _ -> error "urk" }
2932 ===>
2933 case (x |> co) of (x' :: Void) of {}
2934
2935 Nota Bene: we used to have a built-in rule for 'seq' that dropped
2936 casts, so that
2937 case (x |> co) of { _ -> blah }
2938 dropped the cast; in order to improve the chances of trySeqRules
2939 firing. But that works in the /opposite/ direction to Note [Improving
2940 seq] so there's a danger of flip/flopping. Better to make trySeqRules
2941 insensitive to the cast, which is now is.
2942
2943 The need for [Improving seq] showed up in Roman's experiments. Example:
2944 foo :: F Int -> Int -> Int
2945 foo t n = t `seq` bar n
2946 where
2947 bar 0 = 0
2948 bar n = bar (n - case t of TI i -> i)
2949 Here we'd like to avoid repeated evaluating t inside the loop, by
2950 taking advantage of the `seq`.
2951
2952 At one point I did transformation in LiberateCase, but it's more
2953 robust here. (Otherwise, there's a danger that we'll simply drop the
2954 'seq' altogether, before LiberateCase gets to see it.)
2955
2956 Note [Scaling in case-of-case]
2957 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2958
2959 When two cases commute, if done naively, the multiplicities will be wrong:
2960
2961 case (case u of w[1] { (x[1], y[1]) } -> f x y) of w'[Many]
2962 { (z[Many], t[Many]) -> z
2963 }
2964
2965 The multiplicities here, are correct, but if I perform a case of case:
2966
2967 case u of w[1]
2968 { (x[1], y[1]) -> case f x y of w'[Many] of { (z[Many], t[Many]) -> z }
2969 }
2970
2971 This is wrong! Using `f x y` inside a `case … of w'[Many]` means that `x` and
2972 `y` must have multiplicities `Many` not `1`! The correct solution is to make
2973 all the `1`-s be `Many`-s instead:
2974
2975 case u of w[Many]
2976 { (x[Many], y[Many]) -> case f x y of w'[Many] of { (z[Many], t[Many]) -> z }
2977 }
2978
2979 In general, when commuting two cases, the rule has to be:
2980
2981 case (case … of x[p] {…}) of y[q] { … }
2982 ===> case … of x[p*q] { … case … of y[q] { … } }
2983
2984 This is materialised, in the simplifier, by the fact that every time we simplify
2985 case alternatives with a continuation (the surrounded case (or more!)), we must
2986 scale the entire case we are simplifying, by a scaling factor which can be
2987 computed in the continuation (with function `contHoleScaling`).
2988 -}
2989
2990 simplAlts :: SimplEnv
2991 -> OutExpr -- Scrutinee
2992 -> InId -- Case binder
2993 -> [InAlt] -- Non-empty
2994 -> SimplCont
2995 -> SimplM OutExpr -- Returns the complete simplified case expression
2996
2997 simplAlts env0 scrut case_bndr alts cont'
2998 = do { traceSmpl "simplAlts" (vcat [ ppr case_bndr
2999 , text "cont':" <+> ppr cont'
3000 , text "in_scope" <+> ppr (seInScope env0) ])
3001 ; (env1, case_bndr1) <- simplBinder env0 case_bndr
3002 ; let case_bndr2 = case_bndr1 `setIdUnfolding` evaldUnfolding
3003 env2 = modifyInScope env1 case_bndr2
3004 -- See Note [Case binder evaluated-ness]
3005
3006 ; fam_envs <- getFamEnvs
3007 ; (alt_env', scrut', case_bndr') <- improveSeq fam_envs env2 scrut
3008 case_bndr case_bndr2 alts
3009
3010 ; (imposs_deflt_cons, in_alts) <- prepareAlts scrut' case_bndr' alts
3011 -- NB: it's possible that the returned in_alts is empty: this is handled
3012 -- by the caller (rebuildCase) in the missingAlt function
3013
3014 ; alts' <- mapM (simplAlt alt_env' (Just scrut') imposs_deflt_cons case_bndr' cont') in_alts
3015 ; -- pprTrace "simplAlts" (ppr case_bndr $$ ppr alts_ty $$ ppr alts_ty' $$ ppr alts $$ ppr cont') $
3016
3017 ; let alts_ty' = contResultType cont'
3018 -- See Note [Avoiding space leaks in OutType]
3019 ; seqType alts_ty' `seq`
3020 mkCase (seDynFlags env0) scrut' case_bndr' alts_ty' alts' }
3021
3022
3023 ------------------------------------
3024 improveSeq :: (FamInstEnv, FamInstEnv) -> SimplEnv
3025 -> OutExpr -> InId -> OutId -> [InAlt]
3026 -> SimplM (SimplEnv, OutExpr, OutId)
3027 -- Note [Improving seq]
3028 improveSeq fam_envs env scrut case_bndr case_bndr1 [Alt DEFAULT _ _]
3029 | Just (Reduction co ty2) <- topNormaliseType_maybe fam_envs (idType case_bndr1)
3030 = do { case_bndr2 <- newId (fsLit "nt") Many ty2
3031 ; let rhs = DoneEx (Var case_bndr2 `Cast` mkSymCo co) Nothing
3032 env2 = extendIdSubst env case_bndr rhs
3033 ; return (env2, scrut `Cast` co, case_bndr2) }
3034
3035 improveSeq _ env scrut _ case_bndr1 _
3036 = return (env, scrut, case_bndr1)
3037
3038
3039 ------------------------------------
3040 simplAlt :: SimplEnv
3041 -> Maybe OutExpr -- The scrutinee
3042 -> [AltCon] -- These constructors can't be present when
3043 -- matching the DEFAULT alternative
3044 -> OutId -- The case binder
3045 -> SimplCont
3046 -> InAlt
3047 -> SimplM OutAlt
3048
3049 simplAlt env _ imposs_deflt_cons case_bndr' cont' (Alt DEFAULT bndrs rhs)
3050 = assert (null bndrs) $
3051 do { let env' = addBinderUnfolding env case_bndr'
3052 (mkOtherCon imposs_deflt_cons)
3053 -- Record the constructors that the case-binder *can't* be.
3054 ; rhs' <- simplExprC env' rhs cont'
3055 ; return (Alt DEFAULT [] rhs') }
3056
3057 simplAlt env scrut' _ case_bndr' cont' (Alt (LitAlt lit) bndrs rhs)
3058 = assert (null bndrs) $
3059 do { env' <- addAltUnfoldings env scrut' case_bndr' (Lit lit)
3060 ; rhs' <- simplExprC env' rhs cont'
3061 ; return (Alt (LitAlt lit) [] rhs') }
3062
3063 simplAlt env scrut' _ case_bndr' cont' (Alt (DataAlt con) vs rhs)
3064 = do { -- See Note [Adding evaluatedness info to pattern-bound variables]
3065 let vs_with_evals = addEvals scrut' con vs
3066 ; (env', vs') <- simplLamBndrs env vs_with_evals
3067
3068 -- Bind the case-binder to (con args)
3069 ; let inst_tys' = tyConAppArgs (idType case_bndr')
3070 con_app :: OutExpr
3071 con_app = mkConApp2 con inst_tys' vs'
3072
3073 ; env'' <- addAltUnfoldings env' scrut' case_bndr' con_app
3074 ; rhs' <- simplExprC env'' rhs cont'
3075 ; return (Alt (DataAlt con) vs' rhs') }
3076
3077 {- Note [Adding evaluatedness info to pattern-bound variables]
3078 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3079 addEvals records the evaluated-ness of the bound variables of
3080 a case pattern. This is *important*. Consider
3081
3082 data T = T !Int !Int
3083
3084 case x of { T a b -> T (a+1) b }
3085
3086 We really must record that b is already evaluated so that we don't
3087 go and re-evaluate it when constructing the result.
3088 See Note [Data-con worker strictness] in GHC.Core.DataCon
3089
3090 NB: simplLamBndrs preserves this eval info
3091
3092 In addition to handling data constructor fields with !s, addEvals
3093 also records the fact that the result of seq# is always in WHNF.
3094 See Note [seq# magic] in GHC.Core.Opt.ConstantFold. Example (#15226):
3095
3096 case seq# v s of
3097 (# s', v' #) -> E
3098
3099 we want the compiler to be aware that v' is in WHNF in E.
3100
3101 Open problem: we don't record that v itself is in WHNF (and we can't
3102 do it here). The right thing is to do some kind of binder-swap;
3103 see #15226 for discussion.
3104 -}
3105
3106 addEvals :: Maybe OutExpr -> DataCon -> [Id] -> [Id]
3107 -- See Note [Adding evaluatedness info to pattern-bound variables]
3108 addEvals scrut con vs
3109 -- Deal with seq# applications
3110 | Just scr <- scrut
3111 , isUnboxedTupleDataCon con
3112 , [s,x] <- vs
3113 -- Use stripNArgs rather than collectArgsTicks to avoid building
3114 -- a list of arguments only to throw it away immediately.
3115 , Just (Var f) <- stripNArgs 4 scr
3116 , Just SeqOp <- isPrimOpId_maybe f
3117 , let x' = zapIdOccInfoAndSetEvald MarkedStrict x
3118 = [s, x']
3119
3120 -- Deal with banged datacon fields
3121 addEvals _scrut con vs = go vs the_strs
3122 where
3123 the_strs = dataConRepStrictness con
3124
3125 go [] [] = []
3126 go (v:vs') strs | isTyVar v = v : go vs' strs
3127 go (v:vs') (str:strs) = zapIdOccInfoAndSetEvald str v : go vs' strs
3128 go _ _ = pprPanic "Simplify.addEvals"
3129 (ppr con $$
3130 ppr vs $$
3131 ppr_with_length (map strdisp the_strs) $$
3132 ppr_with_length (dataConRepArgTys con) $$
3133 ppr_with_length (dataConRepStrictness con))
3134 where
3135 ppr_with_length list
3136 = ppr list <+> parens (text "length =" <+> ppr (length list))
3137 strdisp MarkedStrict = text "MarkedStrict"
3138 strdisp NotMarkedStrict = text "NotMarkedStrict"
3139
3140 zapIdOccInfoAndSetEvald :: StrictnessMark -> Id -> Id
3141 zapIdOccInfoAndSetEvald str v =
3142 setCaseBndrEvald str $ -- Add eval'dness info
3143 zapIdOccInfo v -- And kill occ info;
3144 -- see Note [Case alternative occ info]
3145
3146 addAltUnfoldings :: SimplEnv -> Maybe OutExpr -> OutId -> OutExpr -> SimplM SimplEnv
3147 addAltUnfoldings env scrut case_bndr con_app
3148 = do { let con_app_unf = mk_simple_unf con_app
3149 env1 = addBinderUnfolding env case_bndr con_app_unf
3150
3151 -- See Note [Add unfolding for scrutinee]
3152 env2 | Many <- idMult case_bndr = case scrut of
3153 Just (Var v) -> addBinderUnfolding env1 v con_app_unf
3154 Just (Cast (Var v) co) -> addBinderUnfolding env1 v $
3155 mk_simple_unf (Cast con_app (mkSymCo co))
3156 _ -> env1
3157 | otherwise = env1
3158
3159 ; traceSmpl "addAltUnf" (vcat [ppr case_bndr <+> ppr scrut, ppr con_app])
3160 ; return env2 }
3161 where
3162 -- Force the opts, so that the whole SimplEnv isn't retained
3163 !opts = seUnfoldingOpts env
3164 mk_simple_unf = mkSimpleUnfolding opts
3165
3166 addBinderUnfolding :: SimplEnv -> Id -> Unfolding -> SimplEnv
3167 addBinderUnfolding env bndr unf
3168 | debugIsOn, Just tmpl <- maybeUnfoldingTemplate unf
3169 = warnPprTrace (not (eqType (idType bndr) (exprType tmpl)))
3170 (ppr bndr $$ ppr (idType bndr) $$ ppr tmpl $$ ppr (exprType tmpl)) $
3171 modifyInScope env (bndr `setIdUnfolding` unf)
3172
3173 | otherwise
3174 = modifyInScope env (bndr `setIdUnfolding` unf)
3175
3176 zapBndrOccInfo :: Bool -> Id -> Id
3177 -- Consider case e of b { (a,b) -> ... }
3178 -- Then if we bind b to (a,b) in "...", and b is not dead,
3179 -- then we must zap the deadness info on a,b
3180 zapBndrOccInfo keep_occ_info pat_id
3181 | keep_occ_info = pat_id
3182 | otherwise = zapIdOccInfo pat_id
3183
3184 {- Note [Case binder evaluated-ness]
3185 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3186 We pin on a (OtherCon []) unfolding to the case-binder of a Case,
3187 even though it'll be over-ridden in every case alternative with a more
3188 informative unfolding. Why? Because suppose a later, less clever, pass
3189 simply replaces all occurrences of the case binder with the binder itself;
3190 then Lint may complain about the let/app invariant. Example
3191 case e of b { DEFAULT -> let v = reallyUnsafePtrEquality# b y in ....
3192 ; K -> blah }
3193
3194 The let/app invariant requires that y is evaluated in the call to
3195 reallyUnsafePtrEquality#, which it is. But we still want that to be true if we
3196 propagate binders to occurrences.
3197
3198 This showed up in #13027.
3199
3200 Note [Add unfolding for scrutinee]
3201 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3202 In general it's unlikely that a variable scrutinee will appear
3203 in the case alternatives case x of { ...x unlikely to appear... }
3204 because the binder-swap in OccurAnal has got rid of all such occurrences
3205 See Note [Binder swap] in "GHC.Core.Opt.OccurAnal".
3206
3207 BUT it is still VERY IMPORTANT to add a suitable unfolding for a
3208 variable scrutinee, in simplAlt. Here's why
3209 case x of y
3210 (a,b) -> case b of c
3211 I# v -> ...(f y)...
3212 There is no occurrence of 'b' in the (...(f y)...). But y gets
3213 the unfolding (a,b), and *that* mentions b. If f has a RULE
3214 RULE f (p, I# q) = ...
3215 we want that rule to match, so we must extend the in-scope env with a
3216 suitable unfolding for 'y'. It's *essential* for rule matching; but
3217 it's also good for case-elimination -- suppose that 'f' was inlined
3218 and did multi-level case analysis, then we'd solve it in one
3219 simplifier sweep instead of two.
3220
3221 Exactly the same issue arises in GHC.Core.Opt.SpecConstr;
3222 see Note [Add scrutinee to ValueEnv too] in GHC.Core.Opt.SpecConstr
3223
3224 HOWEVER, given
3225 case x of y { Just a -> r1; Nothing -> r2 }
3226 we do not want to add the unfolding x -> y to 'x', which might seem cool,
3227 since 'y' itself has different unfoldings in r1 and r2. Reason: if we
3228 did that, we'd have to zap y's deadness info and that is a very useful
3229 piece of information.
3230
3231 So instead we add the unfolding x -> Just a, and x -> Nothing in the
3232 respective RHSs.
3233
3234 Since this transformation is tantamount to a binder swap, the same caveat as in
3235 Note [Suppressing binder-swaps on linear case] in OccurAnal apply.
3236
3237
3238 ************************************************************************
3239 * *
3240 \subsection{Known constructor}
3241 * *
3242 ************************************************************************
3243
3244 We are a bit careful with occurrence info. Here's an example
3245
3246 (\x* -> case x of (a*, b) -> f a) (h v, e)
3247
3248 where the * means "occurs once". This effectively becomes
3249 case (h v, e) of (a*, b) -> f a)
3250 and then
3251 let a* = h v; b = e in f a
3252 and then
3253 f (h v)
3254
3255 All this should happen in one sweep.
3256 -}
3257
3258 knownCon :: SimplEnv
3259 -> OutExpr -- The scrutinee
3260 -> [FloatBind] -> DataCon -> [OutType] -> [OutExpr] -- The scrutinee (in pieces)
3261 -> InId -> [InBndr] -> InExpr -- The alternative
3262 -> SimplCont
3263 -> SimplM (SimplFloats, OutExpr)
3264
3265 knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont
3266 = do { (floats1, env1) <- bind_args env bs dc_args
3267 ; (floats2, env2) <- bind_case_bndr env1
3268 ; (floats3, expr') <- simplExprF env2 rhs cont
3269 ; case dc_floats of
3270 [] ->
3271 return (floats1 `addFloats` floats2 `addFloats` floats3, expr')
3272 _ ->
3273 return ( emptyFloats env
3274 -- See Note [FloatBinds from constructor wrappers]
3275 , GHC.Core.Make.wrapFloats dc_floats $
3276 wrapFloats (floats1 `addFloats` floats2 `addFloats` floats3) expr') }
3277 where
3278 zap_occ = zapBndrOccInfo (isDeadBinder bndr) -- bndr is an InId
3279
3280 -- Ugh!
3281 bind_args env' [] _ = return (emptyFloats env', env')
3282
3283 bind_args env' (b:bs') (Type ty : args)
3284 = assert (isTyVar b )
3285 bind_args (extendTvSubst env' b ty) bs' args
3286
3287 bind_args env' (b:bs') (Coercion co : args)
3288 = assert (isCoVar b )
3289 bind_args (extendCvSubst env' b co) bs' args
3290
3291 bind_args env' (b:bs') (arg : args)
3292 = assert (isId b) $
3293 do { let b' = zap_occ b
3294 -- Note that the binder might be "dead", because it doesn't
3295 -- occur in the RHS; and simplNonRecX may therefore discard
3296 -- it via postInlineUnconditionally.
3297 -- Nevertheless we must keep it if the case-binder is alive,
3298 -- because it may be used in the con_app. See Note [knownCon occ info]
3299 ; (floats1, env2) <- simplNonRecX env' b' arg -- arg satisfies let/app invariant
3300 ; (floats2, env3) <- bind_args env2 bs' args
3301 ; return (floats1 `addFloats` floats2, env3) }
3302
3303 bind_args _ _ _ =
3304 pprPanic "bind_args" $ ppr dc $$ ppr bs $$ ppr dc_args $$
3305 text "scrut:" <+> ppr scrut
3306
3307 -- It's useful to bind bndr to scrut, rather than to a fresh
3308 -- binding x = Con arg1 .. argn
3309 -- because very often the scrut is a variable, so we avoid
3310 -- creating, and then subsequently eliminating, a let-binding
3311 -- BUT, if scrut is a not a variable, we must be careful
3312 -- about duplicating the arg redexes; in that case, make
3313 -- a new con-app from the args
3314 bind_case_bndr env
3315 | isDeadBinder bndr = return (emptyFloats env, env)
3316 | exprIsTrivial scrut = return (emptyFloats env
3317 , extendIdSubst env bndr (DoneEx scrut Nothing))
3318 -- See Note [Do not duplicate constructor applications]
3319 | otherwise = do { dc_args <- mapM (simplVar env) bs
3320 -- dc_ty_args are already OutTypes,
3321 -- but bs are InBndrs
3322 ; let con_app = Var (dataConWorkId dc)
3323 `mkTyApps` dc_ty_args
3324 `mkApps` dc_args
3325 ; simplNonRecX env bndr con_app }
3326
3327 -------------------
3328 missingAlt :: SimplEnv -> Id -> [InAlt] -> SimplCont
3329 -> SimplM (SimplFloats, OutExpr)
3330 -- This isn't strictly an error, although it is unusual.
3331 -- It's possible that the simplifier might "see" that
3332 -- an inner case has no accessible alternatives before
3333 -- it "sees" that the entire branch of an outer case is
3334 -- inaccessible. So we simply put an error case here instead.
3335 missingAlt env case_bndr _ cont
3336 = warnPprTrace True (text "missingAlt" <+> ppr case_bndr) $
3337 -- See Note [Avoiding space leaks in OutType]
3338 let cont_ty = contResultType cont
3339 in seqType cont_ty `seq`
3340 return (emptyFloats env, mkImpossibleExpr cont_ty)
3341
3342 {-
3343 ************************************************************************
3344 * *
3345 \subsection{Duplicating continuations}
3346 * *
3347 ************************************************************************
3348
3349 Consider
3350 let x* = case e of { True -> e1; False -> e2 }
3351 in b
3352 where x* is a strict binding. Then mkDupableCont will be given
3353 the continuation
3354 case [] of { True -> e1; False -> e2 } ; let x* = [] in b ; stop
3355 and will split it into
3356 dupable: case [] of { True -> $j1; False -> $j2 } ; stop
3357 join floats: $j1 = e1, $j2 = e2
3358 non_dupable: let x* = [] in b; stop
3359
3360 Putting this back together would give
3361 let x* = let { $j1 = e1; $j2 = e2 } in
3362 case e of { True -> $j1; False -> $j2 }
3363 in b
3364 (Of course we only do this if 'e' wants to duplicate that continuation.)
3365 Note how important it is that the new join points wrap around the
3366 inner expression, and not around the whole thing.
3367
3368 In contrast, any let-bindings introduced by mkDupableCont can wrap
3369 around the entire thing.
3370
3371 Note [Bottom alternatives]
3372 ~~~~~~~~~~~~~~~~~~~~~~~~~~
3373 When we have
3374 case (case x of { A -> error .. ; B -> e; C -> error ..)
3375 of alts
3376 then we can just duplicate those alts because the A and C cases
3377 will disappear immediately. This is more direct than creating
3378 join points and inlining them away. See #4930.
3379 -}
3380
3381 --------------------
3382 mkDupableCaseCont :: SimplEnv -> [InAlt] -> SimplCont
3383 -> SimplM ( SimplFloats -- Join points (if any)
3384 , SimplEnv -- Use this for the alts
3385 , SimplCont)
3386 mkDupableCaseCont env alts cont
3387 | altsWouldDup alts = do { (floats, cont) <- mkDupableCont env cont
3388 ; let env' = bumpCaseDepth $
3389 env `setInScopeFromF` floats
3390 ; return (floats, env', cont) }
3391 | otherwise = return (emptyFloats env, env, cont)
3392
3393 altsWouldDup :: [InAlt] -> Bool -- True iff strictly > 1 non-bottom alternative
3394 altsWouldDup [] = False -- See Note [Bottom alternatives]
3395 altsWouldDup [_] = False
3396 altsWouldDup (alt:alts)
3397 | is_bot_alt alt = altsWouldDup alts
3398 | otherwise = not (all is_bot_alt alts)
3399 -- otherwise case: first alt is non-bot, so all the rest must be bot
3400 where
3401 is_bot_alt (Alt _ _ rhs) = exprIsDeadEnd rhs
3402
3403 -------------------------
3404 mkDupableCont :: SimplEnv
3405 -> SimplCont
3406 -> SimplM ( SimplFloats -- Incoming SimplEnv augmented with
3407 -- extra let/join-floats and in-scope variables
3408 , SimplCont) -- dup_cont: duplicable continuation
3409 mkDupableCont env cont
3410 = mkDupableContWithDmds env (repeat topDmd) cont
3411
3412 mkDupableContWithDmds
3413 :: SimplEnv -> [Demand] -- Demands on arguments; always infinite
3414 -> SimplCont -> SimplM ( SimplFloats, SimplCont)
3415
3416 mkDupableContWithDmds env _ cont
3417 | contIsDupable cont
3418 = return (emptyFloats env, cont)
3419
3420 mkDupableContWithDmds _ _ (Stop {}) = panic "mkDupableCont" -- Handled by previous eqn
3421
3422 mkDupableContWithDmds env dmds (CastIt ty cont)
3423 = do { (floats, cont') <- mkDupableContWithDmds env dmds cont
3424 ; return (floats, CastIt ty cont') }
3425
3426 -- Duplicating ticks for now, not sure if this is good or not
3427 mkDupableContWithDmds env dmds (TickIt t cont)
3428 = do { (floats, cont') <- mkDupableContWithDmds env dmds cont
3429 ; return (floats, TickIt t cont') }
3430
3431 mkDupableContWithDmds env _
3432 (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs
3433 , sc_body = body, sc_env = se, sc_cont = cont})
3434 -- See Note [Duplicating StrictBind]
3435 -- K[ let x = <> in b ] --> join j x = K[ b ]
3436 -- j <>
3437 = do { let sb_env = se `setInScopeFromE` env
3438 ; (sb_env1, bndr') <- simplBinder sb_env bndr
3439 ; (floats1, join_inner) <- simplLam sb_env1 bndrs body cont
3440 -- No need to use mkDupableCont before simplLam; we
3441 -- use cont once here, and then share the result if necessary
3442
3443 ; let join_body = wrapFloats floats1 join_inner
3444 res_ty = contResultType cont
3445
3446 ; mkDupableStrictBind env bndr' join_body res_ty }
3447
3448 mkDupableContWithDmds env _
3449 (StrictArg { sc_fun = fun, sc_cont = cont
3450 , sc_fun_ty = fun_ty })
3451 -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
3452 | isNothing (isDataConId_maybe (ai_fun fun))
3453 , thumbsUpPlanA cont -- See point (3) of Note [Duplicating join points]
3454 = -- Use Plan A of Note [Duplicating StrictArg]
3455 do { let (_ : dmds) = ai_dmds fun
3456 ; (floats1, cont') <- mkDupableContWithDmds env dmds cont
3457 -- Use the demands from the function to add the right
3458 -- demand info on any bindings we make for further args
3459 ; (floats_s, args') <- mapAndUnzipM (makeTrivialArg env)
3460 (ai_args fun)
3461 ; return ( foldl' addLetFloats floats1 floats_s
3462 , StrictArg { sc_fun = fun { ai_args = args' }
3463 , sc_cont = cont'
3464 , sc_fun_ty = fun_ty
3465 , sc_dup = OkToDup} ) }
3466
3467 | otherwise
3468 = -- Use Plan B of Note [Duplicating StrictArg]
3469 -- K[ f a b <> ] --> join j x = K[ f a b x ]
3470 -- j <>
3471 do { let rhs_ty = contResultType cont
3472 (m,arg_ty,_) = splitFunTy fun_ty
3473 ; arg_bndr <- newId (fsLit "arg") m arg_ty
3474 ; let env' = env `addNewInScopeIds` [arg_bndr]
3475 ; (floats, join_rhs) <- rebuildCall env' (addValArgTo fun (Var arg_bndr) fun_ty) cont
3476 ; mkDupableStrictBind env' arg_bndr (wrapFloats floats join_rhs) rhs_ty }
3477 where
3478 thumbsUpPlanA (StrictArg {}) = False
3479 thumbsUpPlanA (CastIt _ k) = thumbsUpPlanA k
3480 thumbsUpPlanA (TickIt _ k) = thumbsUpPlanA k
3481 thumbsUpPlanA (ApplyToVal { sc_cont = k }) = thumbsUpPlanA k
3482 thumbsUpPlanA (ApplyToTy { sc_cont = k }) = thumbsUpPlanA k
3483 thumbsUpPlanA (Select {}) = True
3484 thumbsUpPlanA (StrictBind {}) = True
3485 thumbsUpPlanA (Stop {}) = True
3486
3487 mkDupableContWithDmds env dmds
3488 (ApplyToTy { sc_cont = cont, sc_arg_ty = arg_ty, sc_hole_ty = hole_ty })
3489 = do { (floats, cont') <- mkDupableContWithDmds env dmds cont
3490 ; return (floats, ApplyToTy { sc_cont = cont'
3491 , sc_arg_ty = arg_ty, sc_hole_ty = hole_ty }) }
3492
3493 mkDupableContWithDmds env dmds
3494 (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_env = se
3495 , sc_cont = cont, sc_hole_ty = hole_ty })
3496 = -- e.g. [...hole...] (...arg...)
3497 -- ==>
3498 -- let a = ...arg...
3499 -- in [...hole...] a
3500 -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
3501 do { let (dmd:_) = dmds -- Never fails
3502 ; (floats1, cont') <- mkDupableContWithDmds env dmds cont
3503 ; let env' = env `setInScopeFromF` floats1
3504 ; (_, se', arg') <- simplArg env' dup se arg
3505 ; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg'
3506 ; let all_floats = floats1 `addLetFloats` let_floats2
3507 ; return ( all_floats
3508 , ApplyToVal { sc_arg = arg''
3509 , sc_env = se' `setInScopeFromF` all_floats
3510 -- Ensure that sc_env includes the free vars of
3511 -- arg'' in its in-scope set, even if makeTrivial
3512 -- has turned arg'' into a fresh variable
3513 -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
3514 , sc_dup = OkToDup, sc_cont = cont'
3515 , sc_hole_ty = hole_ty }) }
3516
3517 mkDupableContWithDmds env _
3518 (Select { sc_bndr = case_bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
3519 = -- e.g. (case [...hole...] of { pi -> ei })
3520 -- ===>
3521 -- let ji = \xij -> ei
3522 -- in case [...hole...] of { pi -> ji xij }
3523 -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
3524 do { tick (CaseOfCase case_bndr)
3525 ; (floats, alt_env, alt_cont) <- mkDupableCaseCont (se `setInScopeFromE` env) alts cont
3526 -- NB: We call mkDupableCaseCont here to make cont duplicable
3527 -- (if necessary, depending on the number of alts)
3528 -- And this is important: see Note [Fusing case continuations]
3529
3530 ; let cont_scaling = contHoleScaling cont
3531 -- See Note [Scaling in case-of-case]
3532 ; (alt_env', case_bndr') <- simplBinder alt_env (scaleIdBy cont_scaling case_bndr)
3533 ; alts' <- mapM (simplAlt alt_env' Nothing [] case_bndr' alt_cont) (scaleAltsBy cont_scaling alts)
3534 -- Safe to say that there are no handled-cons for the DEFAULT case
3535 -- NB: simplBinder does not zap deadness occ-info, so
3536 -- a dead case_bndr' will still advertise its deadness
3537 -- This is really important because in
3538 -- case e of b { (# p,q #) -> ... }
3539 -- b is always dead, and indeed we are not allowed to bind b to (# p,q #),
3540 -- which might happen if e was an explicit unboxed pair and b wasn't marked dead.
3541 -- In the new alts we build, we have the new case binder, so it must retain
3542 -- its deadness.
3543 -- NB: we don't use alt_env further; it has the substEnv for
3544 -- the alternatives, and we don't want that
3545
3546 ; (join_floats, alts'') <- mapAccumLM (mkDupableAlt (targetPlatform (seDynFlags env)) case_bndr')
3547 emptyJoinFloats alts'
3548
3549 ; let all_floats = floats `addJoinFloats` join_floats
3550 -- Note [Duplicated env]
3551 ; return (all_floats
3552 , Select { sc_dup = OkToDup
3553 , sc_bndr = case_bndr'
3554 , sc_alts = alts''
3555 , sc_env = zapSubstEnv se `setInScopeFromF` all_floats
3556 -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
3557 , sc_cont = mkBoringStop (contResultType cont) } ) }
3558
3559 mkDupableStrictBind :: SimplEnv -> OutId -> OutExpr -> OutType
3560 -> SimplM (SimplFloats, SimplCont)
3561 mkDupableStrictBind env arg_bndr join_rhs res_ty
3562 | exprIsTrivial join_rhs -- See point (2) of Note [Duplicating join points]
3563 = return (emptyFloats env
3564 , StrictBind { sc_bndr = arg_bndr, sc_bndrs = []
3565 , sc_body = join_rhs
3566 , sc_env = zapSubstEnv env
3567 -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
3568 , sc_dup = OkToDup
3569 , sc_cont = mkBoringStop res_ty } )
3570 | otherwise
3571 = do { join_bndr <- newJoinId [arg_bndr] res_ty
3572 ; let arg_info = ArgInfo { ai_fun = join_bndr
3573 , ai_rules = Nothing, ai_args = []
3574 , ai_encl = False, ai_dmds = repeat topDmd
3575 , ai_discs = repeat 0 }
3576 ; return ( addJoinFloats (emptyFloats env) $
3577 unitJoinFloat $
3578 NonRec join_bndr $
3579 Lam (setOneShotLambda arg_bndr) join_rhs
3580 , StrictArg { sc_dup = OkToDup
3581 , sc_fun = arg_info
3582 , sc_fun_ty = idType join_bndr
3583 , sc_cont = mkBoringStop res_ty
3584 } ) }
3585
3586 mkDupableAlt :: Platform -> OutId
3587 -> JoinFloats -> OutAlt
3588 -> SimplM (JoinFloats, OutAlt)
3589 mkDupableAlt _platform case_bndr jfloats (Alt con bndrs' rhs')
3590 | exprIsTrivial rhs' -- See point (2) of Note [Duplicating join points]
3591 = return (jfloats, Alt con bndrs' rhs')
3592
3593 | otherwise
3594 = do { let rhs_ty' = exprType rhs'
3595
3596 final_bndrs'
3597 | isDeadBinder case_bndr = filter abstract_over bndrs'
3598 | otherwise = bndrs' ++ [case_bndr]
3599
3600 abstract_over bndr
3601 | isTyVar bndr = True -- Abstract over all type variables just in case
3602 | otherwise = not (isDeadBinder bndr)
3603 -- The deadness info on the new Ids is preserved by simplBinders
3604 final_args = varsToCoreExprs final_bndrs'
3605 -- Note [Join point abstraction]
3606
3607 -- We make the lambdas into one-shot-lambdas. The
3608 -- join point is sure to be applied at most once, and doing so
3609 -- prevents the body of the join point being floated out by
3610 -- the full laziness pass
3611 really_final_bndrs = map one_shot final_bndrs'
3612 one_shot v | isId v = setOneShotLambda v
3613 | otherwise = v
3614 join_rhs = mkLams really_final_bndrs rhs'
3615
3616 ; join_bndr <- newJoinId final_bndrs' rhs_ty'
3617
3618 ; let join_call = mkApps (Var join_bndr) final_args
3619 alt' = Alt con bndrs' join_call
3620
3621 ; return ( jfloats `addJoinFlts` unitJoinFloat (NonRec join_bndr join_rhs)
3622 , alt') }
3623 -- See Note [Duplicated env]
3624
3625 {-
3626 Note [Fusing case continuations]
3627 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3628 It's important to fuse two successive case continuations when the
3629 first has one alternative. That's why we call prepareCaseCont here.
3630 Consider this, which arises from thunk splitting (see Note [Thunk
3631 splitting] in GHC.Core.Opt.WorkWrap):
3632
3633 let
3634 x* = case (case v of {pn -> rn}) of
3635 I# a -> I# a
3636 in body
3637
3638 The simplifier will find
3639 (Var v) with continuation
3640 Select (pn -> rn) (
3641 Select [I# a -> I# a] (
3642 StrictBind body Stop
3643
3644 So we'll call mkDupableCont on
3645 Select [I# a -> I# a] (StrictBind body Stop)
3646 There is just one alternative in the first Select, so we want to
3647 simplify the rhs (I# a) with continuation (StrictBind body Stop)
3648 Supposing that body is big, we end up with
3649 let $j a = <let x = I# a in body>
3650 in case v of { pn -> case rn of
3651 I# a -> $j a }
3652 This is just what we want because the rn produces a box that
3653 the case rn cancels with.
3654
3655 See #4957 a fuller example.
3656
3657 Note [Duplicating join points]
3658 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3659 IN #19996 we discovered that we want to be really careful about
3660 inlining join points. Consider
3661 case (join $j x = K f x )
3662 (in case v of )
3663 ( p1 -> $j x1 ) of
3664 ( p2 -> $j x2 )
3665 ( p3 -> $j x3 )
3666 K g y -> blah[g,y]
3667
3668 Here the join-point RHS is very small, just a constructor
3669 application (K x y). So we might inline it to get
3670 case (case v of )
3671 ( p1 -> K f x1 ) of
3672 ( p2 -> K f x2 )
3673 ( p3 -> K f x3 )
3674 K g y -> blah[g,y]
3675
3676 But now we have to make `blah` into a join point, /abstracted/
3677 over `g` and `y`. In contrast, if we /don't/ inline $j we
3678 don't need a join point for `blah` and we'll get
3679 join $j x = let g=f, y=x in blah[g,y]
3680 in case v of
3681 p1 -> $j x1
3682 p2 -> $j x2
3683 p3 -> $j x3
3684
3685 This can make a /massive/ difference, because `blah` can see
3686 what `f` is, instead of lambda-abstracting over it.
3687
3688 To achieve this:
3689
3690 1. Do not postInlineUnconditionally a join point, until the Final
3691 phase. (The Final phase is still quite early, so we might consider
3692 delaying still more.)
3693
3694 2. In mkDupableAlt and mkDupableStrictBind, generate an alterative for
3695 all alternatives, except for exprIsTrival RHSs. Previously we used
3696 exprIsDupable. This generates a lot more join points, but makes
3697 them much more case-of-case friendly.
3698
3699 It is definitely worth checking for exprIsTrivial, otherwise we get
3700 an extra Simplifier iteration, because it is inlined in the next
3701 round.
3702
3703 3. By the same token we want to use Plan B in
3704 Note [Duplicating StrictArg] when the RHS of the new join point
3705 is a data constructor application. That same Note explains why we
3706 want Plan A when the RHS of the new join point would be a
3707 non-data-constructor application
3708
3709 4. You might worry that $j will be inlined by the call-site inliner,
3710 but it won't because the call-site context for a join is usually
3711 extremely boring (the arguments come from the pattern match).
3712 And if not, then perhaps inlining it would be a good idea.
3713
3714 You might also wonder if we get UnfWhen, because the RHS of the
3715 join point is no bigger than the call. But in the cases we care
3716 about it will be a little bigger, because of that free `f` in
3717 $j x = K f x
3718 So for now we don't do anything special in callSiteInline
3719
3720 There is a bit of tension between (2) and (3). Do we want to retain
3721 the join point only when the RHS is
3722 * a constructor application? or
3723 * just non-trivial?
3724 Currently, a bit ad-hoc, but we definitely want to retain the join
3725 point for data constructors in mkDupalbleALt (point 2); that is the
3726 whole point of #19996 described above.
3727
3728 Historical Note [Case binders and join points]
3729 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3730 NB: this entire Note is now irrelevant. In Jun 21 we stopped
3731 adding unfoldings to lambda binders (#17530). It was always a
3732 hack and bit us in multiple small and not-so-small ways
3733
3734 Consider this
3735 case (case .. ) of c {
3736 I# c# -> ....c....
3737
3738 If we make a join point with c but not c# we get
3739 $j = \c -> ....c....
3740
3741 But if later inlining scrutinises the c, thus
3742
3743 $j = \c -> ... case c of { I# y -> ... } ...
3744
3745 we won't see that 'c' has already been scrutinised. This actually
3746 happens in the 'tabulate' function in wave4main, and makes a significant
3747 difference to allocation.
3748
3749 An alternative plan is this:
3750
3751 $j = \c# -> let c = I# c# in ...c....
3752
3753 but that is bad if 'c' is *not* later scrutinised.
3754
3755 So instead we do both: we pass 'c' and 'c#' , and record in c's inlining
3756 (a stable unfolding) that it's really I# c#, thus
3757
3758 $j = \c# -> \c[=I# c#] -> ...c....
3759
3760 Absence analysis may later discard 'c'.
3761
3762 NB: take great care when doing strictness analysis;
3763 see Note [Lambda-bound unfoldings] in GHC.Core.Opt.DmdAnal.
3764
3765 Also note that we can still end up passing stuff that isn't used. Before
3766 strictness analysis we have
3767 let $j x y c{=(x,y)} = (h c, ...)
3768 in ...
3769 After strictness analysis we see that h is strict, we end up with
3770 let $j x y c{=(x,y)} = ($wh x y, ...)
3771 and c is unused.
3772
3773 Note [Duplicated env]
3774 ~~~~~~~~~~~~~~~~~~~~~
3775 Some of the alternatives are simplified, but have not been turned into a join point
3776 So they *must* have a zapped subst-env. So we can't use completeNonRecX to
3777 bind the join point, because it might to do PostInlineUnconditionally, and
3778 we'd lose that when zapping the subst-env. We could have a per-alt subst-env,
3779 but zapping it (as we do in mkDupableCont, the Select case) is safe, and
3780 at worst delays the join-point inlining.
3781
3782 Note [Funky mkLamTypes]
3783 ~~~~~~~~~~~~~~~~~~~~~~
3784 Notice the funky mkLamTypes. If the constructor has existentials
3785 it's possible that the join point will be abstracted over
3786 type variables as well as term variables.
3787 Example: Suppose we have
3788 data T = forall t. C [t]
3789 Then faced with
3790 case (case e of ...) of
3791 C t xs::[t] -> rhs
3792 We get the join point
3793 let j :: forall t. [t] -> ...
3794 j = /\t \xs::[t] -> rhs
3795 in
3796 case (case e of ...) of
3797 C t xs::[t] -> j t xs
3798
3799 Note [Duplicating StrictArg]
3800 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3801 Dealing with making a StrictArg continuation duplicable has turned out
3802 to be one of the trickiest corners of the simplifier, giving rise
3803 to several cases in which the simplier expanded the program's size
3804 *exponentially*. They include
3805 #13253 exponential inlining
3806 #10421 ditto
3807 #18140 strict constructors
3808 #18282 another nested-function call case
3809
3810 Suppose we have a call
3811 f e1 (case x of { True -> r1; False -> r2 }) e3
3812 and f is strict in its second argument. Then we end up in
3813 mkDupableCont with a StrictArg continuation for (f e1 <> e3).
3814 There are two ways to make it duplicable.
3815
3816 * Plan A: move the entire call inwards, being careful not
3817 to duplicate e1 or e3, thus:
3818 let a1 = e1
3819 a3 = e3
3820 in case x of { True -> f a1 r1 a3
3821 ; False -> f a1 r2 a3 }
3822
3823 * Plan B: make a join point:
3824 join $j x = f e1 x e3
3825 in case x of { True -> jump $j r1
3826 ; False -> jump $j r2 }
3827
3828 Notice that Plan B is very like the way we handle strict bindings;
3829 see Note [Duplicating StrictBind]. And Plan B is exactly what we'd
3830 get if we turned use a case expression to evaluate the strict arg:
3831
3832 case (case x of { True -> r1; False -> r2 }) of
3833 r -> f e1 r e3
3834
3835 So, looking at Note [Duplicating join points], we also want Plan B
3836 when `f` is a data constructor.
3837
3838 Plan A is often good. Here's an example from #3116
3839 go (n+1) (case l of
3840 1 -> bs'
3841 _ -> Chunk p fpc (o+1) (l-1) bs')
3842
3843 If we pushed the entire call for 'go' inside the case, we get
3844 call-pattern specialisation for 'go', which is *crucial* for
3845 this particular program.
3846
3847 Here is another example.
3848 && E (case x of { T -> F; F -> T })
3849
3850 Pushing the call inward (being careful not to duplicate E)
3851 let a = E
3852 in case x of { T -> && a F; F -> && a T }
3853
3854 and now the (&& a F) etc can optimise. Moreover there might
3855 be a RULE for the function that can fire when it "sees" the
3856 particular case alternative.
3857
3858 But Plan A can have terrible, terrible behaviour. Here is a classic
3859 case:
3860 f (f (f (f (f True))))
3861
3862 Suppose f is strict, and has a body that is small enough to inline.
3863 The innermost call inlines (seeing the True) to give
3864 f (f (f (f (case v of { True -> e1; False -> e2 }))))
3865
3866 Now, suppose we naively push the entire continuation into both
3867 case branches (it doesn't look large, just f.f.f.f). We get
3868 case v of
3869 True -> f (f (f (f e1)))
3870 False -> f (f (f (f e2)))
3871
3872 And now the process repeats, so we end up with an exponentially large
3873 number of copies of f. No good!
3874
3875 CONCLUSION: we want Plan A in general, but do Plan B is there a
3876 danger of this nested call behaviour. The function that decides
3877 this is called thumbsUpPlanA.
3878
3879 Note [Keeping demand info in StrictArg Plan A]
3880 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3881 Following on from Note [Duplicating StrictArg], another common code
3882 pattern that can go bad is this:
3883 f (case x1 of { T -> F; F -> T })
3884 (case x2 of { T -> F; F -> T })
3885 ...etc...
3886 when f is strict in all its arguments. (It might, for example, be a
3887 strict data constructor whose wrapper has not yet been inlined.)
3888
3889 We use Plan A (because there is no nesting) giving
3890 let a2 = case x2 of ...
3891 a3 = case x3 of ...
3892 in case x1 of { T -> f F a2 a3 ... ; F -> f T a2 a3 ... }
3893
3894 Now we must be careful! a2 and a3 are small, and the OneOcc code in
3895 postInlineUnconditionally may inline them both at both sites; see Note
3896 Note [Inline small things to avoid creating a thunk] in
3897 Simplify.Utils. But if we do inline them, the entire process will
3898 repeat -- back to exponential behaviour.
3899
3900 So we are careful to keep the demand-info on a2 and a3. Then they'll
3901 be /strict/ let-bindings, which will be dealt with by StrictBind.
3902 That's why contIsDupableWithDmds is careful to propagage demand
3903 info to the auxiliary bindings it creates. See the Demand argument
3904 to makeTrivial.
3905
3906 Note [Duplicating StrictBind]
3907 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3908 We make a StrictBind duplicable in a very similar way to
3909 that for case expressions. After all,
3910 let x* = e in b is similar to case e of x -> b
3911
3912 So we potentially make a join-point for the body, thus:
3913 let x = <> in b ==> join j x = b
3914 in j <>
3915
3916 Just like StrictArg in fact -- and indeed they share code.
3917
3918 Note [Join point abstraction] Historical note
3919 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3920 NB: This note is now historical, describing how (in the past) we used
3921 to add a void argument to nullary join points. But now that "join
3922 point" is not a fuzzy concept but a formal syntactic construct (as
3923 distinguished by the JoinId constructor of IdDetails), each of these
3924 concerns is handled separately, with no need for a vestigial extra
3925 argument.
3926
3927 Join points always have at least one value argument,
3928 for several reasons
3929
3930 * If we try to lift a primitive-typed something out
3931 for let-binding-purposes, we will *caseify* it (!),
3932 with potentially-disastrous strictness results. So
3933 instead we turn it into a function: \v -> e
3934 where v::Void#. The value passed to this function is void,
3935 which generates (almost) no code.
3936
3937 * CPR. We used to say "&& isUnliftedType rhs_ty'" here, but now
3938 we make the join point into a function whenever used_bndrs'
3939 is empty. This makes the join-point more CPR friendly.
3940 Consider: let j = if .. then I# 3 else I# 4
3941 in case .. of { A -> j; B -> j; C -> ... }
3942
3943 Now CPR doesn't w/w j because it's a thunk, so
3944 that means that the enclosing function can't w/w either,
3945 which is a lose. Here's the example that happened in practice:
3946 kgmod :: Int -> Int -> Int
3947 kgmod x y = if x > 0 && y < 0 || x < 0 && y > 0
3948 then 78
3949 else 5
3950
3951 * Let-no-escape. We want a join point to turn into a let-no-escape
3952 so that it is implemented as a jump, and one of the conditions
3953 for LNE is that it's not updatable. In CoreToStg, see
3954 Note [What is a non-escaping let]
3955
3956 * Floating. Since a join point will be entered once, no sharing is
3957 gained by floating out, but something might be lost by doing
3958 so because it might be allocated.
3959
3960 I have seen a case alternative like this:
3961 True -> \v -> ...
3962 It's a bit silly to add the realWorld dummy arg in this case, making
3963 $j = \s v -> ...
3964 True -> $j s
3965 (the \v alone is enough to make CPR happy) but I think it's rare
3966
3967 There's a slight infelicity here: we pass the overall
3968 case_bndr to all the join points if it's used in *any* RHS,
3969 because we don't know its usage in each RHS separately
3970
3971
3972
3973 ************************************************************************
3974 * *
3975 Unfoldings
3976 * *
3977 ************************************************************************
3978 -}
3979
3980 simplLetUnfolding :: SimplEnv-> TopLevelFlag
3981 -> MaybeJoinCont
3982 -> InId
3983 -> OutExpr -> OutType -> ArityType
3984 -> Unfolding -> SimplM Unfolding
3985 simplLetUnfolding env top_lvl cont_mb id new_rhs rhs_ty arity unf
3986 | isStableUnfolding unf
3987 = simplStableUnfolding env top_lvl cont_mb id rhs_ty arity unf
3988 | isExitJoinId id
3989 = return noUnfolding -- See Note [Do not inline exit join points] in GHC.Core.Opt.Exitify
3990 | otherwise
3991 = -- Otherwise, we end up retaining all the SimpleEnv
3992 let !opts = seUnfoldingOpts env
3993 in mkLetUnfolding opts top_lvl InlineRhs id new_rhs
3994
3995 -------------------
3996 mkLetUnfolding :: UnfoldingOpts -> TopLevelFlag -> UnfoldingSource
3997 -> InId -> OutExpr -> SimplM Unfolding
3998 mkLetUnfolding !uf_opts top_lvl src id new_rhs
3999 = return (mkUnfolding uf_opts src is_top_lvl is_bottoming new_rhs)
4000 -- We make an unfolding *even for loop-breakers*.
4001 -- Reason: (a) It might be useful to know that they are WHNF
4002 -- (b) In GHC.Iface.Tidy we currently assume that, if we want to
4003 -- expose the unfolding then indeed we *have* an unfolding
4004 -- to expose. (We could instead use the RHS, but currently
4005 -- we don't.) The simple thing is always to have one.
4006 where
4007 -- Might as well force this, profiles indicate up to 0.5MB of thunks
4008 -- just from this site.
4009 !is_top_lvl = isTopLevel top_lvl
4010 -- See Note [Force bottoming field]
4011 !is_bottoming = isDeadEndId id
4012
4013 -------------------
4014 simplStableUnfolding :: SimplEnv -> TopLevelFlag
4015 -> MaybeJoinCont -- Just k => a join point with continuation k
4016 -> InId
4017 -> OutType
4018 -> ArityType -- Used to eta expand, but only for non-join-points
4019 -> Unfolding
4020 ->SimplM Unfolding
4021 -- Note [Setting the new unfolding]
4022 simplStableUnfolding env top_lvl mb_cont id rhs_ty id_arity unf
4023 = case unf of
4024 NoUnfolding -> return unf
4025 BootUnfolding -> return unf
4026 OtherCon {} -> return unf
4027
4028 DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args }
4029 -> do { (env', bndrs') <- simplBinders unf_env bndrs
4030 ; args' <- mapM (simplExpr env') args
4031 ; return (mkDFunUnfolding bndrs' con args') }
4032
4033 CoreUnfolding { uf_tmpl = expr, uf_src = src, uf_guidance = guide }
4034 | isStableSource src
4035 -> do { expr' <- case mb_cont of
4036 Just cont -> -- Binder is a join point
4037 -- See Note [Rules and unfolding for join points]
4038 simplJoinRhs unf_env id expr cont
4039 Nothing -> -- Binder is not a join point
4040 do { expr' <- simplExprC unf_env expr (mkBoringStop rhs_ty)
4041 ; return (eta_expand expr') }
4042 ; case guide of
4043 UnfWhen { ug_arity = arity
4044 , ug_unsat_ok = sat_ok
4045 , ug_boring_ok = boring_ok
4046 }
4047 -- Happens for INLINE things
4048 -- Really important to force new_boring_ok as otherwise
4049 -- `ug_boring_ok` is a thunk chain of
4050 -- inlineBoringExprOk expr0
4051 -- || inlineBoringExprOk expr1 || ...
4052 -- See #20134
4053 -> let !new_boring_ok = boring_ok || inlineBoringOk expr'
4054 guide' =
4055 UnfWhen { ug_arity = arity
4056 , ug_unsat_ok = sat_ok
4057 , ug_boring_ok = new_boring_ok
4058
4059 }
4060 -- Refresh the boring-ok flag, in case expr'
4061 -- has got small. This happens, notably in the inlinings
4062 -- for dfuns for single-method classes; see
4063 -- Note [Single-method classes] in GHC.Tc.TyCl.Instance.
4064 -- A test case is #4138
4065 -- But retain a previous boring_ok of True; e.g. see
4066 -- the way it is set in calcUnfoldingGuidanceWithArity
4067 in return (mkCoreUnfolding src is_top_lvl expr' guide')
4068 -- See Note [Top-level flag on inline rules] in GHC.Core.Unfold
4069
4070 _other -- Happens for INLINABLE things
4071 -> mkLetUnfolding uf_opts top_lvl src id expr' }
4072 -- If the guidance is UnfIfGoodArgs, this is an INLINABLE
4073 -- unfolding, and we need to make sure the guidance is kept up
4074 -- to date with respect to any changes in the unfolding.
4075
4076 | otherwise -> return noUnfolding -- Discard unstable unfoldings
4077 where
4078 uf_opts = seUnfoldingOpts env
4079 -- Forcing this can save about 0.5MB of max residency and the result
4080 -- is small and easy to compute so might as well force it.
4081 !is_top_lvl = isTopLevel top_lvl
4082 act = idInlineActivation id
4083 unf_env = updMode (updModeForStableUnfoldings act) env
4084 -- See Note [Simplifying inside stable unfoldings] in GHC.Core.Opt.Simplify.Utils
4085
4086 -- See Note [Eta-expand stable unfoldings]
4087 eta_expand expr
4088 | not eta_on = expr
4089 | exprIsTrivial expr = expr
4090 | otherwise = etaExpandAT (getInScope env) id_arity expr
4091 eta_on = sm_eta_expand (getMode env)
4092
4093 {- Note [Eta-expand stable unfoldings]
4094 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4095 For INLINE/INLINABLE things (which get stable unfoldings) there's a danger
4096 of getting
4097 f :: Int -> Int -> Int -> Blah
4098 [ Arity = 3 -- Good arity
4099 , Unf=Stable (\xy. blah) -- Less good arity, only 2
4100 f = \pqr. e
4101
4102 This can happen because f's RHS is optimised more vigorously than
4103 its stable unfolding. Now suppose we have a call
4104 g = f x
4105 Because f has arity=3, g will have arity=2. But if we inline f (using
4106 its stable unfolding) g's arity will reduce to 1, because <blah>
4107 hasn't been optimised yet. This happened in the 'parsec' library,
4108 for Text.Pasec.Char.string.
4109
4110 Generally, if we know that 'f' has arity N, it seems sensible to
4111 eta-expand the stable unfolding to arity N too. Simple and consistent.
4112
4113 Wrinkles
4114 * Don't eta-expand a trivial expr, else each pass will eta-reduce it,
4115 and then eta-expand again. See Note [Do not eta-expand trivial expressions]
4116 in GHC.Core.Opt.Simplify.Utils.
4117 * Don't eta-expand join points; see Note [Do not eta-expand join points]
4118 in GHC.Core.Opt.Simplify.Utils. We uphold this because the join-point
4119 case (mb_cont = Just _) doesn't use eta_expand.
4120
4121 Note [Force bottoming field]
4122 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4123 We need to force bottoming, or the new unfolding holds
4124 on to the old unfolding (which is part of the id).
4125
4126 Note [Setting the new unfolding]
4127 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4128 * If there's an INLINE pragma, we simplify the RHS gently. Maybe we
4129 should do nothing at all, but simplifying gently might get rid of
4130 more crap.
4131
4132 * If not, we make an unfolding from the new RHS. But *only* for
4133 non-loop-breakers. Making loop breakers not have an unfolding at all
4134 means that we can avoid tests in exprIsConApp, for example. This is
4135 important: if exprIsConApp says 'yes' for a recursive thing, then we
4136 can get into an infinite loop
4137
4138 If there's a stable unfolding on a loop breaker (which happens for
4139 INLINABLE), we hang on to the inlining. It's pretty dodgy, but the
4140 user did say 'INLINE'. May need to revisit this choice.
4141
4142 ************************************************************************
4143 * *
4144 Rules
4145 * *
4146 ************************************************************************
4147
4148 Note [Rules in a letrec]
4149 ~~~~~~~~~~~~~~~~~~~~~~~~
4150 After creating fresh binders for the binders of a letrec, we
4151 substitute the RULES and add them back onto the binders; this is done
4152 *before* processing any of the RHSs. This is important. Manuel found
4153 cases where he really, really wanted a RULE for a recursive function
4154 to apply in that function's own right-hand side.
4155
4156 See Note [Forming Rec groups] in "GHC.Core.Opt.OccurAnal"
4157 -}
4158
4159 addBndrRules :: SimplEnv -> InBndr -> OutBndr
4160 -> MaybeJoinCont -- Just k for a join point binder
4161 -- Nothing otherwise
4162 -> SimplM (SimplEnv, OutBndr)
4163 -- Rules are added back into the bin
4164 addBndrRules env in_id out_id mb_cont
4165 | null old_rules
4166 = return (env, out_id)
4167 | otherwise
4168 = do { new_rules <- simplRules env (Just out_id) old_rules mb_cont
4169 ; let final_id = out_id `setIdSpecialisation` mkRuleInfo new_rules
4170 ; return (modifyInScope env final_id, final_id) }
4171 where
4172 old_rules = ruleInfoRules (idSpecialisation in_id)
4173
4174 simplRules :: SimplEnv -> Maybe OutId -> [CoreRule]
4175 -> MaybeJoinCont -> SimplM [CoreRule]
4176 simplRules env mb_new_id rules mb_cont
4177 = mapM simpl_rule rules
4178 where
4179 simpl_rule rule@(BuiltinRule {})
4180 = return rule
4181
4182 simpl_rule rule@(Rule { ru_bndrs = bndrs, ru_args = args
4183 , ru_fn = fn_name, ru_rhs = rhs
4184 , ru_act = act })
4185 = do { (env', bndrs') <- simplBinders env bndrs
4186 ; let rhs_ty = substTy env' (exprType rhs)
4187 rhs_cont = case mb_cont of -- See Note [Rules and unfolding for join points]
4188 Nothing -> mkBoringStop rhs_ty
4189 Just cont -> assertPpr join_ok bad_join_msg cont
4190 lhs_env = updMode updModeForRules env'
4191 rhs_env = updMode (updModeForStableUnfoldings act) env'
4192 -- See Note [Simplifying the RHS of a RULE]
4193 fn_name' = case mb_new_id of
4194 Just id -> idName id
4195 Nothing -> fn_name
4196
4197 -- join_ok is an assertion check that the join-arity of the
4198 -- binder matches that of the rule, so that pushing the
4199 -- continuation into the RHS makes sense
4200 join_ok = case mb_new_id of
4201 Just id | Just join_arity <- isJoinId_maybe id
4202 -> length args == join_arity
4203 _ -> False
4204 bad_join_msg = vcat [ ppr mb_new_id, ppr rule
4205 , ppr (fmap isJoinId_maybe mb_new_id) ]
4206
4207 ; args' <- mapM (simplExpr lhs_env) args
4208 ; rhs' <- simplExprC rhs_env rhs rhs_cont
4209 ; return (rule { ru_bndrs = bndrs'
4210 , ru_fn = fn_name'
4211 , ru_args = args'
4212 , ru_rhs = rhs' }) }
4213
4214 {- Note [Simplifying the RHS of a RULE]
4215 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4216 We can simplify the RHS of a RULE much as we do the RHS of a stable
4217 unfolding. We used to use the much more conservative updModeForRules
4218 for the RHS as well as the LHS, but that seems more conservative
4219 than necesary. Allowing some inlining might, for example, eliminate
4220 a binding.
4221 -}
4222