never executed always true always false
    1 {-
    2 (c) The AQUA Project, Glasgow University, 1993-1998
    3 
    4 \section{Common subexpression}
    5 -}
    6 
    7 
    8 
    9 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
   10 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns   #-}
   11 
   12 module GHC.Core.Opt.CSE (cseProgram, cseOneExpr) where
   13 
   14 import GHC.Prelude
   15 
   16 import GHC.Core.Subst
   17 import GHC.Types.Var    ( Var )
   18 import GHC.Types.Var.Env ( mkInScopeSet )
   19 import GHC.Types.Id     ( Id, idType, idHasRules, zapStableUnfolding
   20                         , idInlineActivation, setInlineActivation
   21                         , zapIdOccInfo, zapIdUsageInfo, idInlinePragma
   22                         , isJoinId, isJoinId_maybe )
   23 import GHC.Core.Utils   ( mkAltExpr, eqExpr
   24                         , exprIsTickedString
   25                         , stripTicksE, stripTicksT, mkTicks )
   26 import GHC.Core.FVs     ( exprFreeVars )
   27 import GHC.Core.Type    ( tyConAppArgs )
   28 import GHC.Core
   29 import GHC.Utils.Outputable
   30 import GHC.Types.Basic
   31 import GHC.Types.Tickish
   32 import GHC.Core.Map.Expr
   33 import GHC.Utils.Misc   ( filterOut, equalLength )
   34 import GHC.Utils.Panic
   35 import Data.List        ( mapAccumL )
   36 
   37 {-
   38                         Simple common sub-expression
   39                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   40 When we see
   41         x1 = C a b
   42         x2 = C x1 b
   43 we build up a reverse mapping:   C a b  -> x1
   44                                  C x1 b -> x2
   45 and apply that to the rest of the program.
   46 
   47 When we then see
   48         y1 = C a b
   49         y2 = C y1 b
   50 we replace the C a b with x1.  But then we *dont* want to
   51 add   x1 -> y1  to the mapping.  Rather, we want the reverse, y1 -> x1
   52 so that a subsequent binding
   53         y2 = C y1 b
   54 will get transformed to C x1 b, and then to x2.
   55 
   56 So we carry an extra var->var substitution which we apply *before* looking up in the
   57 reverse mapping.
   58 
   59 
   60 Note [Shadowing]
   61 ~~~~~~~~~~~~~~~~
   62 We have to be careful about shadowing.
   63 For example, consider
   64         f = \x -> let y = x+x in
   65                       h = \x -> x+x
   66                   in ...
   67 
   68 Here we must *not* do CSE on the inner x+x!  The simplifier used to guarantee no
   69 shadowing, but it doesn't any more (it proved too hard), so we clone as we go.
   70 We can simply add clones to the substitution already described.
   71 
   72 
   73 Note [CSE for bindings]
   74 ~~~~~~~~~~~~~~~~~~~~~~~
   75 Let-bindings have two cases, implemented by addBinding.
   76 
   77 * SUBSTITUTE: applies when the RHS is a variable
   78 
   79      let x = y in ...(h x)....
   80 
   81   Here we want to extend the /substitution/ with x -> y, so that the
   82   (h x) in the body might CSE with an enclosing (let v = h y in ...).
   83   NB: the substitution maps InIds, so we extend the substitution with
   84       a binding for the original InId 'x'
   85 
   86   How can we have a variable on the RHS? Doesn't the simplifier inline them?
   87 
   88     - First, the original RHS might have been (g z) which has CSE'd
   89       with an enclosing (let y = g z in ...).  This is super-important.
   90       See #5996:
   91          x1 = C a b
   92          x2 = C x1 b
   93          y1 = C a b
   94          y2 = C y1 b
   95       Here we CSE y1's rhs to 'x1', and then we must add (y1->x1) to
   96       the substitution so that we can CSE the binding for y2.
   97 
   98     - Second, we use addBinding for case expression scrutinees too;
   99       see Note [CSE for case expressions]
  100 
  101 * EXTEND THE REVERSE MAPPING: applies in all other cases
  102 
  103      let x = h y in ...(h y)...
  104 
  105   Here we want to extend the /reverse mapping (cs_map)/ so that
  106   we CSE the (h y) call to x.
  107 
  108   Note that we use EXTEND even for a trivial expression, provided it
  109   is not a variable or literal. In particular this /includes/ type
  110   applications. This can be important (#13156); e.g.
  111      case f @ Int of { r1 ->
  112      case f @ Int of { r2 -> ...
  113   Here we want to common-up the two uses of (f @ Int) so we can
  114   remove one of the case expressions.
  115 
  116   See also Note [Corner case for case expressions] for another
  117   reason not to use SUBSTITUTE for all trivial expressions.
  118 
  119 Notice that
  120   - The SUBSTITUTE situation extends the substitution (cs_subst)
  121   - The EXTEND situation extends the reverse mapping (cs_map)
  122 
  123 Notice also that in the SUBSTITUTE case we leave behind a binding
  124   x = y
  125 even though we /also/ carry a substitution x -> y.  Can we just drop
  126 the binding instead?  Well, not at top level! See Note [Top level and
  127 postInlineUnconditionally] in GHC.Core.Opt.Simplify.Utils; and in any
  128 case CSE applies only to the /bindings/ of the program, and we leave
  129 it to the simplifier to propate effects to the RULES. Finally, it
  130 doesn't seem worth the effort to discard the nested bindings because
  131 the simplifier will do it next.
  132 
  133 Note [CSE for case expressions]
  134 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  135 Consider
  136   case scrut_expr of x { ...alts... }
  137 This is very like a strict let-binding
  138   let !x = scrut_expr in ...
  139 So we use (addBinding x scrut_expr) to process scrut_expr and x, and as a
  140 result all the stuff under Note [CSE for bindings] applies directly.
  141 
  142 For example:
  143 
  144 * Trivial scrutinee
  145      f = \x -> case x of wild {
  146                  (a:as) -> case a of wild1 {
  147                              (p,q) -> ...(wild1:as)...
  148 
  149   Here, (wild1:as) is morally the same as (a:as) and hence equal to
  150   wild. But that's not quite obvious.  In the rest of the compiler we
  151   want to keep it as (wild1:as), but for CSE purpose that's a bad
  152   idea.
  153 
  154   By using addBinding we add the binding (wild1 -> a) to the substitution,
  155   which does exactly the right thing.
  156 
  157   (Notice this is exactly backwards to what the simplifier does, which
  158   is to try to replaces uses of 'a' with uses of 'wild1'.)
  159 
  160   This is the main reason that addBinding is called with a trivial rhs.
  161 
  162 * Non-trivial scrutinee
  163      case (f x) of y { pat -> ...let z = f x in ... }
  164 
  165   By using addBinding we'll add (f x :-> y) to the cs_map, and
  166   thereby CSE the inner (f x) to y.
  167 
  168 Note [CSE for INLINE and NOINLINE]
  169 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  170 There are some subtle interactions of CSE with functions that the user
  171 has marked as INLINE or NOINLINE. (Examples from Roman Leshchinskiy.)
  172 Consider
  173 
  174         yes :: Int  {-# NOINLINE yes #-}
  175         yes = undefined
  176 
  177         no :: Int   {-# NOINLINE no #-}
  178         no = undefined
  179 
  180         foo :: Int -> Int -> Int  {-# NOINLINE foo #-}
  181         foo m n = n
  182 
  183         {-# RULES "foo/no" foo no = id #-}
  184 
  185         bar :: Int -> Int
  186         bar = foo yes
  187 
  188 We do not expect the rule to fire.  But if we do CSE, then we risk
  189 getting yes=no, and the rule does fire.  Actually, it won't because
  190 NOINLINE means that 'yes' will never be inlined, not even if we have
  191 yes=no.  So that's fine (now; perhaps in the olden days, yes=no would
  192 have substituted even if 'yes' was NOINLINE).
  193 
  194 But we do need to take care.  Consider
  195 
  196         {-# NOINLINE bar #-}
  197         bar = <rhs>     -- Same rhs as foo
  198 
  199         foo = <rhs>
  200 
  201 If CSE produces
  202         foo = bar
  203 then foo will never be inlined to <rhs> (when it should be, if <rhs>
  204 is small).  The conclusion here is this:
  205 
  206    We should not add
  207        <rhs> :-> bar
  208   to the CSEnv if 'bar' has any constraints on when it can inline;
  209   that is, if its 'activation' not always active.  Otherwise we
  210   might replace <rhs> by 'bar', and then later be unable to see that it
  211   really was <rhs>.
  212 
  213 An except to the rule is when the INLINE pragma is not from the user, e.g. from
  214 WorkWrap (see Note [Wrapper activation]). We can tell because noUserInlineSpec
  215 is then true.
  216 
  217 Note that we do not (currently) do CSE on the unfolding stored inside
  218 an Id, even if it is a 'stable' unfolding.  That means that when an
  219 unfolding happens, it is always faithful to what the stable unfolding
  220 originally was.
  221 
  222 Note [CSE for stable unfoldings]
  223 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  224 Consider
  225    {-# Unf = Stable (\pq. build blah) #-}
  226    foo = x
  227 
  228 Here 'foo' has a stable unfolding, but its (optimised) RHS is trivial.
  229 (Turns out that this actually happens for the enumFromTo method of
  230 the Integer instance of Enum in GHC.Enum.)  Suppose moreover that foo's
  231 stable unfolding originates from an INLINE or INLINEABLE pragma on foo.
  232 Then we obviously do NOT want to extend the substitution with (foo->x),
  233 because we promised to inline foo as what the user wrote.  See similar Note
  234 [Stable unfoldings and postInlineUnconditionally] in GHC.Core.Opt.Simplify.Utils.
  235 
  236 Nor do we want to change the reverse mapping. Suppose we have
  237 
  238    foo {-# Unf = Stable (\pq. build blah) #-}
  239        = <expr>
  240    bar = <expr>
  241 
  242 There could conceivably be merit in rewriting the RHS of bar:
  243    bar = foo
  244 but now bar's inlining behaviour will change, and importing
  245 modules might see that.  So it seems dodgy and we don't do it.
  246 
  247 Stable unfoldings are also created during worker/wrapper when we decide
  248 that a function's definition is so small that it should always inline.
  249 In this case we still want to do CSE (#13340). Hence the use of
  250 isAnyInlinePragma rather than isStableUnfolding.
  251 
  252 Now consider
  253    foo = <expr>
  254    bar {-# Unf = Stable ... #-}
  255       = <expr>
  256 
  257 where the unfolding was added by strictness analysis, say.  Then
  258 CSE goes ahead, so we get
  259    bar = foo
  260 and probably use SUBSTITUTE that will make 'bar' dead.  But just
  261 possibly not -- see Note [Dealing with ticks].  In that case we might
  262 be left with
  263    bar = tick t1 (tick t2 foo)
  264 in which case we would really like to get rid of the stable unfolding
  265 (generated by the strictness analyser, say).  Hence the zapStableUnfolding
  266 in cse_bind.  Not a big deal, and only makes a difference when ticks
  267 get into the picture.
  268 
  269 Note [Corner case for case expressions]
  270 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  271 Here is another reason that we do not use SUBSTITUTE for
  272 all trivial expressions. Consider
  273    case x |> co of (y::Array# Int) { ... }
  274 
  275 We do not want to extend the substitution with (y -> x |> co); since y
  276 is of unlifted type, this would destroy the let/app invariant if (x |>
  277 co) was not ok-for-speculation.
  278 
  279 But surely (x |> co) is ok-for-speculation, because it's a trivial
  280 expression, and x's type is also unlifted, presumably.  Well, maybe
  281 not if you are using unsafe casts.  I actually found a case where we
  282 had
  283    (x :: HValue) |> (UnsafeCo :: HValue ~ Array# Int)
  284 
  285 Note [CSE for join points?]
  286 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  287 We must not be naive about join points in CSE:
  288    join j = e in
  289    if b then jump j else 1 + e
  290 The expression (1 + jump j) is not good (see Note [Invariants on join points] in
  291 GHC.Core). This seems to come up quite seldom, but it happens (first seen
  292 compiling ppHtml in Haddock.Backends.Xhtml).
  293 
  294 We could try and be careful by tracking which join points are still valid at
  295 each subexpression, but since join points aren't allocated or shared, there's
  296 less to gain by trying to CSE them. (#13219)
  297 
  298 Note [Look inside join-point binders]
  299 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  300 Another way how CSE for join points is tricky is
  301 
  302   let join foo x = (x, 42)
  303       join bar x = (x, 42)
  304   in … jump foo 1 … jump bar 2 …
  305 
  306 naively, CSE would turn this into
  307 
  308   let join foo x = (x, 42)
  309       join bar = foo
  310   in … jump foo 1 … jump bar 2 …
  311 
  312 but now bar is a join point that claims arity one, but its right-hand side
  313 is not a lambda, breaking the join-point invariant (this was #15002).
  314 
  315 So `cse_bind` must zoom past the lambdas of a join point (using
  316 `collectNBinders`) and resume searching for CSE opportunities only in
  317 the body of the join point.
  318 
  319 Note [CSE for recursive bindings]
  320 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  321 Consider
  322   f = \x ... f....
  323   g = \y ... g ...
  324 where the "..." are identical.  Could we CSE them?  In full generality
  325 with mutual recursion it's quite hard; but for self-recursive bindings
  326 (which are very common) it's rather easy:
  327 
  328 * Maintain a separate cs_rec_map, that maps
  329       (\f. (\x. ...f...) ) -> f
  330   Note the \f in the domain of the mapping!
  331 
  332 * When we come across the binding for 'g', look up (\g. (\y. ...g...))
  333   Bingo we get a hit.  So we can replace the 'g' binding with
  334      g = f
  335 
  336 We can't use cs_map for this, because the key isn't an expression of
  337 the program; it's a kind of synthetic key for recursive bindings.
  338 
  339 
  340 ************************************************************************
  341 *                                                                      *
  342 \section{Common subexpression}
  343 *                                                                      *
  344 ************************************************************************
  345 -}
  346 
  347 cseProgram :: CoreProgram -> CoreProgram
  348 cseProgram binds = snd (mapAccumL (cseBind TopLevel) emptyCSEnv binds)
  349 
  350 cseBind :: TopLevelFlag -> CSEnv -> CoreBind -> (CSEnv, CoreBind)
  351 cseBind toplevel env (NonRec b e)
  352   = (env2, NonRec b2 e2)
  353   where
  354     (env1, b1)       = addBinder env b
  355     (env2, (b2, e2)) = cse_bind toplevel env1 (b,e) b1
  356 
  357 cseBind toplevel env (Rec [(in_id, rhs)])
  358   | noCSE in_id
  359   = (env1, Rec [(out_id, rhs')])
  360 
  361   -- See Note [CSE for recursive bindings]
  362   | Just previous <- lookupCSRecEnv env out_id rhs''
  363   , let previous' = mkTicks ticks previous
  364         out_id'   = delayInlining toplevel out_id
  365   = -- We have a hit in the recursive-binding cache
  366     (extendCSSubst env1 in_id previous', NonRec out_id' previous')
  367 
  368   | otherwise
  369   = (extendCSRecEnv env1 out_id rhs'' id_expr', Rec [(zapped_id, rhs')])
  370 
  371   where
  372     (env1, [out_id]) = addRecBinders env [in_id]
  373     rhs'  = cseExpr env1 rhs
  374     rhs'' = stripTicksE tickishFloatable rhs'
  375     ticks = stripTicksT tickishFloatable rhs'
  376     id_expr'  = varToCoreExpr out_id
  377     zapped_id = zapIdUsageInfo out_id
  378 
  379 cseBind toplevel env (Rec pairs)
  380   = (env2, Rec pairs')
  381   where
  382     (env1, bndrs1) = addRecBinders env (map fst pairs)
  383     (env2, pairs') = mapAccumL do_one env1 (zip pairs bndrs1)
  384 
  385     do_one env (pr, b1) = cse_bind toplevel env pr b1
  386 
  387 -- | Given a binding of @in_id@ to @in_rhs@, and a fresh name to refer
  388 -- to @in_id@ (@out_id@, created from addBinder or addRecBinders),
  389 -- first try to CSE @in_rhs@, and then add the resulting (possibly CSE'd)
  390 -- binding to the 'CSEnv', so that we attempt to CSE any expressions
  391 -- which are equal to @out_rhs@.
  392 cse_bind :: TopLevelFlag -> CSEnv -> (InId, InExpr) -> OutId -> (CSEnv, (OutId, OutExpr))
  393 cse_bind toplevel env (in_id, in_rhs) out_id
  394   | isTopLevel toplevel, exprIsTickedString in_rhs
  395       -- See Note [Take care with literal strings]
  396   = (env', (out_id', in_rhs))
  397 
  398   | Just arity <- isJoinId_maybe in_id
  399       -- See Note [Look inside join-point binders]
  400   = let (params, in_body) = collectNBinders arity in_rhs
  401         (env', params') = addBinders env params
  402         out_body = tryForCSE env' in_body
  403     in (env, (out_id, mkLams params' out_body))
  404 
  405   | otherwise
  406   = (env', (out_id'', out_rhs))
  407   where
  408     (env', out_id') = addBinding env in_id out_id out_rhs cse_done
  409     (cse_done, out_rhs)  = try_for_cse env in_rhs
  410     out_id'' | cse_done  = zapStableUnfolding $
  411                            delayInlining toplevel out_id'
  412              | otherwise = out_id'
  413 
  414 delayInlining :: TopLevelFlag -> Id -> Id
  415 -- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already
  416 -- See Note [Delay inlining after CSE]
  417 delayInlining top_lvl bndr
  418   | isTopLevel top_lvl
  419   , isAlwaysActive (idInlineActivation bndr)
  420   , idHasRules bndr  -- Only if the Id has some RULES,
  421                      -- which might otherwise get lost
  422        -- These rules are probably auto-generated specialisations,
  423        -- since Ids with manual rules usually have manually-inserted
  424        -- delayed inlining anyway
  425   = bndr `setInlineActivation` activateAfterInitial
  426   | otherwise
  427   = bndr
  428 
  429 addBinding :: CSEnv            -- Includes InId->OutId cloning
  430            -> InVar            -- Could be a let-bound type
  431            -> OutId -> OutExpr -- Processed binding
  432            -> Bool             -- True <=> RHS was CSE'd and is a variable
  433                                --          or maybe (Tick t variable)
  434            -> (CSEnv, OutId)   -- Final env, final bndr
  435 -- Extend the CSE env with a mapping [rhs -> out-id]
  436 -- unless we can instead just substitute [in-id -> rhs]
  437 --
  438 -- It's possible for the binder to be a type variable (see
  439 -- Note [Type-let] in GHC.Core), in which case we can just substitute.
  440 addBinding env in_id out_id rhs' cse_done
  441   | not (isId in_id) = (extendCSSubst env in_id rhs',     out_id)
  442   | noCSE in_id      = (env,                              out_id)
  443   | use_subst        = (extendCSSubst env in_id rhs',     out_id)
  444   | cse_done         = (env,                              out_id)
  445                        -- See Note [Dealing with ticks]
  446   | otherwise        = (extendCSEnv env rhs' id_expr', zapped_id)
  447   where
  448     id_expr'  = varToCoreExpr out_id
  449     zapped_id = zapIdUsageInfo out_id
  450        -- Putting the Id into the cs_map makes it possible that
  451        -- it'll become shared more than it is now, which would
  452        -- invalidate (the usage part of) its demand info.
  453        --    This caused #100218.
  454        -- Easiest thing is to zap the usage info; subsequently
  455        -- performing late demand-analysis will restore it.  Don't zap
  456        -- the strictness info; it's not necessary to do so, and losing
  457        -- it is bad for performance if you don't do late demand
  458        -- analysis
  459 
  460     -- Should we use SUBSTITUTE or EXTEND?
  461     -- See Note [CSE for bindings]
  462     use_subst | Var {} <- rhs' = True
  463               | otherwise      = False
  464 
  465 -- | Given a binder `let x = e`, this function
  466 -- determines whether we should add `e -> x` to the cs_map
  467 noCSE :: InId -> Bool
  468 noCSE id =  not (isAlwaysActive (idInlineActivation id)) &&
  469             not (noUserInlineSpec (inlinePragmaSpec (idInlinePragma id)))
  470              -- See Note [CSE for INLINE and NOINLINE]
  471          || isAnyInlinePragma (idInlinePragma id)
  472              -- See Note [CSE for stable unfoldings]
  473          || isJoinId id
  474              -- See Note [CSE for join points?]
  475 
  476 
  477 {- Note [Take care with literal strings]
  478 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  479 Consider this example:
  480 
  481   x = "foo"#
  482   y = "foo"#
  483   ...x...y...x...y....
  484 
  485 We would normally turn this into:
  486 
  487   x = "foo"#
  488   y = x
  489   ...x...x...x...x....
  490 
  491 But this breaks an invariant of Core, namely that the RHS of a top-level binding
  492 of type Addr# must be a string literal, not another variable. See Note
  493 [Core top-level string literals] in GHC.Core.
  494 
  495 For this reason, we special case top-level bindings to literal strings and leave
  496 the original RHS unmodified. This produces:
  497 
  498   x = "foo"#
  499   y = "foo"#
  500   ...x...x...x...x....
  501 
  502 Now 'y' will be discarded as dead code, and we are done.
  503 
  504 The net effect is that for the y-binding we want to
  505   - Use SUBSTITUTE, by extending the substitution with  y :-> x
  506   - but leave the original binding for y undisturbed
  507 
  508 This is done by cse_bind.  I got it wrong the first time (#13367).
  509 
  510 Note [Dealing with ticks]
  511 ~~~~~~~~~~~~~~~~~~~~~~~~~
  512 Ticks complicate CSE a bit, as I discovered in the fallout from
  513 fixing #19360.
  514 
  515 * To get more CSE-ing, we strip all the tickishFloatable ticks from
  516   an expression
  517   - when inserting into the cs_map (see extendCSEnv)
  518   - when looking up in the cs_map (see call to lookupCSEnv in try_for_cse)
  519   Quite why only the tickishFloatble ticks, I'm not quite sure.
  520 
  521 * If we get a hit in cs_map, we wrap the result in the ticks from the
  522   thing we are looking up (see try_for_cse)
  523 
  524 Net result: if we get a hit, we might replace
  525   let x = tick t1 (tick t2 e)
  526 with
  527   let x = tick t1 (tick t2 y)
  528 where 'y' is the variable that 'e' maps to.  Now consider addBinding for
  529 the binding for 'x':
  530 
  531 * We can't use SUBSTITUTE because those ticks might not be trivial (we
  532   use tickishIsCode in exprIsTrivial)
  533 
  534 * We should not use EXTEND, because we definitely don't want to
  535   add  (tick t1 (tick t2 y)) :-> x
  536   to the cs_map. Remember we strip off the ticks, so that would amount
  537   to adding y :-> x, very silly.
  538 
  539 TL;DR: we do neither; hence the cse_done case in addBinding.
  540 
  541 
  542 Note [Delay inlining after CSE]
  543 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  544 Suppose (#15445) we have
  545    f,g :: Num a => a -> a
  546    f x = ...f (x-1).....
  547    g y = ...g (y-1) ....
  548 
  549 and we make some specialisations of 'g', either automatically, or via
  550 a SPECIALISE pragma.  Then CSE kicks in and notices that the RHSs of
  551 'f' and 'g' are identical, so we get
  552    f x = ...f (x-1)...
  553    g = f
  554    {-# RULES g @Int _ = $sg #-}
  555 
  556 Now there is terrible danger that, in an importing module, we'll inline
  557 'g' before we have a chance to run its specialisation!
  558 
  559 Solution: during CSE, after a "hit" in the CSE cache
  560   * when adding a binding
  561         g = f
  562   * for a top-level function g
  563   * and g has specialisation RULES
  564 add a NOINLINE[2] activation to it, to ensure it's not inlined
  565 right away.
  566 
  567 Notes:
  568 * Why top level only?  Because for nested bindings we are already past
  569   phase 2 and will never return there.
  570 
  571 * Why "only if g has RULES"?  Because there is no point in
  572   doing this if there are no RULES; and other things being
  573   equal it delays optimisation to delay inlining (#17409)
  574 
  575 
  576 ---- Historical note ---
  577 
  578 This patch is simpler and more direct than an earlier
  579 version:
  580 
  581   commit 2110738b280543698407924a16ac92b6d804dc36
  582   Author: Simon Peyton Jones <simonpj@microsoft.com>
  583   Date:   Mon Jul 30 13:43:56 2018 +0100
  584 
  585   Don't inline functions with RULES too early
  586 
  587 We had to revert this patch because it made GHC itself slower.
  588 
  589 Why? It delayed inlining of /all/ functions with RULES, and that was
  590 very bad in GHC.Tc.Solver.Flatten.flatten_ty_con_app
  591 
  592 * It delayed inlining of liftM
  593 * That delayed the unravelling of the recursion in some dictionary
  594   bindings.
  595 * That delayed some eta expansion, leaving
  596      flatten_ty_con_app = \x y. let <stuff> in \z. blah
  597 * That allowed the float-out pass to put sguff between
  598   the \y and \z.
  599 * And that permanently stopped eta expansion of the function,
  600   even once <stuff> was simplified.
  601 
  602 -}
  603 
  604 tryForCSE :: CSEnv -> InExpr -> OutExpr
  605 tryForCSE env expr = snd (try_for_cse env expr)
  606 
  607 try_for_cse :: CSEnv -> InExpr -> (Bool, OutExpr)
  608 -- (False, e') => We did not CSE the entire expression,
  609 --                but we might have CSE'd some sub-expressions,
  610 --                yielding e'
  611 --
  612 -- (True, te') => We CSE'd the entire expression,
  613 --                yielding the trivial expression te'
  614 try_for_cse env expr
  615   | Just e <- lookupCSEnv env expr'' = (True,  mkTicks ticks e)
  616   | otherwise                        = (False, expr')
  617     -- The varToCoreExpr is needed if we have
  618     --   case e of xco { ...case e of yco { ... } ... }
  619     -- Then CSE will substitute yco -> xco;
  620     -- but these are /coercion/ variables
  621   where
  622     expr'  = cseExpr env expr
  623     expr'' = stripTicksE tickishFloatable expr'
  624     ticks  = stripTicksT tickishFloatable expr'
  625     -- We don't want to lose the source notes when a common sub
  626     -- expression gets eliminated. Hence we push all (!) of them on
  627     -- top of the replaced sub-expression. This is probably not too
  628     -- useful in practice, but upholds our semantics.
  629 
  630 -- | Runs CSE on a single expression.
  631 --
  632 -- This entry point is not used in the compiler itself, but is provided
  633 -- as a convenient entry point for users of the GHC API.
  634 cseOneExpr :: InExpr -> OutExpr
  635 cseOneExpr e = cseExpr env e
  636   where env = emptyCSEnv {cs_subst = mkEmptySubst (mkInScopeSet (exprFreeVars e)) }
  637 
  638 cseExpr :: CSEnv -> InExpr -> OutExpr
  639 cseExpr env (Type t)              = Type (substTy (csEnvSubst env) t)
  640 cseExpr env (Coercion c)          = Coercion (substCo (csEnvSubst env) c)
  641 cseExpr _   (Lit lit)             = Lit lit
  642 cseExpr env (Var v)               = lookupSubst env v
  643 cseExpr env (App f a)             = App (cseExpr env f) (tryForCSE env a)
  644 cseExpr env (Tick t e)            = Tick t (cseExpr env e)
  645 cseExpr env (Cast e co)           = Cast (tryForCSE env e) (substCo (csEnvSubst env) co)
  646 cseExpr env (Lam b e)             = let (env', b') = addBinder env b
  647                                     in Lam b' (cseExpr env' e)
  648 cseExpr env (Let bind e)          = let (env', bind') = cseBind NotTopLevel env bind
  649                                     in Let bind' (cseExpr env' e)
  650 cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts
  651 
  652 cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr
  653 cseCase env scrut bndr ty alts
  654   = Case scrut1 bndr3 ty' $
  655     combineAlts alt_env (map cse_alt alts)
  656   where
  657     ty' = substTy (csEnvSubst env) ty
  658     (cse_done, scrut1) = try_for_cse env scrut
  659 
  660     bndr1 = zapIdOccInfo bndr
  661       -- Zapping the OccInfo is needed because the extendCSEnv
  662       -- in cse_alt may mean that a dead case binder
  663       -- becomes alive, and Lint rejects that
  664     (env1, bndr2)    = addBinder env bndr1
  665     (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1 cse_done
  666          -- addBinding: see Note [CSE for case expressions]
  667 
  668     con_target :: OutExpr
  669     con_target = lookupSubst alt_env bndr
  670 
  671     arg_tys :: [OutType]
  672     arg_tys = tyConAppArgs (idType bndr3)
  673 
  674     -- See Note [CSE for case alternatives]
  675     cse_alt (Alt (DataAlt con) args rhs)
  676         = Alt (DataAlt con) args' (tryForCSE new_env rhs)
  677         where
  678           (env', args') = addBinders alt_env args
  679           new_env       = extendCSEnv env' con_expr con_target
  680           con_expr      = mkAltExpr (DataAlt con) args' arg_tys
  681 
  682     cse_alt (Alt con args rhs)
  683         = Alt con args' (tryForCSE env' rhs)
  684         where
  685           (env', args') = addBinders alt_env args
  686 
  687 combineAlts :: CSEnv -> [OutAlt] -> [OutAlt]
  688 -- See Note [Combine case alternatives]
  689 combineAlts env alts
  690   | (Just alt1, rest_alts) <- find_bndr_free_alt alts
  691   , Alt _ bndrs1 rhs1 <- alt1
  692   , let filtered_alts = filterOut (identical_alt rhs1) rest_alts
  693   , not (equalLength rest_alts filtered_alts)
  694   = assertPpr (null bndrs1) (ppr alts) $
  695     Alt DEFAULT [] rhs1 : filtered_alts
  696 
  697   | otherwise
  698   = alts
  699   where
  700     in_scope = substInScope (csEnvSubst env)
  701 
  702     find_bndr_free_alt :: [CoreAlt] -> (Maybe CoreAlt, [CoreAlt])
  703        -- The (Just alt) is a binder-free alt
  704        -- See Note [Combine case alts: awkward corner]
  705     find_bndr_free_alt []
  706       = (Nothing, [])
  707     find_bndr_free_alt (alt@(Alt _ bndrs _) : alts)
  708       | null bndrs = (Just alt, alts)
  709       | otherwise  = case find_bndr_free_alt alts of
  710                        (mb_bf, alts) -> (mb_bf, alt:alts)
  711 
  712     identical_alt rhs1 (Alt _ _ rhs) = eqExpr in_scope rhs1 rhs
  713        -- Even if this alt has binders, they will have been cloned
  714        -- If any of these binders are mentioned in 'rhs', then
  715        -- 'rhs' won't compare equal to 'rhs1' (which is from an
  716        -- alt with no binders).
  717 
  718 {- Note [CSE for case alternatives]
  719 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  720 Consider   case e of x
  721             K1 y -> ....(K1 y)...
  722             K2   -> ....K2....
  723 
  724 We definitely want to CSE that (K1 y) into just x.
  725 
  726 But what about the lone K2?  At first you would think "no" because
  727 turning K2 into 'x' increases the number of live variables.  But
  728 
  729 * Turning K2 into x increases the chance of combining identical alts.
  730   Example      case xs of
  731                   (_:_) -> f xs
  732                   []    -> f []
  733   See #17901 and simplCore/should_compile/T17901 for more examples
  734   of this kind.
  735 
  736 * The next run of the simplifier will turn 'x' back into K2, so we won't
  737   permanently bloat the free-var count.
  738 
  739 
  740 Note [Combine case alternatives]
  741 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  742 combineAlts is just a more heavyweight version of the use of
  743 combineIdenticalAlts in GHC.Core.Opt.Simplify.Utils.prepareAlts.  The basic idea is
  744 to transform
  745 
  746     DEFAULT -> e1
  747     K x     -> e1
  748     W y z   -> e2
  749 ===>
  750    DEFAULT -> e1
  751    W y z   -> e2
  752 
  753 In the simplifier we use cheapEqExpr, because it is called a lot.
  754 But here in CSE we use the full eqExpr.  After all, two alternatives usually
  755 differ near the root, so it probably isn't expensive to compare the full
  756 alternative.  It seems like the same kind of thing that CSE is supposed
  757 to be doing, which is why I put it here.
  758 
  759 I actually saw some examples in the wild, where some inlining made e1 too
  760 big for cheapEqExpr to catch it.
  761 
  762 Note [Combine case alts: awkward corner]
  763 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  764 We would really like to check isDeadBinder on the binders in the
  765 alternative.  But alas, the simplifer zaps occ-info on binders in case
  766 alternatives; see Note [Case alternative occ info] in GHC.Core.Opt.Simplify.
  767 
  768 * One alternative (perhaps a good one) would be to do OccAnal
  769   just before CSE.  Then perhaps we could get rid of combineIdenticalAlts
  770   in the Simplifier, which might save work.
  771 
  772 * Another would be for CSE to return free vars as it goes.
  773 
  774 * But the current solution is to find a nullary alternative (including
  775   the DEFAULT alt, if any). This will not catch
  776       case x of
  777         A y   -> blah
  778         B z p -> blah
  779   where no alternative is nullary or DEFAULT.  But the current
  780   solution is at least cheap.
  781 
  782 
  783 ************************************************************************
  784 *                                                                      *
  785 \section{The CSE envt}
  786 *                                                                      *
  787 ************************************************************************
  788 -}
  789 
  790 data CSEnv
  791   = CS { cs_subst :: Subst  -- Maps InBndrs to OutExprs
  792             -- The substitution variables to
  793             -- /trivial/ OutExprs, not arbitrary expressions
  794 
  795        , cs_map   :: CoreMap OutExpr   -- The reverse mapping
  796             -- Maps a OutExpr to a /trivial/ OutExpr
  797             -- The key of cs_map is stripped of all Ticks
  798 
  799        , cs_rec_map :: CoreMap OutExpr
  800             -- See Note [CSE for recursive bindings]
  801        }
  802 
  803 emptyCSEnv :: CSEnv
  804 emptyCSEnv = CS { cs_map = emptyCoreMap, cs_rec_map = emptyCoreMap
  805                 , cs_subst = emptySubst }
  806 
  807 lookupCSEnv :: CSEnv -> OutExpr -> Maybe OutExpr
  808 lookupCSEnv (CS { cs_map = csmap }) expr
  809   = lookupCoreMap csmap expr
  810 
  811 extendCSEnv :: CSEnv -> OutExpr -> OutExpr -> CSEnv
  812 extendCSEnv cse expr triv_expr
  813   = cse { cs_map = extendCoreMap (cs_map cse) sexpr triv_expr }
  814   where
  815     sexpr = stripTicksE tickishFloatable expr
  816 
  817 extendCSRecEnv :: CSEnv -> OutId -> OutExpr -> OutExpr -> CSEnv
  818 -- See Note [CSE for recursive bindings]
  819 extendCSRecEnv cse bndr expr triv_expr
  820   = cse { cs_rec_map = extendCoreMap (cs_rec_map cse) (Lam bndr expr) triv_expr }
  821 
  822 lookupCSRecEnv :: CSEnv -> OutId -> OutExpr -> Maybe OutExpr
  823 -- See Note [CSE for recursive bindings]
  824 lookupCSRecEnv (CS { cs_rec_map = csmap }) bndr expr
  825   = lookupCoreMap csmap (Lam bndr expr)
  826 
  827 csEnvSubst :: CSEnv -> Subst
  828 csEnvSubst = cs_subst
  829 
  830 lookupSubst :: CSEnv -> Id -> OutExpr
  831 lookupSubst (CS { cs_subst = sub}) x = lookupIdSubst sub x
  832 
  833 extendCSSubst :: CSEnv -> Id  -> CoreExpr -> CSEnv
  834 extendCSSubst cse x rhs = cse { cs_subst = extendSubst (cs_subst cse) x rhs }
  835 
  836 -- | Add clones to the substitution to deal with shadowing.  See
  837 -- Note [Shadowing] for more details.  You should call this whenever
  838 -- you go under a binder.
  839 addBinder :: CSEnv -> Var -> (CSEnv, Var)
  840 addBinder cse v = (cse { cs_subst = sub' }, v')
  841                 where
  842                   (sub', v') = substBndr (cs_subst cse) v
  843 
  844 addBinders :: CSEnv -> [Var] -> (CSEnv, [Var])
  845 addBinders cse vs = (cse { cs_subst = sub' }, vs')
  846                 where
  847                   (sub', vs') = substBndrs (cs_subst cse) vs
  848 
  849 addRecBinders :: CSEnv -> [Id] -> (CSEnv, [Id])
  850 addRecBinders cse vs = (cse { cs_subst = sub' }, vs')
  851                 where
  852                   (sub', vs') = substRecBndrs (cs_subst cse) vs