never executed always true always false

    1 {-
    2 (c) The AQUA Project, Glasgow University, 1993-1998
    3 
    4 The simplifier utilities
    5 -}
    6 
    7 
    8 
    9 module GHC.Core.Opt.Simplify.Utils (
   10         -- Rebuilding
   11         mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
   12 
   13         -- Inlining,
   14         preInlineUnconditionally, postInlineUnconditionally,
   15         activeUnfolding, activeRule,
   16         getUnfoldingInRuleMatch,
   17         simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
   18 
   19         -- The continuation type
   20         SimplCont(..), DupFlag(..), StaticEnv,
   21         isSimplified, contIsStop,
   22         contIsDupable, contResultType, contHoleType, contHoleScaling,
   23         contIsTrivial, contArgs,
   24         countArgs,
   25         mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
   26         interestingCallContext,
   27 
   28         -- ArgInfo
   29         ArgInfo(..), ArgSpec(..), mkArgInfo,
   30         addValArgTo, addCastTo, addTyArgTo,
   31         argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
   32         isStrictArgInfo, lazyArgContext,
   33 
   34         abstractFloats,
   35 
   36         -- Utilities
   37         isExitJoinId
   38     ) where
   39 
   40 import GHC.Prelude
   41 
   42 import GHC.Driver.Session
   43 
   44 import GHC.Core
   45 import GHC.Types.Literal ( isLitRubbish )
   46 import GHC.Core.Opt.Simplify.Env
   47 import GHC.Core.Opt.Monad        ( SimplMode(..), Tick(..) )
   48 import qualified GHC.Core.Subst
   49 import GHC.Core.Ppr
   50 import GHC.Core.TyCo.Ppr ( pprParendType )
   51 import GHC.Core.FVs
   52 import GHC.Core.Utils
   53 import GHC.Core.Opt.Arity
   54 import GHC.Core.Unfold
   55 import GHC.Core.Unfold.Make
   56 import GHC.Core.Opt.Simplify.Monad
   57 import GHC.Core.Type     hiding( substTy )
   58 import GHC.Core.Coercion hiding( substCo )
   59 import GHC.Core.DataCon ( dataConWorkId, isNullaryRepDataCon )
   60 import GHC.Core.Multiplicity
   61 import GHC.Core.Opt.ConstantFold
   62 
   63 import GHC.Types.Name
   64 import GHC.Types.Id
   65 import GHC.Types.Id.Info
   66 import GHC.Types.Tickish
   67 import GHC.Types.Demand
   68 import GHC.Types.Var.Set
   69 import GHC.Types.Basic
   70 
   71 import GHC.Data.OrdList ( isNilOL )
   72 import GHC.Data.FastString ( fsLit )
   73 
   74 import GHC.Utils.Misc
   75 import GHC.Utils.Monad
   76 import GHC.Utils.Outputable
   77 import GHC.Utils.Logger
   78 import GHC.Utils.Panic
   79 import GHC.Utils.Panic.Plain
   80 import GHC.Utils.Trace
   81 
   82 import Control.Monad    ( when )
   83 import Data.List        ( sortBy )
   84 
   85 {-
   86 ************************************************************************
   87 *                                                                      *
   88                 The SimplCont and DupFlag types
   89 *                                                                      *
   90 ************************************************************************
   91 
   92 A SimplCont allows the simplifier to traverse the expression in a
   93 zipper-like fashion.  The SimplCont represents the rest of the expression,
   94 "above" the point of interest.
   95 
   96 You can also think of a SimplCont as an "evaluation context", using
   97 that term in the way it is used for operational semantics. This is the
   98 way I usually think of it, For example you'll often see a syntax for
   99 evaluation context looking like
  100         C ::= []  |  C e   |  case C of alts  |  C `cast` co
  101 That's the kind of thing we are doing here, and I use that syntax in
  102 the comments.
  103 
  104 
  105 Key points:
  106   * A SimplCont describes a *strict* context (just like
  107     evaluation contexts do).  E.g. Just [] is not a SimplCont
  108 
  109   * A SimplCont describes a context that *does not* bind
  110     any variables.  E.g. \x. [] is not a SimplCont
  111 -}
  112 
  113 data SimplCont
  114   = Stop                -- Stop[e] = e
  115         OutType         -- Type of the <hole>
  116         CallCtxt        -- Tells if there is something interesting about
  117                         --          the context, and hence the inliner
  118                         --          should be a bit keener (see interestingCallContext)
  119                         -- Specifically:
  120                         --     This is an argument of a function that has RULES
  121                         --     Inlining the call might allow the rule to fire
  122                         -- Never ValAppCxt (use ApplyToVal instead)
  123                         -- or CaseCtxt (use Select instead)
  124 
  125   | CastIt              -- (CastIt co K)[e] = K[ e `cast` co ]
  126         OutCoercion             -- The coercion simplified
  127                                 -- Invariant: never an identity coercion
  128         SimplCont
  129 
  130   | ApplyToVal         -- (ApplyToVal arg K)[e] = K[ e arg ]
  131       { sc_dup     :: DupFlag   -- See Note [DupFlag invariants]
  132       , sc_hole_ty :: OutType   -- Type of the function, presumably (forall a. blah)
  133                                 -- See Note [The hole type in ApplyToTy/Val]
  134       , sc_arg  :: InExpr       -- The argument,
  135       , sc_env  :: StaticEnv    -- see Note [StaticEnv invariant]
  136       , sc_cont :: SimplCont }
  137 
  138   | ApplyToTy          -- (ApplyToTy ty K)[e] = K[ e ty ]
  139       { sc_arg_ty  :: OutType     -- Argument type
  140       , sc_hole_ty :: OutType     -- Type of the function, presumably (forall a. blah)
  141                                   -- See Note [The hole type in ApplyToTy/Val]
  142       , sc_cont    :: SimplCont }
  143 
  144   | Select             -- (Select alts K)[e] = K[ case e of alts ]
  145       { sc_dup  :: DupFlag        -- See Note [DupFlag invariants]
  146       , sc_bndr :: InId           -- case binder
  147       , sc_alts :: [InAlt]        -- Alternatives
  148       , sc_env  :: StaticEnv      -- See Note [StaticEnv invariant]
  149       , sc_cont :: SimplCont }
  150 
  151   -- The two strict forms have no DupFlag, because we never duplicate them
  152   | StrictBind          -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
  153                         --       or, equivalently,  = K[ (\x xs.b) e ]
  154       { sc_dup   :: DupFlag        -- See Note [DupFlag invariants]
  155       , sc_bndr  :: InId
  156       , sc_bndrs :: [InBndr]
  157       , sc_body  :: InExpr
  158       , sc_env   :: StaticEnv      -- See Note [StaticEnv invariant]
  159       , sc_cont  :: SimplCont }
  160 
  161   | StrictArg           -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
  162       { sc_dup  :: DupFlag     -- Always Simplified or OkToDup
  163       , sc_fun  :: ArgInfo     -- Specifies f, e1..en, Whether f has rules, etc
  164                                --     plus demands and discount flags for *this* arg
  165                                --          and further args
  166                                --     So ai_dmds and ai_discs are never empty
  167       , sc_fun_ty :: OutType   -- Type of the function (f e1 .. en),
  168                                -- presumably (arg_ty -> res_ty)
  169                                -- where res_ty is expected by sc_cont
  170       , sc_cont :: SimplCont }
  171 
  172   | TickIt              -- (TickIt t K)[e] = K[ tick t e ]
  173         CoreTickish     -- Tick tickish <hole>
  174         SimplCont
  175 
  176 type StaticEnv = SimplEnv       -- Just the static part is relevant
  177 
  178 data DupFlag = NoDup       -- Unsimplified, might be big
  179              | Simplified  -- Simplified
  180              | OkToDup     -- Simplified and small
  181 
  182 isSimplified :: DupFlag -> Bool
  183 isSimplified NoDup = False
  184 isSimplified _     = True       -- Invariant: the subst-env is empty
  185 
  186 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
  187 perhapsSubstTy dup env ty
  188   | isSimplified dup = ty
  189   | otherwise        = substTy env ty
  190 
  191 {- Note [StaticEnv invariant]
  192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  193 We pair up an InExpr or InAlts with a StaticEnv, which establishes the
  194 lexical scope for that InExpr.  When we simplify that InExpr/InAlts, we
  195 use
  196   - Its captured StaticEnv
  197   - Overriding its InScopeSet with the larger one at the
  198     simplification point.
  199 
  200 Why override the InScopeSet?  Example:
  201       (let y = ey in f) ex
  202 By the time we simplify ex, 'y' will be in scope.
  203 
  204 However the InScopeSet in the StaticEnv is not irrelevant: it should
  205 include all the free vars of applying the substitution to the InExpr.
  206 Reason: contHoleType uses perhapsSubstTy to apply the substitution to
  207 the expression, and that (rightly) gives ASSERT failures if the InScopeSet
  208 isn't big enough.
  209 
  210 Note [DupFlag invariants]
  211 ~~~~~~~~~~~~~~~~~~~~~~~~~
  212 In both (ApplyToVal dup _ env k)
  213    and  (Select dup _ _ env k)
  214 the following invariants hold
  215 
  216   (a) if dup = OkToDup, then continuation k is also ok-to-dup
  217   (b) if dup = OkToDup or Simplified, the subst-env is empty
  218       (and hence no need to re-simplify)
  219 -}
  220 
  221 instance Outputable DupFlag where
  222   ppr OkToDup    = text "ok"
  223   ppr NoDup      = text "nodup"
  224   ppr Simplified = text "simpl"
  225 
  226 instance Outputable SimplCont where
  227   ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
  228   ppr (CastIt co cont  )    = (text "CastIt" <+> pprOptCo co) $$ ppr cont
  229   ppr (TickIt t cont)       = (text "TickIt" <+> ppr t) $$ ppr cont
  230   ppr (ApplyToTy  { sc_arg_ty = ty, sc_cont = cont })
  231     = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
  232   ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont, sc_hole_ty = hole_ty })
  233     = (hang (text "ApplyToVal" <+> ppr dup <+> text "hole" <+> ppr hole_ty)
  234           2 (pprParendExpr arg))
  235       $$ ppr cont
  236   ppr (StrictBind { sc_bndr = b, sc_cont = cont })
  237     = (text "StrictBind" <+> ppr b) $$ ppr cont
  238   ppr (StrictArg { sc_fun = ai, sc_cont = cont })
  239     = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
  240   ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
  241     = (text "Select" <+> ppr dup <+> ppr bndr) $$
  242        whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
  243 
  244 
  245 {- Note [The hole type in ApplyToTy]
  246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  247 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
  248 continuation.  It is absolutely necessary to compute contHoleType, but it is
  249 not used for anything else (and hence may not be evaluated).
  250 
  251 Why is it necessary for contHoleType?  Consider the continuation
  252      ApplyToType Int (Stop Int)
  253 corresponding to
  254      (<hole> @Int) :: Int
  255 What is the type of <hole>?  It could be (forall a. Int) or (forall a. a),
  256 and there is no way to know which, so we must record it.
  257 
  258 In a chain of applications  (f @t1 @t2 @t3) we'll lazily compute exprType
  259 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
  260 doesn't matter because we'll never compute them all.
  261 
  262 ************************************************************************
  263 *                                                                      *
  264                 ArgInfo and ArgSpec
  265 *                                                                      *
  266 ************************************************************************
  267 -}
  268 
  269 data ArgInfo
  270   = ArgInfo {
  271         ai_fun   :: OutId,      -- The function
  272         ai_args  :: [ArgSpec],  -- ...applied to these args (which are in *reverse* order)
  273 
  274         ai_rules :: FunRules,   -- Rules for this function
  275 
  276         ai_encl :: Bool,        -- Flag saying whether this function
  277                                 -- or an enclosing one has rules (recursively)
  278                                 --      True => be keener to inline in all args
  279 
  280         ai_dmds :: [Demand],    -- Demands on remaining value arguments (beyond ai_args)
  281                                 --   Usually infinite, but if it is finite it guarantees
  282                                 --   that the function diverges after being given
  283                                 --   that number of args
  284 
  285         ai_discs :: [Int]       -- Discounts for remaining value arguments (beyong ai_args)
  286                                 --   non-zero => be keener to inline
  287                                 --   Always infinite
  288     }
  289 
  290 data ArgSpec
  291   = ValArg { as_dmd  :: Demand        -- Demand placed on this argument
  292            , as_arg  :: OutExpr       -- Apply to this (coercion or value); c.f. ApplyToVal
  293            , as_hole_ty :: OutType }  -- Type of the function (presumably t1 -> t2)
  294 
  295   | TyArg { as_arg_ty  :: OutType     -- Apply to this type; c.f. ApplyToTy
  296           , as_hole_ty :: OutType }   -- Type of the function (presumably forall a. blah)
  297 
  298   | CastBy OutCoercion                -- Cast by this; c.f. CastIt
  299 
  300 instance Outputable ArgInfo where
  301   ppr (ArgInfo { ai_fun = fun, ai_args = args, ai_dmds = dmds })
  302     = text "ArgInfo" <+> braces
  303          (sep [ text "fun =" <+> ppr fun
  304               , text "dmds =" <+> ppr dmds
  305               , text "args =" <+> ppr args ])
  306 
  307 instance Outputable ArgSpec where
  308   ppr (ValArg { as_arg = arg })  = text "ValArg" <+> ppr arg
  309   ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
  310   ppr (CastBy c)                 = text "CastBy" <+> ppr c
  311 
  312 addValArgTo :: ArgInfo ->  OutExpr -> OutType -> ArgInfo
  313 addValArgTo ai arg hole_ty
  314   | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rules = rules } <- ai
  315       -- Pop the top demand and and discounts off
  316   , let arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty, as_dmd = dmd }
  317   = ai { ai_args  = arg_spec : ai_args ai
  318        , ai_dmds  = dmds
  319        , ai_discs = discs
  320        , ai_rules = decRules rules }
  321   | otherwise
  322   = pprPanic "addValArgTo" (ppr ai $$ ppr arg)
  323     -- There should always be enough demands and discounts
  324 
  325 addTyArgTo :: ArgInfo -> OutType -> OutType -> ArgInfo
  326 addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
  327                                   , ai_rules = decRules (ai_rules ai) }
  328   where
  329     arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
  330 
  331 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
  332 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai }
  333 
  334 isStrictArgInfo :: ArgInfo -> Bool
  335 -- True if the function is strict in the next argument
  336 isStrictArgInfo (ArgInfo { ai_dmds = dmds })
  337   | dmd:_ <- dmds = isStrUsedDmd dmd
  338   | otherwise     = False
  339 
  340 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
  341 argInfoAppArgs []                              = []
  342 argInfoAppArgs (CastBy {}                : _)  = []  -- Stop at a cast
  343 argInfoAppArgs (ValArg { as_arg = arg }  : as) = arg     : argInfoAppArgs as
  344 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
  345 
  346 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
  347 pushSimplifiedArgs _env []           k = k
  348 pushSimplifiedArgs env  (arg : args) k
  349   = case arg of
  350       TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
  351                -> ApplyToTy  { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
  352       ValArg { as_arg = arg, as_hole_ty = hole_ty }
  353              -> ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified
  354                            , sc_hole_ty = hole_ty, sc_cont = rest }
  355       CastBy c -> CastIt c rest
  356   where
  357     rest = pushSimplifiedArgs env args k
  358            -- The env has an empty SubstEnv
  359 
  360 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
  361 -- NB: the [ArgSpec] is reversed so that the first arg
  362 -- in the list is the last one in the application
  363 argInfoExpr fun rev_args
  364   = go rev_args
  365   where
  366     go []                              = Var fun
  367     go (ValArg { as_arg = arg }  : as) = go as `App` arg
  368     go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
  369     go (CastBy co                : as) = mkCast (go as) co
  370 
  371 
  372 type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
  373      -- Nothing => No rules
  374      -- Just (n, rules) => some rules, requiring at least n more type/value args
  375 
  376 decRules :: FunRules -> FunRules
  377 decRules (Just (n, rules)) = Just (n-1, rules)
  378 decRules Nothing           = Nothing
  379 
  380 mkFunRules :: [CoreRule] -> FunRules
  381 mkFunRules [] = Nothing
  382 mkFunRules rs = Just (n_required, rs)
  383   where
  384     n_required = maximum (map ruleArity rs)
  385 
  386 {-
  387 ************************************************************************
  388 *                                                                      *
  389                 Functions on SimplCont
  390 *                                                                      *
  391 ************************************************************************
  392 -}
  393 
  394 mkBoringStop :: OutType -> SimplCont
  395 mkBoringStop ty = Stop ty BoringCtxt
  396 
  397 mkRhsStop :: OutType -> SimplCont       -- See Note [RHS of lets] in GHC.Core.Unfold
  398 mkRhsStop ty = Stop ty RhsCtxt
  399 
  400 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
  401 mkLazyArgStop ty cci = Stop ty cci
  402 
  403 -------------------
  404 contIsRhsOrArg :: SimplCont -> Bool
  405 contIsRhsOrArg (Stop {})       = True
  406 contIsRhsOrArg (StrictBind {}) = True
  407 contIsRhsOrArg (StrictArg {})  = True
  408 contIsRhsOrArg _               = False
  409 
  410 contIsRhs :: SimplCont -> Bool
  411 contIsRhs (Stop _ RhsCtxt) = True
  412 contIsRhs (CastIt _ k)     = contIsRhs k   -- For f = e |> co, treat e as Rhs context
  413 contIsRhs _                = False
  414 
  415 -------------------
  416 contIsStop :: SimplCont -> Bool
  417 contIsStop (Stop {}) = True
  418 contIsStop _         = False
  419 
  420 contIsDupable :: SimplCont -> Bool
  421 contIsDupable (Stop {})                         = True
  422 contIsDupable (ApplyToTy  { sc_cont = k })      = contIsDupable k
  423 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
  424 contIsDupable (Select { sc_dup = OkToDup })     = True -- ...ditto...
  425 contIsDupable (StrictArg { sc_dup = OkToDup })  = True -- ...ditto...
  426 contIsDupable (CastIt _ k)                      = contIsDupable k
  427 contIsDupable _                                 = False
  428 
  429 -------------------
  430 contIsTrivial :: SimplCont -> Bool
  431 contIsTrivial (Stop {})                                         = True
  432 contIsTrivial (ApplyToTy { sc_cont = k })                       = contIsTrivial k
  433 -- This one doesn't look right.  A value application is not trivial
  434 -- contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
  435 contIsTrivial (CastIt _ k)                                      = contIsTrivial k
  436 contIsTrivial _                                                 = False
  437 
  438 -------------------
  439 contResultType :: SimplCont -> OutType
  440 contResultType (Stop ty _)                  = ty
  441 contResultType (CastIt _ k)                 = contResultType k
  442 contResultType (StrictBind { sc_cont = k }) = contResultType k
  443 contResultType (StrictArg { sc_cont = k })  = contResultType k
  444 contResultType (Select { sc_cont = k })     = contResultType k
  445 contResultType (ApplyToTy  { sc_cont = k }) = contResultType k
  446 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
  447 contResultType (TickIt _ k)                 = contResultType k
  448 
  449 contHoleType :: SimplCont -> OutType
  450 contHoleType (Stop ty _)                      = ty
  451 contHoleType (TickIt _ k)                     = contHoleType k
  452 contHoleType (CastIt co _)                    = coercionLKind co
  453 contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
  454   = perhapsSubstTy dup se (idType b)
  455 contHoleType (StrictArg  { sc_fun_ty = ty })  = funArgTy ty
  456 contHoleType (ApplyToTy  { sc_hole_ty = ty }) = ty  -- See Note [The hole type in ApplyToTy]
  457 contHoleType (ApplyToVal { sc_hole_ty = ty }) = ty  -- See Note [The hole type in ApplyToTy/Val]
  458 contHoleType (Select { sc_dup = d, sc_bndr =  b, sc_env = se })
  459   = perhapsSubstTy d se (idType b)
  460 
  461 
  462 -- Computes the multiplicity scaling factor at the hole. That is, in (case [] of
  463 -- x ::(p) _ { … }) (respectively for arguments of functions), the scaling
  464 -- factor is p. And in E[G[]], the scaling factor is the product of the scaling
  465 -- factor of E and that of G.
  466 --
  467 -- The scaling factor at the hole of E[] is used to determine how a binder
  468 -- should be scaled if it commutes with E. This appears, in particular, in the
  469 -- case-of-case transformation.
  470 contHoleScaling :: SimplCont -> Mult
  471 contHoleScaling (Stop _ _) = One
  472 contHoleScaling (CastIt _ k) = contHoleScaling k
  473 contHoleScaling (StrictBind { sc_bndr = id, sc_cont = k })
  474   = idMult id `mkMultMul` contHoleScaling k
  475 contHoleScaling (Select { sc_bndr = id, sc_cont = k })
  476   = idMult id `mkMultMul` contHoleScaling k
  477 contHoleScaling (StrictArg { sc_fun_ty = fun_ty, sc_cont = k })
  478   = w `mkMultMul` contHoleScaling k
  479   where
  480     (w, _, _) = splitFunTy fun_ty
  481 contHoleScaling (ApplyToTy { sc_cont = k }) = contHoleScaling k
  482 contHoleScaling (ApplyToVal { sc_cont = k }) = contHoleScaling k
  483 contHoleScaling (TickIt _ k) = contHoleScaling k
  484 -------------------
  485 countArgs :: SimplCont -> Int
  486 -- Count all arguments, including types, coercions, and other values
  487 countArgs (ApplyToTy  { sc_cont = cont }) = 1 + countArgs cont
  488 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
  489 countArgs _                               = 0
  490 
  491 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
  492 -- Summarises value args, discards type args and coercions
  493 -- The returned continuation of the call is only used to
  494 -- answer questions like "are you interesting?"
  495 contArgs cont
  496   | lone cont = (True, [], cont)
  497   | otherwise = go [] cont
  498   where
  499     lone (ApplyToTy  {}) = False  -- See Note [Lone variables] in GHC.Core.Unfold
  500     lone (ApplyToVal {}) = False  -- NB: even a type application or cast
  501     lone (CastIt {})     = False  --     stops it being "lone"
  502     lone _               = True
  503 
  504     go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
  505                                         = go (is_interesting arg se : args) k
  506     go args (ApplyToTy { sc_cont = k }) = go args k
  507     go args (CastIt _ k)                = go args k
  508     go args k                           = (False, reverse args, k)
  509 
  510     is_interesting arg se = interestingArg se arg
  511                    -- Do *not* use short-cutting substitution here
  512                    -- because we want to get as much IdInfo as possible
  513 
  514 
  515 -------------------
  516 mkArgInfo :: SimplEnv
  517           -> Id
  518           -> [CoreRule] -- Rules for function
  519           -> Int        -- Number of value args
  520           -> SimplCont  -- Context of the call
  521           -> ArgInfo
  522 
  523 mkArgInfo env fun rules n_val_args call_cont
  524   | n_val_args < idArity fun            -- Note [Unsaturated functions]
  525   = ArgInfo { ai_fun = fun, ai_args = []
  526             , ai_rules = fun_rules
  527             , ai_encl = False
  528             , ai_dmds = vanilla_dmds
  529             , ai_discs = vanilla_discounts }
  530   | otherwise
  531   = ArgInfo { ai_fun   = fun
  532             , ai_args = []
  533             , ai_rules = fun_rules
  534             , ai_encl  = interestingArgContext rules call_cont
  535             , ai_dmds  = add_type_strictness (idType fun) arg_dmds
  536             , ai_discs = arg_discounts }
  537   where
  538     fun_rules = mkFunRules rules
  539 
  540     vanilla_discounts, arg_discounts :: [Int]
  541     vanilla_discounts = repeat 0
  542     arg_discounts = case idUnfolding fun of
  543                         CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
  544                               -> discounts ++ vanilla_discounts
  545                         _     -> vanilla_discounts
  546 
  547     vanilla_dmds, arg_dmds :: [Demand]
  548     vanilla_dmds  = repeat topDmd
  549 
  550     arg_dmds
  551       | not (sm_inline (seMode env))
  552       = vanilla_dmds -- See Note [Do not expose strictness if sm_inline=False]
  553       | otherwise
  554       = -- add_type_str fun_ty $
  555         case splitDmdSig (idDmdSig fun) of
  556           (demands, result_info)
  557                 | not (demands `lengthExceeds` n_val_args)
  558                 ->      -- Enough args, use the strictness given.
  559                         -- For bottoming functions we used to pretend that the arg
  560                         -- is lazy, so that we don't treat the arg as an
  561                         -- interesting context.  This avoids substituting
  562                         -- top-level bindings for (say) strings into
  563                         -- calls to error.  But now we are more careful about
  564                         -- inlining lone variables, so its ok
  565                         -- (see GHC.Core.Op.Simplify.Utils.analyseCont)
  566                    if isDeadEndDiv result_info then
  567                         demands  -- Finite => result is bottom
  568                    else
  569                         demands ++ vanilla_dmds
  570                | otherwise
  571                -> warnPprTrace True (text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
  572                                 <+> ppr n_val_args <+> ppr demands) $
  573                   vanilla_dmds      -- Not enough args, or no strictness
  574 
  575     add_type_strictness :: Type -> [Demand] -> [Demand]
  576     -- If the function arg types are strict, record that in the 'strictness bits'
  577     -- No need to instantiate because unboxed types (which dominate the strict
  578     --   types) can't instantiate type variables.
  579     -- add_type_strictness is done repeatedly (for each call);
  580     --   might be better once-for-all in the function
  581     -- But beware primops/datacons with no strictness
  582 
  583     add_type_strictness fun_ty dmds
  584       | null dmds = []
  585 
  586       | Just (_, fun_ty') <- splitForAllTyCoVar_maybe fun_ty
  587       = add_type_strictness fun_ty' dmds     -- Look through foralls
  588 
  589       | Just (_, arg_ty, fun_ty') <- splitFunTy_maybe fun_ty        -- Add strict-type info
  590       , dmd : rest_dmds <- dmds
  591       , let dmd' = case isLiftedType_maybe arg_ty of
  592                        Just False -> strictifyDmd dmd
  593                        _          -> dmd
  594       = dmd' : add_type_strictness fun_ty' rest_dmds
  595           -- If the type is representation-polymorphic, we can't know whether
  596           -- it's strict. isLiftedType_maybe will return Just False only when
  597           -- we're sure the type is unlifted.
  598 
  599       | otherwise
  600       = dmds
  601 
  602 {- Note [Unsaturated functions]
  603   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  604 Consider (test eyeball/inline4)
  605         x = a:as
  606         y = f x
  607 where f has arity 2.  Then we do not want to inline 'x', because
  608 it'll just be floated out again.  Even if f has lots of discounts
  609 on its first argument -- it must be saturated for these to kick in
  610 
  611 Note [Do not expose strictness if sm_inline=False]
  612 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  613 #15163 showed a case in which we had
  614 
  615   {-# INLINE [1] zip #-}
  616   zip = undefined
  617 
  618   {-# RULES "foo" forall as bs. stream (zip as bs) = ..blah... #-}
  619 
  620 If we expose zip's bottoming nature when simplifying the LHS of the
  621 RULE we get
  622   {-# RULES "foo" forall as bs.
  623                    stream (case zip of {}) = ..blah... #-}
  624 discarding the arguments to zip.  Usually this is fine, but on the
  625 LHS of a rule it's not, because 'as' and 'bs' are now not bound on
  626 the LHS.
  627 
  628 This is a pretty pathological example, so I'm not losing sleep over
  629 it, but the simplest solution was to check sm_inline; if it is False,
  630 which it is on the LHS of a rule (see updModeForRules), then don't
  631 make use of the strictness info for the function.
  632 -}
  633 
  634 
  635 {-
  636 ************************************************************************
  637 *                                                                      *
  638         Interesting arguments
  639 *                                                                      *
  640 ************************************************************************
  641 
  642 Note [Interesting call context]
  643 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  644 We want to avoid inlining an expression where there can't possibly be
  645 any gain, such as in an argument position.  Hence, if the continuation
  646 is interesting (eg. a case scrutinee, application etc.) then we
  647 inline, otherwise we don't.
  648 
  649 Previously some_benefit used to return True only if the variable was
  650 applied to some value arguments.  This didn't work:
  651 
  652         let x = _coerce_ (T Int) Int (I# 3) in
  653         case _coerce_ Int (T Int) x of
  654                 I# y -> ....
  655 
  656 we want to inline x, but can't see that it's a constructor in a case
  657 scrutinee position, and some_benefit is False.
  658 
  659 Another example:
  660 
  661 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
  662 
  663 ....  case dMonadST _@_ x0 of (a,b,c) -> ....
  664 
  665 we'd really like to inline dMonadST here, but we *don't* want to
  666 inline if the case expression is just
  667 
  668         case x of y { DEFAULT -> ... }
  669 
  670 since we can just eliminate this case instead (x is in WHNF).  Similar
  671 applies when x is bound to a lambda expression.  Hence
  672 contIsInteresting looks for case expressions with just a single
  673 default case.
  674 
  675 Note [No case of case is boring]
  676 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  677 If we see
  678    case f x of <alts>
  679 
  680 we'd usually treat the context as interesting, to encourage 'f' to
  681 inline.  But if case-of-case is off, it's really not so interesting
  682 after all, because we are unlikely to be able to push the case
  683 expression into the branches of any case in f's unfolding.  So, to
  684 reduce unnecessary code expansion, we just make the context look boring.
  685 This made a small compile-time perf improvement in perf/compiler/T6048,
  686 and it looks plausible to me.
  687 -}
  688 
  689 lazyArgContext :: ArgInfo -> CallCtxt
  690 -- Use this for lazy arguments
  691 lazyArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
  692   | encl_rules                = RuleArgCtxt
  693   | disc:_ <- discs, disc > 0 = DiscArgCtxt  -- Be keener here
  694   | otherwise                 = BoringCtxt   -- Nothing interesting
  695 
  696 strictArgContext :: ArgInfo -> CallCtxt
  697 strictArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
  698 -- Use this for strict arguments
  699   | encl_rules                = RuleArgCtxt
  700   | disc:_ <- discs, disc > 0 = DiscArgCtxt  -- Be keener here
  701   | otherwise                 = RhsCtxt
  702       -- Why RhsCtxt?  if we see f (g x) (h x), and f is strict, we
  703       -- want to be a bit more eager to inline g, because it may
  704       -- expose an eval (on x perhaps) that can be eliminated or
  705       -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1
  706       -- It's worth an 18% improvement in allocation for this
  707       -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier'
  708 
  709 interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
  710 -- See Note [Interesting call context]
  711 interestingCallContext env cont
  712   = interesting cont
  713   where
  714     interesting (Select {})
  715        | sm_case_case (getMode env) = CaseCtxt
  716        | otherwise                  = BoringCtxt
  717        -- See Note [No case of case is boring]
  718 
  719     interesting (ApplyToVal {}) = ValAppCtxt
  720         -- Can happen if we have (f Int |> co) y
  721         -- If f has an INLINE prag we need to give it some
  722         -- motivation to inline. See Note [Cast then apply]
  723         -- in GHC.Core.Unfold
  724 
  725     interesting (StrictArg { sc_fun = fun }) = strictArgContext fun
  726     interesting (StrictBind {})              = BoringCtxt
  727     interesting (Stop _ cci)                 = cci
  728     interesting (TickIt _ k)                 = interesting k
  729     interesting (ApplyToTy { sc_cont = k })  = interesting k
  730     interesting (CastIt _ k)                 = interesting k
  731         -- If this call is the arg of a strict function, the context
  732         -- is a bit interesting.  If we inline here, we may get useful
  733         -- evaluation information to avoid repeated evals: e.g.
  734         --      x + (y * z)
  735         -- Here the contIsInteresting makes the '*' keener to inline,
  736         -- which in turn exposes a constructor which makes the '+' inline.
  737         -- Assuming that +,* aren't small enough to inline regardless.
  738         --
  739         -- It's also very important to inline in a strict context for things
  740         -- like
  741         --              foldr k z (f x)
  742         -- Here, the context of (f x) is strict, and if f's unfolding is
  743         -- a build it's *great* to inline it here.  So we must ensure that
  744         -- the context for (f x) is not totally uninteresting.
  745 
  746 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
  747 -- If the argument has form (f x y), where x,y are boring,
  748 -- and f is marked INLINE, then we don't want to inline f.
  749 -- But if the context of the argument is
  750 --      g (f x y)
  751 -- where g has rules, then we *do* want to inline f, in case it
  752 -- exposes a rule that might fire.  Similarly, if the context is
  753 --      h (g (f x x))
  754 -- where h has rules, then we do want to inline f; hence the
  755 -- call_cont argument to interestingArgContext
  756 --
  757 -- The ai-rules flag makes this happen; if it's
  758 -- set, the inliner gets just enough keener to inline f
  759 -- regardless of how boring f's arguments are, if it's marked INLINE
  760 --
  761 -- The alternative would be to *always* inline an INLINE function,
  762 -- regardless of how boring its context is; but that seems overkill
  763 -- For example, it'd mean that wrapper functions were always inlined
  764 --
  765 -- The call_cont passed to interestingArgContext is the context of
  766 -- the call itself, e.g. g <hole> in the example above
  767 interestingArgContext rules call_cont
  768   = notNull rules || enclosing_fn_has_rules
  769   where
  770     enclosing_fn_has_rules = go call_cont
  771 
  772     go (Select {})                  = False
  773     go (ApplyToVal {})              = False  -- Shouldn't really happen
  774     go (ApplyToTy  {})              = False  -- Ditto
  775     go (StrictArg { sc_fun = fun }) = ai_encl fun
  776     go (StrictBind {})              = False      -- ??
  777     go (CastIt _ c)                 = go c
  778     go (Stop _ RuleArgCtxt)         = True
  779     go (Stop _ _)                   = False
  780     go (TickIt _ c)                 = go c
  781 
  782 {- Note [Interesting arguments]
  783 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  784 An argument is interesting if it deserves a discount for unfoldings
  785 with a discount in that argument position.  The idea is to avoid
  786 unfolding a function that is applied only to variables that have no
  787 unfolding (i.e. they are probably lambda bound): f x y z There is
  788 little point in inlining f here.
  789 
  790 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
  791 we must look through lets, eg (let x = e in C a b), because the let will
  792 float, exposing the value, if we inline.  That makes it different to
  793 exprIsHNF.
  794 
  795 Before 2009 we said it was interesting if the argument had *any* structure
  796 at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see #3016.
  797 
  798 But we don't regard (f x y) as interesting, unless f is unsaturated.
  799 If it's saturated and f hasn't inlined, then it's probably not going
  800 to now!
  801 
  802 Note [Conlike is interesting]
  803 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  804 Consider
  805         f d = ...((*) d x y)...
  806         ... f (df d')...
  807 where df is con-like. Then we'd really like to inline 'f' so that the
  808 rule for (*) (df d) can fire.  To do this
  809   a) we give a discount for being an argument of a class-op (eg (*) d)
  810   b) we say that a con-like argument (eg (df d)) is interesting
  811 -}
  812 
  813 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
  814 -- See Note [Interesting arguments]
  815 interestingArg env e = go env 0 e
  816   where
  817     -- n is # value args to which the expression is applied
  818     go env n (Var v)
  819        = case substId env v of
  820            DoneId v'            -> go_var n v'
  821            DoneEx e _           -> go (zapSubstEnv env)             n e
  822            ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
  823 
  824     go _   _ (Lit l)
  825        | isLitRubbish l        = TrivArg -- Leads to unproductive inlining in WWRec, #20035
  826        | otherwise             = ValueArg
  827     go _   _ (Type _)          = TrivArg
  828     go _   _ (Coercion _)      = TrivArg
  829     go env n (App fn (Type _)) = go env n fn
  830     go env n (App fn _)        = go env (n+1) fn
  831     go env n (Tick _ a)        = go env n a
  832     go env n (Cast e _)        = go env n e
  833     go env n (Lam v e)
  834        | isTyVar v             = go env n e
  835        | n>0                   = NonTrivArg     -- (\x.b) e   is NonTriv
  836        | otherwise             = ValueArg
  837     go _ _ (Case {})           = NonTrivArg
  838     go env n (Let b e)         = case go env' n e of
  839                                    ValueArg -> ValueArg
  840                                    _        -> NonTrivArg
  841                                where
  842                                  env' = env `addNewInScopeIds` bindersOf b
  843 
  844     go_var n v
  845        | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
  846                                         --    data constructors here
  847        | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
  848        | n > 0             = NonTrivArg -- Saturated or unknown call
  849        | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
  850                                         -- See Note [Conlike is interesting]
  851        | otherwise         = TrivArg    -- n==0, no useful unfolding
  852        where
  853          conlike_unfolding = isConLikeUnfolding (idUnfolding v)
  854 
  855 {-
  856 ************************************************************************
  857 *                                                                      *
  858                   SimplMode
  859 *                                                                      *
  860 ************************************************************************
  861 
  862 The SimplMode controls several switches; see its definition in
  863 GHC.Core.Opt.Monad
  864         sm_rules      :: Bool     -- Whether RULES are enabled
  865         sm_inline     :: Bool     -- Whether inlining is enabled
  866         sm_case_case  :: Bool     -- Whether case-of-case is enabled
  867         sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
  868 -}
  869 
  870 simplEnvForGHCi :: Logger -> DynFlags -> SimplEnv
  871 simplEnvForGHCi logger dflags
  872   = mkSimplEnv $ SimplMode { sm_names  = ["GHCi"]
  873                            , sm_phase  = InitialPhase
  874                            , sm_logger = logger
  875                            , sm_dflags = dflags
  876                            , sm_uf_opts = uf_opts
  877                            , sm_rules  = rules_on
  878                            , sm_inline = False
  879                               -- Do not do any inlining, in case we expose some
  880                               -- unboxed tuple stuff that confuses the bytecode
  881                               -- interpreter
  882                            , sm_eta_expand = eta_expand_on
  883                            , sm_case_case  = True
  884                            , sm_pre_inline = pre_inline_on
  885                            }
  886   where
  887     rules_on      = gopt Opt_EnableRewriteRules   dflags
  888     eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
  889     pre_inline_on = gopt Opt_SimplPreInlining     dflags
  890     uf_opts       = unfoldingOpts                 dflags
  891 
  892 updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
  893 -- See Note [Simplifying inside stable unfoldings]
  894 updModeForStableUnfoldings unf_act current_mode
  895   = current_mode { sm_phase      = phaseFromActivation unf_act
  896                  , sm_inline     = True
  897                  , sm_eta_expand = False }
  898        -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
  899        -- sm_rules: just inherit; sm_rules might be "off"
  900        -- because of -fno-enable-rewrite-rules
  901   where
  902     phaseFromActivation (ActiveAfter _ n) = Phase n
  903     phaseFromActivation _                 = InitialPhase
  904 
  905 updModeForRules :: SimplMode -> SimplMode
  906 -- See Note [Simplifying rules]
  907 updModeForRules current_mode
  908   = current_mode { sm_phase      = InitialPhase
  909                  , sm_inline     = False  -- See Note [Do not expose strictness if sm_inline=False]
  910                  , sm_rules      = False
  911                  , sm_eta_expand = False }
  912 
  913 {- Note [Simplifying rules]
  914 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  915 When simplifying a rule LHS, refrain from /any/ inlining or applying
  916 of other RULES.
  917 
  918 Doing anything to the LHS is plain confusing, because it means that what the
  919 rule matches is not what the user wrote. c.f. #10595, and #10528.
  920 Moreover, inlining (or applying rules) on rule LHSs risks introducing
  921 Ticks into the LHS, which makes matching trickier. #10665, #10745.
  922 
  923 Doing this to either side confounds tools like HERMIT, which seek to reason
  924 about and apply the RULES as originally written. See #10829.
  925 
  926 There is, however, one case where we are pretty much /forced/ to transform the
  927 LHS of a rule: postInlineUnconditionally. For instance, in the case of
  928 
  929     let f = g @Int in f
  930 
  931 We very much want to inline f into the body of the let. However, to do so (and
  932 be able to safely drop f's binding) we must inline into all occurrences of f,
  933 including those in the LHS of rules.
  934 
  935 This can cause somewhat surprising results; for instance, in #18162 we found
  936 that a rule template contained ticks in its arguments, because
  937 postInlineUnconditionally substituted in a trivial expression that contains
  938 ticks. See Note [Tick annotations in RULE matching] in GHC.Core.Rules for
  939 details.
  940 
  941 Note [No eta expansion in stable unfoldings]
  942 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  943 If we have a stable unfolding
  944 
  945   f :: Ord a => a -> IO ()
  946   -- Unfolding template
  947   --    = /\a \(d:Ord a) (x:a). bla
  948 
  949 we do not want to eta-expand to
  950 
  951   f :: Ord a => a -> IO ()
  952   -- Unfolding template
  953   --    = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
  954 
  955 because not specialisation of the overloading doesn't work properly
  956 (see Note [Specialisation shape] in GHC.Core.Opt.Specialise), #9509.
  957 
  958 So we disable eta-expansion in stable unfoldings.
  959 
  960 Note [Inlining in gentle mode]
  961 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  962 Something is inlined if
  963    (i)   the sm_inline flag is on, AND
  964    (ii)  the thing has an INLINE pragma, AND
  965    (iii) the thing is inlinable in the earliest phase.
  966 
  967 Example of why (iii) is important:
  968   {-# INLINE [~1] g #-}
  969   g = ...
  970 
  971   {-# INLINE f #-}
  972   f x = g (g x)
  973 
  974 If we were to inline g into f's inlining, then an importing module would
  975 never be able to do
  976         f e --> g (g e) ---> RULE fires
  977 because the stable unfolding for f has had g inlined into it.
  978 
  979 On the other hand, it is bad not to do ANY inlining into an
  980 stable unfolding, because then recursive knots in instance declarations
  981 don't get unravelled.
  982 
  983 However, *sometimes* SimplGently must do no call-site inlining at all
  984 (hence sm_inline = False).  Before full laziness we must be careful
  985 not to inline wrappers, because doing so inhibits floating
  986     e.g. ...(case f x of ...)...
  987     ==> ...(case (case x of I# x# -> fw x#) of ...)...
  988     ==> ...(case x of I# x# -> case fw x# of ...)...
  989 and now the redex (f x) isn't floatable any more.
  990 
  991 The no-inlining thing is also important for Template Haskell.  You might be
  992 compiling in one-shot mode with -O2; but when TH compiles a splice before
  993 running it, we don't want to use -O2.  Indeed, we don't want to inline
  994 anything, because the byte-code interpreter might get confused about
  995 unboxed tuples and suchlike.
  996 
  997 Note [Simplifying inside stable unfoldings]
  998 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  999 We must take care with simplification inside stable unfoldings (which come from
 1000 INLINE pragmas).
 1001 
 1002 First, consider the following example
 1003         let f = \pq -> BIG
 1004         in
 1005         let g = \y -> f y y
 1006             {-# INLINE g #-}
 1007         in ...g...g...g...g...g...
 1008 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
 1009 and thence copied multiple times when g is inlined. HENCE we treat
 1010 any occurrence in a stable unfolding as a multiple occurrence, not a single
 1011 one; see OccurAnal.addRuleUsage.
 1012 
 1013 Second, we do want *do* to some modest rules/inlining stuff in stable
 1014 unfoldings, partly to eliminate senseless crap, and partly to break
 1015 the recursive knots generated by instance declarations.
 1016 
 1017 However, suppose we have
 1018         {-# INLINE <act> f #-}
 1019         f = <rhs>
 1020 meaning "inline f in phases p where activation <act>(p) holds".
 1021 Then what inlinings/rules can we apply to the copy of <rhs> captured in
 1022 f's stable unfolding?  Our model is that literally <rhs> is substituted for
 1023 f when it is inlined.  So our conservative plan (implemented by
 1024 updModeForStableUnfoldings) is this:
 1025 
 1026   -------------------------------------------------------------
 1027   When simplifying the RHS of a stable unfolding, set the phase
 1028   to the phase in which the stable unfolding first becomes active
 1029   -------------------------------------------------------------
 1030 
 1031 That ensures that
 1032 
 1033   a) Rules/inlinings that *cease* being active before p will
 1034      not apply to the stable unfolding, consistent with it being
 1035      inlined in its *original* form in phase p.
 1036 
 1037   b) Rules/inlinings that only become active *after* p will
 1038      not apply to the stable unfolding, again to be consistent with
 1039      inlining the *original* rhs in phase p.
 1040 
 1041 For example,
 1042         {-# INLINE f #-}
 1043         f x = ...g...
 1044 
 1045         {-# NOINLINE [1] g #-}
 1046         g y = ...
 1047 
 1048         {-# RULE h g = ... #-}
 1049 Here we must not inline g into f's RHS, even when we get to phase 0,
 1050 because when f is later inlined into some other module we want the
 1051 rule for h to fire.
 1052 
 1053 Similarly, consider
 1054         {-# INLINE f #-}
 1055         f x = ...g...
 1056 
 1057         g y = ...
 1058 and suppose that there are auto-generated specialisations and a strictness
 1059 wrapper for g.  The specialisations get activation AlwaysActive, and the
 1060 strictness wrapper get activation (ActiveAfter 0).  So the strictness
 1061 wrepper fails the test and won't be inlined into f's stable unfolding. That
 1062 means f can inline, expose the specialised call to g, so the specialisation
 1063 rules can fire.
 1064 
 1065 A note about wrappers
 1066 ~~~~~~~~~~~~~~~~~~~~~
 1067 It's also important not to inline a worker back into a wrapper.
 1068 A wrapper looks like
 1069         wraper = inline_me (\x -> ...worker... )
 1070 Normally, the inline_me prevents the worker getting inlined into
 1071 the wrapper (initially, the worker's only call site!).  But,
 1072 if the wrapper is sure to be called, the strictness analyser will
 1073 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 1074 continuation.
 1075 -}
 1076 
 1077 activeUnfolding :: SimplMode -> Id -> Bool
 1078 activeUnfolding mode id
 1079   | isCompulsoryUnfolding (realIdUnfolding id)
 1080   = True   -- Even sm_inline can't override compulsory unfoldings
 1081   | otherwise
 1082   = isActive (sm_phase mode) (idInlineActivation id)
 1083   && sm_inline mode
 1084       -- `or` isStableUnfolding (realIdUnfolding id)
 1085       -- Inline things when
 1086       --  (a) they are active
 1087       --  (b) sm_inline says so, except that for stable unfoldings
 1088       --                         (ie pragmas) we inline anyway
 1089 
 1090 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
 1091 -- When matching in RULE, we want to "look through" an unfolding
 1092 -- (to see a constructor) if *rules* are on, even if *inlinings*
 1093 -- are not.  A notable example is DFuns, which really we want to
 1094 -- match in rules like (op dfun) in gentle mode. Another example
 1095 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
 1096 -- see very early on
 1097 getUnfoldingInRuleMatch env
 1098   = (in_scope, id_unf)
 1099   where
 1100     in_scope = seInScope env
 1101     mode = getMode env
 1102     id_unf id | unf_is_active id = idUnfolding id
 1103               | otherwise        = NoUnfolding
 1104     unf_is_active id
 1105      | not (sm_rules mode) = -- active_unfolding_minimal id
 1106                              isStableUnfolding (realIdUnfolding id)
 1107         -- Do we even need to test this?  I think this InScopeEnv
 1108         -- is only consulted if activeRule returns True, which
 1109         -- never happens if sm_rules is False
 1110      | otherwise           = isActive (sm_phase mode) (idInlineActivation id)
 1111 
 1112 ----------------------
 1113 activeRule :: SimplMode -> Activation -> Bool
 1114 -- Nothing => No rules at all
 1115 activeRule mode
 1116   | not (sm_rules mode) = \_ -> False     -- Rewriting is off
 1117   | otherwise           = isActive (sm_phase mode)
 1118 
 1119 {-
 1120 ************************************************************************
 1121 *                                                                      *
 1122                   preInlineUnconditionally
 1123 *                                                                      *
 1124 ************************************************************************
 1125 
 1126 preInlineUnconditionally
 1127 ~~~~~~~~~~~~~~~~~~~~~~~~
 1128 @preInlineUnconditionally@ examines a bndr to see if it is used just
 1129 once in a completely safe way, so that it is safe to discard the
 1130 binding inline its RHS at the (unique) usage site, REGARDLESS of how
 1131 big the RHS might be.  If this is the case we don't simplify the RHS
 1132 first, but just inline it un-simplified.
 1133 
 1134 This is much better than first simplifying a perhaps-huge RHS and then
 1135 inlining and re-simplifying it.  Indeed, it can be at least quadratically
 1136 better.  Consider
 1137 
 1138         x1 = e1
 1139         x2 = e2[x1]
 1140         x3 = e3[x2]
 1141         ...etc...
 1142         xN = eN[xN-1]
 1143 
 1144 We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
 1145 This can happen with cascades of functions too:
 1146 
 1147         f1 = \x1.e1
 1148         f2 = \xs.e2[f1]
 1149         f3 = \xs.e3[f3]
 1150         ...etc...
 1151 
 1152 THE MAIN INVARIANT is this:
 1153 
 1154         ----  preInlineUnconditionally invariant -----
 1155    IF preInlineUnconditionally chooses to inline x = <rhs>
 1156    THEN doing the inlining should not change the occurrence
 1157         info for the free vars of <rhs>
 1158         ----------------------------------------------
 1159 
 1160 For example, it's tempting to look at trivial binding like
 1161         x = y
 1162 and inline it unconditionally.  But suppose x is used many times,
 1163 but this is the unique occurrence of y.  Then inlining x would change
 1164 y's occurrence info, which breaks the invariant.  It matters: y
 1165 might have a BIG rhs, which will now be dup'd at every occurrence of x.
 1166 
 1167 
 1168 Even RHSs labelled InlineMe aren't caught here, because there might be
 1169 no benefit from inlining at the call site.
 1170 
 1171 [Sept 01] Don't unconditionally inline a top-level thing, because that
 1172 can simply make a static thing into something built dynamically.  E.g.
 1173         x = (a,b)
 1174         main = \s -> h x
 1175 
 1176 [Remember that we treat \s as a one-shot lambda.]  No point in
 1177 inlining x unless there is something interesting about the call site.
 1178 
 1179 But watch out: if you aren't careful, some useful foldr/build fusion
 1180 can be lost (most notably in spectral/hartel/parstof) because the
 1181 foldr didn't see the build.  Doing the dynamic allocation isn't a big
 1182 deal, in fact, but losing the fusion can be.  But the right thing here
 1183 seems to be to do a callSiteInline based on the fact that there is
 1184 something interesting about the call site (it's strict).  Hmm.  That
 1185 seems a bit fragile.
 1186 
 1187 Conclusion: inline top level things gaily until FinalPhase (the last
 1188 phase), at which point don't.
 1189 
 1190 Note [pre/postInlineUnconditionally in gentle mode]
 1191 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1192 Even in gentle mode we want to do preInlineUnconditionally.  The
 1193 reason is that too little clean-up happens if you don't inline
 1194 use-once things.  Also a bit of inlining is *good* for full laziness;
 1195 it can expose constant sub-expressions.  Example in
 1196 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
 1197 let-float if you inline windowToViewport
 1198 
 1199 However, as usual for Gentle mode, do not inline things that are
 1200 inactive in the initial stages.  See Note [Gentle mode].
 1201 
 1202 Note [Stable unfoldings and preInlineUnconditionally]
 1203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1204 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
 1205 Example
 1206 
 1207    {-# INLINE f #-}
 1208    f :: Eq a => a -> a
 1209    f x = ...
 1210 
 1211    fInt :: Int -> Int
 1212    fInt = f Int dEqInt
 1213 
 1214    ...fInt...fInt...fInt...
 1215 
 1216 Here f occurs just once, in the RHS of fInt. But if we inline it there
 1217 it might make fInt look big, and we'll lose the opportunity to inline f
 1218 at each of fInt's call sites.  The INLINE pragma will only inline when
 1219 the application is saturated for exactly this reason; and we don't
 1220 want PreInlineUnconditionally to second-guess it. A live example is #3736.
 1221     c.f. Note [Stable unfoldings and postInlineUnconditionally]
 1222 
 1223 NB: this only applies for INLINE things. Do /not/ switch off
 1224 preInlineUnconditionally for
 1225 
 1226 * INLINABLE. It just says to GHC "inline this if you like".  If there
 1227   is a unique occurrence, we want to inline the stable unfolding, not
 1228   the RHS.
 1229 
 1230 * NONLINE[n] just switches off inlining until phase n.  We should
 1231   respect that, but after phase n, just behave as usual.
 1232 
 1233 * NoUserInlinePrag.  There is no pragma at all. This ends up on wrappers.
 1234   (See #18815.)
 1235 
 1236 Note [Top-level bottoming Ids]
 1237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1238 Don't inline top-level Ids that are bottoming, even if they are used just
 1239 once, because FloatOut has gone to some trouble to extract them out.
 1240 Inlining them won't make the program run faster!
 1241 
 1242 Note [Do not inline CoVars unconditionally]
 1243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1244 Coercion variables appear inside coercions, and the RHS of a let-binding
 1245 is a term (not a coercion) so we can't necessarily inline the latter in
 1246 the former.
 1247 -}
 1248 
 1249 preInlineUnconditionally
 1250     :: SimplEnv -> TopLevelFlag -> InId
 1251     -> InExpr -> StaticEnv  -- These two go together
 1252     -> Maybe SimplEnv       -- Returned env has extended substitution
 1253 -- Precondition: rhs satisfies the let/app invariant
 1254 -- See Note [Core let/app invariant] in GHC.Core
 1255 -- Reason: we don't want to inline single uses, or discard dead bindings,
 1256 --         for unlifted, side-effect-ful bindings
 1257 preInlineUnconditionally env top_lvl bndr rhs rhs_env
 1258   | not pre_inline_unconditionally           = Nothing
 1259   | not active                               = Nothing
 1260   | isTopLevel top_lvl && isDeadEndId bndr   = Nothing -- Note [Top-level bottoming Ids]
 1261   | isCoVar bndr                             = Nothing -- Note [Do not inline CoVars unconditionally]
 1262   | isExitJoinId bndr                        = Nothing -- Note [Do not inline exit join points]
 1263                                                        -- in module Exitify
 1264   | not (one_occ (idOccInfo bndr))           = Nothing
 1265   | not (isStableUnfolding unf)              = Just $! (extend_subst_with rhs)
 1266 
 1267   -- Note [Stable unfoldings and preInlineUnconditionally]
 1268   | not (isInlinePragma inline_prag)
 1269   , Just inl <- maybeUnfoldingTemplate unf   = Just $! (extend_subst_with inl)
 1270   | otherwise                                = Nothing
 1271   where
 1272     unf = idUnfolding bndr
 1273     extend_subst_with inl_rhs = extendIdSubst env bndr $! (mkContEx rhs_env inl_rhs)
 1274 
 1275     one_occ IAmDead = True -- Happens in ((\x.1) v)
 1276     one_occ OneOcc{ occ_n_br   = 1
 1277                   , occ_in_lam = NotInsideLam }   = isNotTopLevel top_lvl || early_phase
 1278     one_occ OneOcc{ occ_n_br   = 1
 1279                   , occ_in_lam = IsInsideLam
 1280                   , occ_int_cxt = IsInteresting } = canInlineInLam rhs
 1281     one_occ _                                     = False
 1282 
 1283     pre_inline_unconditionally = sm_pre_inline mode
 1284     mode   = getMode env
 1285     active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag)
 1286              -- See Note [pre/postInlineUnconditionally in gentle mode]
 1287     inline_prag = idInlinePragma bndr
 1288 
 1289 -- Be very careful before inlining inside a lambda, because (a) we must not
 1290 -- invalidate occurrence information, and (b) we want to avoid pushing a
 1291 -- single allocation (here) into multiple allocations (inside lambda).
 1292 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
 1293 --      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
 1294 --      where
 1295 --              is_cheap = exprIsCheap rhs
 1296 --              ok = is_cheap && int_cxt
 1297 
 1298         --      int_cxt         The context isn't totally boring
 1299         -- E.g. let f = \ab.BIG in \y. map f xs
 1300         --      Don't want to substitute for f, because then we allocate
 1301         --      its closure every time the \y is called
 1302         -- But: let f = \ab.BIG in \y. map (f y) xs
 1303         --      Now we do want to substitute for f, even though it's not
 1304         --      saturated, because we're going to allocate a closure for
 1305         --      (f y) every time round the loop anyhow.
 1306 
 1307         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 1308         -- so substituting rhs inside a lambda doesn't change the occ info.
 1309         -- Sadly, not quite the same as exprIsHNF.
 1310     canInlineInLam (Lit _)    = True
 1311     canInlineInLam (Lam b e)  = isRuntimeVar b || canInlineInLam e
 1312     canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
 1313     canInlineInLam _          = False
 1314       -- not ticks.  Counting ticks cannot be duplicated, and non-counting
 1315       -- ticks around a Lam will disappear anyway.
 1316 
 1317     early_phase = sm_phase mode /= FinalPhase
 1318     -- If we don't have this early_phase test, consider
 1319     --      x = length [1,2,3]
 1320     -- The full laziness pass carefully floats all the cons cells to
 1321     -- top level, and preInlineUnconditionally floats them all back in.
 1322     -- Result is (a) static allocation replaced by dynamic allocation
 1323     --           (b) many simplifier iterations because this tickles
 1324     --               a related problem; only one inlining per pass
 1325     --
 1326     -- On the other hand, I have seen cases where top-level fusion is
 1327     -- lost if we don't inline top level thing (e.g. string constants)
 1328     -- Hence the test for phase zero (which is the phase for all the final
 1329     -- simplifications).  Until phase zero we take no special notice of
 1330     -- top level things, but then we become more leery about inlining
 1331     -- them.
 1332 
 1333 {-
 1334 ************************************************************************
 1335 *                                                                      *
 1336                   postInlineUnconditionally
 1337 *                                                                      *
 1338 ************************************************************************
 1339 
 1340 postInlineUnconditionally
 1341 ~~~~~~~~~~~~~~~~~~~~~~~~~
 1342 @postInlineUnconditionally@ decides whether to unconditionally inline
 1343 a thing based on the form of its RHS; in particular if it has a
 1344 trivial RHS.  If so, we can inline and discard the binding altogether.
 1345 
 1346 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
 1347 only have *forward* references. Hence, it's safe to discard the binding
 1348 
 1349 NOTE: This isn't our last opportunity to inline.  We're at the binding
 1350 site right now, and we'll get another opportunity when we get to the
 1351 occurrence(s)
 1352 
 1353 Note that we do this unconditional inlining only for trivial RHSs.
 1354 Don't inline even WHNFs inside lambdas; doing so may simply increase
 1355 allocation when the function is called. This isn't the last chance; see
 1356 NOTE above.
 1357 
 1358 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
 1359 Because we don't even want to inline them into the RHS of constructor
 1360 arguments. See NOTE above
 1361 
 1362 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
 1363 it's best to inline it anyway.  We often get a=E; b=a from desugaring,
 1364 with both a and b marked NOINLINE.  But that seems incompatible with
 1365 our new view that inlining is like a RULE, so I'm sticking to the 'active'
 1366 story for now.
 1367 
 1368 NB: unconditional inlining of this sort can introduce ticks in places that
 1369 may seem surprising; for instance, the LHS of rules. See Note [Simplfying
 1370 rules] for details.
 1371 -}
 1372 
 1373 postInlineUnconditionally
 1374     :: SimplEnv -> TopLevelFlag
 1375     -> OutId            -- The binder (*not* a CoVar), including its unfolding
 1376     -> OccInfo          -- From the InId
 1377     -> OutExpr
 1378     -> Bool
 1379 -- Precondition: rhs satisfies the let/app invariant
 1380 -- See Note [Core let/app invariant] in GHC.Core
 1381 -- Reason: we don't want to inline single uses, or discard dead bindings,
 1382 --         for unlifted, side-effect-ful bindings
 1383 postInlineUnconditionally env top_lvl bndr occ_info rhs
 1384   | not active                  = False
 1385   | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
 1386                                         -- because it might be referred to "earlier"
 1387   | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
 1388   | isTopLevel top_lvl          = False -- Note [Top level and postInlineUnconditionally]
 1389   | exprIsTrivial rhs           = True
 1390   | isJoinId bndr                       -- See point (1) of Note [Duplicating join points]
 1391   , not (phase == FinalPhase)   = False -- in Simplify.hs
 1392   | otherwise
 1393   = case occ_info of
 1394       OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
 1395         -- See Note [Inline small things to avoid creating a thunk]
 1396 
 1397         -> n_br < 100  -- See Note [Suppress exponential blowup]
 1398 
 1399            && smallEnoughToInline uf_opts unfolding     -- Small enough to dup
 1400                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 1401                         --
 1402                         -- NB: Do NOT inline arbitrarily big things, even if occ_n_br=1
 1403                         -- Reason: doing so risks exponential behaviour.  We simplify a big
 1404                         --         expression, inline it, and simplify it again.  But if the
 1405                         --         very same thing happens in the big expression, we get
 1406                         --         exponential cost!
 1407                         -- PRINCIPLE: when we've already simplified an expression once,
 1408                         -- make sure that we only inline it if it's reasonably small.
 1409 
 1410            && (in_lam == NotInsideLam ||
 1411                         -- Outside a lambda, we want to be reasonably aggressive
 1412                         -- about inlining into multiple branches of case
 1413                         -- e.g. let x = <non-value>
 1414                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
 1415                         -- Inlining can be a big win if C3 is the hot-spot, even if
 1416                         -- the uses in C1, C2 are not 'interesting'
 1417                         -- An example that gets worse if you add int_cxt here is 'clausify'
 1418 
 1419                 (isCheapUnfolding unfolding && int_cxt == IsInteresting))
 1420                         -- isCheap => acceptable work duplication; in_lam may be true
 1421                         -- int_cxt to prevent us inlining inside a lambda without some
 1422                         -- good reason.  See the notes on int_cxt in preInlineUnconditionally
 1423 
 1424       IAmDead -> True   -- This happens; for example, the case_bndr during case of
 1425                         -- known constructor:  case (a,b) of x { (p,q) -> ... }
 1426                         -- Here x isn't mentioned in the RHS, so we don't want to
 1427                         -- create the (dead) let-binding  let x = (a,b) in ...
 1428 
 1429       _ -> False
 1430 
 1431 -- Here's an example that we don't handle well:
 1432 --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
 1433 --      in \y. ....case f of {...} ....
 1434 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
 1435 -- But
 1436 --  - We can't preInlineUnconditionally because that would invalidate
 1437 --    the occ info for b.
 1438 --  - We can't postInlineUnconditionally because the RHS is big, and
 1439 --    that risks exponential behaviour
 1440 --  - We can't call-site inline, because the rhs is big
 1441 -- Alas!
 1442 
 1443   where
 1444     unfolding = idUnfolding bndr
 1445     uf_opts   = seUnfoldingOpts env
 1446     phase     = sm_phase (getMode env)
 1447     active    = isActive phase (idInlineActivation bndr)
 1448         -- See Note [pre/postInlineUnconditionally in gentle mode]
 1449 
 1450 {- Note [Inline small things to avoid creating a thunk]
 1451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1452 The point of examining occ_info here is that for *non-values* that
 1453 occur outside a lambda, the call-site inliner won't have a chance
 1454 (because it doesn't know that the thing only occurs once).  The
 1455 pre-inliner won't have gotten it either, if the thing occurs in more
 1456 than one branch So the main target is things like
 1457 
 1458      let x = f y in
 1459      case v of
 1460         True  -> case x of ...
 1461         False -> case x of ...
 1462 
 1463 This is very important in practice; e.g. wheel-seive1 doubles
 1464 in allocation if you miss this out.  And bits of GHC itself start
 1465 to allocate more.  An egregious example is test perf/compiler/T14697,
 1466 where GHC.Driver.CmdLine.$wprocessArgs allocated hugely more.
 1467 
 1468 Note [Suppress exponential blowup]
 1469 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1470 In #13253, and several related tickets, we got an exponential blowup
 1471 in code size from postInlineUnconditionally.  The trouble comes when
 1472 we have
 1473   let j1a = case f y     of { True -> p;   False -> q }
 1474       j1b = case f y     of { True -> q;   False -> p }
 1475       j2a = case f (y+1) of { True -> j1a; False -> j1b }
 1476       j2b = case f (y+1) of { True -> j1b; False -> j1a }
 1477       ...
 1478   in case f (y+10) of { True -> j10a; False -> j10b }
 1479 
 1480 when there are many branches. In pass 1, postInlineUnconditionally
 1481 inlines j10a and j10b (they are both small).  Now we have two calls
 1482 to j9a and two to j9b.  In pass 2, postInlineUnconditionally inlines
 1483 all four of these calls, leaving four calls to j8a and j8b. Etc.
 1484 Yikes!  This is exponential!
 1485 
 1486 A possible plan: stop doing postInlineUnconditionally
 1487 for some fixed, smallish number of branches, say 4. But that turned
 1488 out to be bad: see Note [Inline small things to avoid creating a thunk].
 1489 And, as it happened, the problem with #13253 was solved in a
 1490 different way (Note [Duplicating StrictArg] in Simplify).
 1491 
 1492 So I just set an arbitrary, high limit of 100, to stop any
 1493 totally exponential behaviour.
 1494 
 1495 This still leaves the nasty possibility that /ordinary/ inlining (not
 1496 postInlineUnconditionally) might inline these join points, each of
 1497 which is individually quiet small.  I'm still not sure what to do
 1498 about this (e.g. see #15488).
 1499 
 1500 Note [Top level and postInlineUnconditionally]
 1501 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1502 We don't do postInlineUnconditionally for top-level things (even for
 1503 ones that are trivial):
 1504 
 1505   * Doing so will inline top-level error expressions that have been
 1506     carefully floated out by FloatOut.  More generally, it might
 1507     replace static allocation with dynamic.
 1508 
 1509   * Even for trivial expressions there's a problem.  Consider
 1510       {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
 1511       blah xs = reverse xs
 1512       ruggle = sort
 1513     In one simplifier pass we might fire the rule, getting
 1514       blah xs = ruggle xs
 1515     but in *that* simplifier pass we must not do postInlineUnconditionally
 1516     on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
 1517 
 1518     If the rhs is trivial it'll be inlined by callSiteInline, and then
 1519     the binding will be dead and discarded by the next use of OccurAnal
 1520 
 1521   * There is less point, because the main goal is to get rid of local
 1522     bindings used in multiple case branches.
 1523 
 1524   * The inliner should inline trivial things at call sites anyway.
 1525 
 1526   * The Id might be exported.  We could check for that separately,
 1527     but since we aren't going to postInlineUnconditionally /any/
 1528     top-level bindings, we don't need to test.
 1529 
 1530 Note [Stable unfoldings and postInlineUnconditionally]
 1531 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1532 Do not do postInlineUnconditionally if the Id has a stable unfolding,
 1533 otherwise we lose the unfolding.  Example
 1534 
 1535      -- f has stable unfolding with rhs (e |> co)
 1536      --   where 'e' is big
 1537      f = e |> co
 1538 
 1539 Then there's a danger we'll optimise to
 1540 
 1541      f' = e
 1542      f = f' |> co
 1543 
 1544 and now postInlineUnconditionally, losing the stable unfolding on f.  Now f'
 1545 won't inline because 'e' is too big.
 1546 
 1547     c.f. Note [Stable unfoldings and preInlineUnconditionally]
 1548 
 1549 
 1550 ************************************************************************
 1551 *                                                                      *
 1552         Rebuilding a lambda
 1553 *                                                                      *
 1554 ************************************************************************
 1555 -}
 1556 
 1557 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
 1558 -- mkLam tries three things
 1559 --      a) eta reduction, if that gives a trivial expression
 1560 --      b) eta expansion [only if there are some value lambdas]
 1561 --
 1562 -- NB: the SimplEnv already includes the [OutBndr] in its in-scope set
 1563 mkLam _env [] body _cont
 1564   = return body
 1565 mkLam env bndrs body cont
 1566   = {-#SCC "mkLam" #-}
 1567     do { dflags <- getDynFlags
 1568        ; mkLam' dflags bndrs body }
 1569   where
 1570     mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
 1571     mkLam' dflags bndrs (Cast body co)
 1572       | not (any bad bndrs)
 1573         -- Note [Casts and lambdas]
 1574       = do { lam <- mkLam' dflags bndrs body
 1575            ; return (mkCast lam (mkPiCos Representational bndrs co)) }
 1576       where
 1577         co_vars  = tyCoVarsOfCo co
 1578         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
 1579 
 1580     mkLam' dflags bndrs body@(Lam {})
 1581       = mkLam' dflags (bndrs ++ bndrs1) body1
 1582       where
 1583         (bndrs1, body1) = collectBinders body
 1584 
 1585     mkLam' dflags bndrs (Tick t expr)
 1586       | tickishFloatable t
 1587       = mkTick t <$> mkLam' dflags bndrs expr
 1588 
 1589     mkLam' dflags bndrs body
 1590       | gopt Opt_DoEtaReduction dflags
 1591       , Just etad_lam <- tryEtaReduce bndrs body
 1592       = do { tick (EtaReduction (head bndrs))
 1593            ; return etad_lam }
 1594 
 1595       | not (contIsRhs cont)   -- See Note [Eta-expanding lambdas]
 1596       , sm_eta_expand (getMode env)
 1597       , any isRuntimeVar bndrs
 1598       , let body_arity = exprEtaExpandArity dflags body
 1599       , expandableArityType body_arity
 1600       = do { tick (EtaExpansion (head bndrs))
 1601            ; let res = mkLams bndrs $
 1602                        etaExpandAT in_scope body_arity body
 1603            ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
 1604                                           , text "after" <+> ppr res])
 1605            ; return res }
 1606 
 1607       | otherwise
 1608       = return (mkLams bndrs body)
 1609       where
 1610         in_scope = getInScope env  -- Includes 'bndrs'
 1611 
 1612 {-
 1613 Note [Eta expanding lambdas]
 1614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1615 In general we *do* want to eta-expand lambdas. Consider
 1616    f (\x -> case x of (a,b) -> \s -> blah)
 1617 where 's' is a state token, and hence can be eta expanded.  This
 1618 showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather
 1619 important function!
 1620 
 1621 The eta-expansion will never happen unless we do it now.  (Well, it's
 1622 possible that CorePrep will do it, but CorePrep only has a half-baked
 1623 eta-expander that can't deal with casts.  So it's much better to do it
 1624 here.)
 1625 
 1626 However, when the lambda is let-bound, as the RHS of a let, we have a
 1627 better eta-expander (in the form of tryEtaExpandRhs), so we don't
 1628 bother to try expansion in mkLam in that case; hence the contIsRhs
 1629 guard.
 1630 
 1631 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
 1632     See Note [No eta expansion in stable unfoldings]
 1633 
 1634 Note [Casts and lambdas]
 1635 ~~~~~~~~~~~~~~~~~~~~~~~~
 1636 Consider
 1637         (\x. (\y. e) `cast` g1) `cast` g2
 1638 There is a danger here that the two lambdas look separated, and the
 1639 full laziness pass might float an expression to between the two.
 1640 
 1641 So this equation in mkLam' floats the g1 out, thus:
 1642         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
 1643 where x:tx.
 1644 
 1645 In general, this floats casts outside lambdas, where (I hope) they
 1646 might meet and cancel with some other cast:
 1647         \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
 1648         /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
 1649         /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
 1650                           (if not (g `in` co))
 1651 
 1652 Notice that it works regardless of 'e'.  Originally it worked only
 1653 if 'e' was itself a lambda, but in some cases that resulted in
 1654 fruitless iteration in the simplifier.  A good example was when
 1655 compiling Text.ParserCombinators.ReadPrec, where we had a definition
 1656 like    (\x. Get `cast` g)
 1657 where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
 1658 the Get, and the next iteration eta-reduced it, and then eta-expanded
 1659 it again.
 1660 
 1661 Note also the side condition for the case of coercion binders.
 1662 It does not make sense to transform
 1663         /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
 1664 because the latter is not well-kinded.
 1665 
 1666 ************************************************************************
 1667 *                                                                      *
 1668               Eta expansion
 1669 *                                                                      *
 1670 ************************************************************************
 1671 -}
 1672 
 1673 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr
 1674                 -> SimplM (ArityType, OutExpr)
 1675 -- See Note [Eta-expanding at let bindings]
 1676 -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
 1677 --   (a) rhs' has manifest arity n
 1678 --   (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
 1679 tryEtaExpandRhs env bndr rhs
 1680   | Just join_arity <- isJoinId_maybe bndr
 1681   = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs
 1682              oss   = [idOneShotInfo id | id <- join_bndrs, isId id]
 1683              arity_type | exprIsDeadEnd join_body = mkBotArityType oss
 1684                         | otherwise               = mkTopArityType oss
 1685        ; return (arity_type, rhs) }
 1686          -- Note [Do not eta-expand join points]
 1687          -- But do return the correct arity and bottom-ness, because
 1688          -- these are used to set the bndr's IdInfo (#15517)
 1689          -- Note [Invariants on join points] invariant 2b, in GHC.Core
 1690 
 1691   | sm_eta_expand mode      -- Provided eta-expansion is on
 1692   , new_arity > old_arity   -- And the current manifest arity isn't enough
 1693   , want_eta rhs
 1694   = do { tick (EtaExpansion bndr)
 1695        ; return (arity_type, etaExpandAT in_scope arity_type rhs) }
 1696 
 1697   | otherwise
 1698   = return (arity_type, rhs)
 1699 
 1700   where
 1701     mode      = getMode env
 1702     in_scope  = getInScope env
 1703     dflags    = sm_dflags mode
 1704     old_arity = exprArity rhs
 1705 
 1706     arity_type = findRhsArity dflags bndr rhs old_arity
 1707                  `maxWithArity` idCallArity bndr
 1708     new_arity = arityTypeArity arity_type
 1709 
 1710     -- See Note [Which RHSs do we eta-expand?]
 1711     want_eta (Cast e _)                  = want_eta e
 1712     want_eta (Tick _ e)                  = want_eta e
 1713     want_eta (Lam b e) | isTyVar b       = want_eta e
 1714     want_eta (App e a) | exprIsTrivial a = want_eta e
 1715     want_eta (Var {})                    = False
 1716     want_eta (Lit {})                    = False
 1717     want_eta _ = True
 1718 {-
 1719     want_eta _ = case arity_type of
 1720                    ATop (os:_) -> isOneShotInfo os
 1721                    ATop []     -> False
 1722                    ABot {}     -> True
 1723 -}
 1724 
 1725 {-
 1726 Note [Eta-expanding at let bindings]
 1727 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1728 We now eta expand at let-bindings, which is where the payoff comes.
 1729 The most significant thing is that we can do a simple arity analysis
 1730 (in GHC.Core.Opt.Arity.findRhsArity), which we can't do for free-floating lambdas
 1731 
 1732 One useful consequence of not eta-expanding lambdas is this example:
 1733    genMap :: C a => ...
 1734    {-# INLINE genMap #-}
 1735    genMap f xs = ...
 1736 
 1737    myMap :: D a => ...
 1738    {-# INLINE myMap #-}
 1739    myMap = genMap
 1740 
 1741 Notice that 'genMap' should only inline if applied to two arguments.
 1742 In the stable unfolding for myMap we'll have the unfolding
 1743     (\d -> genMap Int (..d..))
 1744 We do not want to eta-expand to
 1745     (\d f xs -> genMap Int (..d..) f xs)
 1746 because then 'genMap' will inline, and it really shouldn't: at least
 1747 as far as the programmer is concerned, it's not applied to two
 1748 arguments!
 1749 
 1750 Note [Which RHSs do we eta-expand?]
 1751 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1752 We don't eta-expand:
 1753 
 1754 * Trivial RHSs, e.g.     f = g
 1755   If we eta expand do
 1756     f = \x. g x
 1757   we'll just eta-reduce again, and so on; so the
 1758   simplifier never terminates.
 1759 
 1760 * PAPs: see Note [Do not eta-expand PAPs]
 1761 
 1762 What about things like this?
 1763    f = case y of p -> \x -> blah
 1764 
 1765 Here we do eta-expand.  This is a change (Jun 20), but if we have
 1766 really decided that f has arity 1, then putting that lambda at the top
 1767 seems like a Good idea.
 1768 
 1769 Note [Do not eta-expand PAPs]
 1770 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1771 We used to have old_arity = manifestArity rhs, which meant that we
 1772 would eta-expand even PAPs.  But this gives no particular advantage,
 1773 and can lead to a massive blow-up in code size, exhibited by #9020.
 1774 Suppose we have a PAP
 1775     foo :: IO ()
 1776     foo = returnIO ()
 1777 Then we can eta-expand to
 1778     foo = (\eta. (returnIO () |> sym g) eta) |> g
 1779 where
 1780     g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
 1781 
 1782 But there is really no point in doing this, and it generates masses of
 1783 coercions and whatnot that eventually disappear again. For T9020, GHC
 1784 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
 1785 1.8G to 45M.
 1786 
 1787 Moreover, if we eta expand
 1788         f = g d  ==>  f = \x. g d x
 1789 that might in turn make g inline (if it has an inline pragma), which
 1790 we might not want.  After all, INLINE pragmas say "inline only when
 1791 saturated" so we don't want to be too gung-ho about saturating!
 1792 
 1793 But note that this won't eta-expand, say
 1794   f = \g -> map g
 1795 Does it matter not eta-expanding such functions?  I'm not sure.  Perhaps
 1796 strictness analysis will have less to bite on?
 1797 
 1798 Note [Do not eta-expand join points]
 1799 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1800 Similarly to CPR (see Note [Don't w/w join points for CPR] in
 1801 GHC.Core.Opt.WorkWrap), a join point stands well to gain from its outer binding's
 1802 eta-expansion, and eta-expanding a join point is fraught with issues like how to
 1803 deal with a cast:
 1804 
 1805     let join $j1 :: IO ()
 1806              $j1 = ...
 1807              $j2 :: Int -> IO ()
 1808              $j2 n = if n > 0 then $j1
 1809                               else ...
 1810 
 1811     =>
 1812 
 1813     let join $j1 :: IO ()
 1814              $j1 = (\eta -> ...)
 1815                      `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
 1816                                  ~  IO ()
 1817              $j2 :: Int -> IO ()
 1818              $j2 n = (\eta -> if n > 0 then $j1
 1819                                        else ...)
 1820                      `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
 1821                                  ~  IO ()
 1822 
 1823 The cast here can't be pushed inside the lambda (since it's not casting to a
 1824 function type), so the lambda has to stay, but it can't because it contains a
 1825 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
 1826 than try and detect this situation (and whatever other situations crop up!), we
 1827 don't bother; again, any surrounding eta-expansion will improve these join
 1828 points anyway, since an outer cast can *always* be pushed inside. By the time
 1829 CorePrep comes around, the code is very likely to look more like this:
 1830 
 1831     let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
 1832              $j1 = (...) eta
 1833              $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
 1834              $j2 = if n > 0 then $j1
 1835                             else (...) eta
 1836 
 1837 
 1838 ************************************************************************
 1839 *                                                                      *
 1840 \subsection{Floating lets out of big lambdas}
 1841 *                                                                      *
 1842 ************************************************************************
 1843 
 1844 Note [Floating and type abstraction]
 1845 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1846 Consider this:
 1847         x = /\a. C e1 e2
 1848 We'd like to float this to
 1849         y1 = /\a. e1
 1850         y2 = /\a. e2
 1851         x  = /\a. C (y1 a) (y2 a)
 1852 for the usual reasons: we want to inline x rather vigorously.
 1853 
 1854 You may think that this kind of thing is rare.  But in some programs it is
 1855 common.  For example, if you do closure conversion you might get:
 1856 
 1857         data a :-> b = forall e. (e -> a -> b) :$ e
 1858 
 1859         f_cc :: forall a. a :-> a
 1860         f_cc = /\a. (\e. id a) :$ ()
 1861 
 1862 Now we really want to inline that f_cc thing so that the
 1863 construction of the closure goes away.
 1864 
 1865 So I have elaborated simplLazyBind to understand right-hand sides that look
 1866 like
 1867         /\ a1..an. body
 1868 
 1869 and treat them specially. The real work is done in
 1870 GHC.Core.Opt.Simplify.Utils.abstractFloats, but there is quite a bit of plumbing
 1871 in simplLazyBind as well.
 1872 
 1873 The same transformation is good when there are lets in the body:
 1874 
 1875         /\abc -> let(rec) x = e in b
 1876    ==>
 1877         let(rec) x' = /\abc -> let x = x' a b c in e
 1878         in
 1879         /\abc -> let x = x' a b c in b
 1880 
 1881 This is good because it can turn things like:
 1882 
 1883         let f = /\a -> letrec g = ... g ... in g
 1884 into
 1885         letrec g' = /\a -> ... g' a ...
 1886         in
 1887         let f = /\ a -> g' a
 1888 
 1889 which is better.  In effect, it means that big lambdas don't impede
 1890 let-floating.
 1891 
 1892 This optimisation is CRUCIAL in eliminating the junk introduced by
 1893 desugaring mutually recursive definitions.  Don't eliminate it lightly!
 1894 
 1895 [May 1999]  If we do this transformation *regardless* then we can
 1896 end up with some pretty silly stuff.  For example,
 1897 
 1898         let
 1899             st = /\ s -> let { x1=r1 ; x2=r2 } in ...
 1900         in ..
 1901 becomes
 1902         let y1 = /\s -> r1
 1903             y2 = /\s -> r2
 1904             st = /\s -> ...[y1 s/x1, y2 s/x2]
 1905         in ..
 1906 
 1907 Unless the "..." is a WHNF there is really no point in doing this.
 1908 Indeed it can make things worse.  Suppose x1 is used strictly,
 1909 and is of the form
 1910 
 1911         x1* = case f y of { (a,b) -> e }
 1912 
 1913 If we abstract this wrt the tyvar we then can't do the case inline
 1914 as we would normally do.
 1915 
 1916 That's why the whole transformation is part of the same process that
 1917 floats let-bindings and constructor arguments out of RHSs.  In particular,
 1918 it is guarded by the doFloatFromRhs call in simplLazyBind.
 1919 
 1920 Note [Which type variables to abstract over]
 1921 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1922 Abstract only over the type variables free in the rhs wrt which the
 1923 new binding is abstracted.  Note that
 1924 
 1925   * The naive approach of abstracting wrt the
 1926     tyvars free in the Id's /type/ fails. Consider:
 1927         /\ a b -> let t :: (a,b) = (e1, e2)
 1928                       x :: a     = fst t
 1929                   in ...
 1930     Here, b isn't free in x's type, but we must nevertheless
 1931     abstract wrt b as well, because t's type mentions b.
 1932     Since t is floated too, we'd end up with the bogus:
 1933          poly_t = /\ a b -> (e1, e2)
 1934          poly_x = /\ a   -> fst (poly_t a *b*)
 1935 
 1936   * We must do closeOverKinds.  Example (#10934):
 1937        f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
 1938     Here we want to float 't', but we must remember to abstract over
 1939     'k' as well, even though it is not explicitly mentioned in the RHS,
 1940     otherwise we get
 1941        t = /\ (f:k->*) (a:k). AccFailure @ (f a)
 1942     which is obviously bogus.
 1943 
 1944   * We get the variables to abstract over by filtering down the
 1945     the main_tvs for the original function, picking only ones
 1946     mentioned in the abstracted body. This means:
 1947     - they are automatically in dependency order, because main_tvs is
 1948     - there is no issue about non-determinism
 1949     - we don't gratuitiously change order, which may help (in a tiny
 1950       way) with CSE and/or the compiler-debugging experience
 1951 -}
 1952 
 1953 abstractFloats :: UnfoldingOpts -> TopLevelFlag -> [OutTyVar] -> SimplFloats
 1954               -> OutExpr -> SimplM ([OutBind], OutExpr)
 1955 abstractFloats uf_opts top_lvl main_tvs floats body
 1956   = assert (notNull body_floats) $
 1957     assert (isNilOL (sfJoinFloats floats)) $
 1958     do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
 1959         ; return (float_binds, GHC.Core.Subst.substExpr subst body) }
 1960   where
 1961     is_top_lvl  = isTopLevel top_lvl
 1962     body_floats = letFloatBinds (sfLetFloats floats)
 1963     empty_subst = GHC.Core.Subst.mkEmptySubst (sfInScope floats)
 1964 
 1965     abstract :: GHC.Core.Subst.Subst -> OutBind -> SimplM (GHC.Core.Subst.Subst, OutBind)
 1966     abstract subst (NonRec id rhs)
 1967       = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
 1968            ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
 1969                  !subst' = GHC.Core.Subst.extendIdSubst subst id poly_app
 1970            ; return (subst', NonRec poly_id2 poly_rhs) }
 1971       where
 1972         rhs' = GHC.Core.Subst.substExpr subst rhs
 1973 
 1974         -- tvs_here: see Note [Which type variables to abstract over]
 1975         tvs_here = filter (`elemVarSet` free_tvs) main_tvs
 1976         free_tvs = closeOverKinds $
 1977                    exprSomeFreeVars isTyVar rhs'
 1978 
 1979     abstract subst (Rec prs)
 1980        = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
 1981             ; let subst' = GHC.Core.Subst.extendSubstList subst (ids `zip` poly_apps)
 1982                   poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
 1983                                | (poly_id, rhs) <- poly_ids `zip` rhss
 1984                                , let rhs' = GHC.Core.Subst.substExpr subst' rhs ]
 1985             ; return (subst', Rec poly_pairs) }
 1986        where
 1987          (ids,rhss) = unzip prs
 1988                 -- For a recursive group, it's a bit of a pain to work out the minimal
 1989                 -- set of tyvars over which to abstract:
 1990                 --      /\ a b c.  let x = ...a... in
 1991                 --                 letrec { p = ...x...q...
 1992                 --                          q = .....p...b... } in
 1993                 --                 ...
 1994                 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
 1995                 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
 1996                 -- Since it's a pain, we just use the whole set, which is always safe
 1997                 --
 1998                 -- If you ever want to be more selective, remember this bizarre case too:
 1999                 --      x::a = x
 2000                 -- Here, we must abstract 'x' over 'a'.
 2001          tvs_here = scopedSort main_tvs
 2002 
 2003     mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
 2004     mk_poly1 tvs_here var
 2005       = do { uniq <- getUniqueM
 2006            ; let  poly_name = setNameUnique (idName var) uniq      -- Keep same name
 2007                   poly_ty   = mkInfForAllTys tvs_here (idType var) -- But new type of course
 2008                   poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in GHC.Types.Id
 2009                               mkLocalId poly_name (idMult var) poly_ty
 2010            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
 2011                 -- In the olden days, it was crucial to copy the occInfo of the original var,
 2012                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
 2013                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
 2014                 -- at already simplified code, so it doesn't matter
 2015                 --
 2016                 -- It's even right to retain single-occurrence or dead-var info:
 2017                 -- Suppose we started with  /\a -> let x = E in B
 2018                 -- where x occurs once in B. Then we transform to:
 2019                 --      let x' = /\a -> E in /\a -> let x* = x' a in B
 2020                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
 2021                 -- the occurrences of x' will be just the occurrences originally
 2022                 -- pinned on x.
 2023 
 2024     mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
 2025     mk_poly2 poly_id tvs_here rhs
 2026       = (poly_id `setIdUnfolding` unf, poly_rhs)
 2027       where
 2028         poly_rhs = mkLams tvs_here rhs
 2029         unf = mkUnfolding uf_opts InlineRhs is_top_lvl False poly_rhs
 2030 
 2031         -- We want the unfolding.  Consider
 2032         --      let
 2033         --            x = /\a. let y = ... in Just y
 2034         --      in body
 2035         -- Then we float the y-binding out (via abstractFloats and addPolyBind)
 2036         -- but 'x' may well then be inlined in 'body' in which case we'd like the
 2037         -- opportunity to inline 'y' too.
 2038 
 2039 {-
 2040 Note [Abstract over coercions]
 2041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2042 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
 2043 type variable a.  Rather than sort this mess out, we simply bale out and abstract
 2044 wrt all the type variables if any of them are coercion variables.
 2045 
 2046 
 2047 Historical note: if you use let-bindings instead of a substitution, beware of this:
 2048 
 2049                 -- Suppose we start with:
 2050                 --
 2051                 --      x = /\ a -> let g = G in E
 2052                 --
 2053                 -- Then we'll float to get
 2054                 --
 2055                 --      x = let poly_g = /\ a -> G
 2056                 --          in /\ a -> let g = poly_g a in E
 2057                 --
 2058                 -- But now the occurrence analyser will see just one occurrence
 2059                 -- of poly_g, not inside a lambda, so the simplifier will
 2060                 -- PreInlineUnconditionally poly_g back into g!  Badk to square 1!
 2061                 -- (I used to think that the "don't inline lone occurrences" stuff
 2062                 --  would stop this happening, but since it's the *only* occurrence,
 2063                 --  PreInlineUnconditionally kicks in first!)
 2064                 --
 2065                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
 2066                 --           to appear many times.  (NB: mkInlineMe eliminates
 2067                 --           such notes on trivial RHSs, so do it manually.)
 2068 
 2069 ************************************************************************
 2070 *                                                                      *
 2071                 prepareAlts
 2072 *                                                                      *
 2073 ************************************************************************
 2074 
 2075 prepareAlts tries these things:
 2076 
 2077 1.  filterAlts: eliminate alternatives that cannot match, including
 2078     the DEFAULT alternative.  Here "cannot match" includes knowledge
 2079     from GADTs
 2080 
 2081 2.  refineDefaultAlt: if the DEFAULT alternative can match only one
 2082     possible constructor, then make that constructor explicit.
 2083     e.g.
 2084         case e of x { DEFAULT -> rhs }
 2085      ===>
 2086         case e of x { (a,b) -> rhs }
 2087     where the type is a single constructor type.  This gives better code
 2088     when rhs also scrutinises x or e.
 2089     See CoreUtils Note [Refine DEFAULT case alternatives]
 2090 
 2091 3. combineIdenticalAlts: combine identical alternatives into a DEFAULT.
 2092    See CoreUtils Note [Combine identical alternatives], which also
 2093    says why we do this on InAlts not on OutAlts
 2094 
 2095 4. Returns a list of the constructors that cannot holds in the
 2096    DEFAULT alternative (if there is one)
 2097 
 2098 It's a good idea to do this stuff before simplifying the alternatives, to
 2099 avoid simplifying alternatives we know can't happen, and to come up with
 2100 the list of constructors that are handled, to put into the IdInfo of the
 2101 case binder, for use when simplifying the alternatives.
 2102 
 2103 Eliminating the default alternative in (1) isn't so obvious, but it can
 2104 happen:
 2105 
 2106 data Colour = Red | Green | Blue
 2107 
 2108 f x = case x of
 2109         Red -> ..
 2110         Green -> ..
 2111         DEFAULT -> h x
 2112 
 2113 h y = case y of
 2114         Blue -> ..
 2115         DEFAULT -> [ case y of ... ]
 2116 
 2117 If we inline h into f, the default case of the inlined h can't happen.
 2118 If we don't notice this, we may end up filtering out *all* the cases
 2119 of the inner case y, which give us nowhere to go!
 2120 -}
 2121 
 2122 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
 2123 -- The returned alternatives can be empty, none are possible
 2124 prepareAlts scrut case_bndr' alts
 2125   | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
 2126            -- Case binder is needed just for its type. Note that as an
 2127            --   OutId, it has maximum information; this is important.
 2128            --   Test simpl013 is an example
 2129   = do { us <- getUniquesM
 2130        ; let (idcs1, alts1)       = filterAlts tc tys imposs_cons alts
 2131              (yes2,  alts2)       = refineDefaultAlt us (idMult case_bndr') tc tys idcs1 alts1
 2132                -- the multiplicity on case_bndr's is the multiplicity of the
 2133                -- case expression The newly introduced patterns in
 2134                -- refineDefaultAlt must be scaled by this multiplicity
 2135              (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
 2136              -- "idcs" stands for "impossible default data constructors"
 2137              -- i.e. the constructors that can't match the default case
 2138        ; when yes2 $ tick (FillInCaseDefault case_bndr')
 2139        ; when yes3 $ tick (AltMerge case_bndr')
 2140        ; return (idcs3, alts3) }
 2141 
 2142   | otherwise  -- Not a data type, so nothing interesting happens
 2143   = return ([], alts)
 2144   where
 2145     imposs_cons = case scrut of
 2146                     Var v -> otherCons (idUnfolding v)
 2147                     _     -> []
 2148 
 2149 
 2150 {-
 2151 ************************************************************************
 2152 *                                                                      *
 2153                 mkCase
 2154 *                                                                      *
 2155 ************************************************************************
 2156 
 2157 mkCase tries these things
 2158 
 2159 * Note [Nerge nested cases]
 2160 * Note [Eliminate identity case]
 2161 * Note [Scrutinee constant folding]
 2162 
 2163 Note [Merge Nested Cases]
 2164 ~~~~~~~~~~~~~~~~~~~~~~~~~
 2165        case e of b {             ==>   case e of b {
 2166          p1 -> rhs1                      p1 -> rhs1
 2167          ...                             ...
 2168          pm -> rhsm                      pm -> rhsm
 2169          _  -> case b of b' {            pn -> let b'=b in rhsn
 2170                      pn -> rhsn          ...
 2171                      ...                 po -> let b'=b in rhso
 2172                      po -> rhso          _  -> let b'=b in rhsd
 2173                      _  -> rhsd
 2174        }
 2175 
 2176 which merges two cases in one case when -- the default alternative of
 2177 the outer case scrutises the same variable as the outer case. This
 2178 transformation is called Case Merging.  It avoids that the same
 2179 variable is scrutinised multiple times.
 2180 
 2181 Note [Eliminate Identity Case]
 2182 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2183         case e of               ===> e
 2184                 True  -> True;
 2185                 False -> False
 2186 
 2187 and similar friends.
 2188 
 2189 Note [Scrutinee Constant Folding]
 2190 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2191      case x op# k# of _ {  ===> case x of _ {
 2192         a1# -> e1                  (a1# inv_op# k#) -> e1
 2193         a2# -> e2                  (a2# inv_op# k#) -> e2
 2194         ...                        ...
 2195         DEFAULT -> ed              DEFAULT -> ed
 2196 
 2197      where (x op# k#) inv_op# k# == x
 2198 
 2199 And similarly for commuted arguments and for some unary operations.
 2200 
 2201 The purpose of this transformation is not only to avoid an arithmetic
 2202 operation at runtime but to allow other transformations to apply in cascade.
 2203 
 2204 Example with the "Merge Nested Cases" optimization (from #12877):
 2205 
 2206       main = case t of t0
 2207          0##     -> ...
 2208          DEFAULT -> case t0 `minusWord#` 1## of t1
 2209             0##     -> ...
 2210             DEFAULT -> case t1 `minusWord#` 1## of t2
 2211                0##     -> ...
 2212                DEFAULT -> case t2 `minusWord#` 1## of _
 2213                   0##     -> ...
 2214                   DEFAULT -> ...
 2215 
 2216   becomes:
 2217 
 2218       main = case t of _
 2219       0##     -> ...
 2220       1##     -> ...
 2221       2##     -> ...
 2222       3##     -> ...
 2223       DEFAULT -> ...
 2224 
 2225 There are some wrinkles
 2226 
 2227 * Do not apply caseRules if there is just a single DEFAULT alternative
 2228      case e +# 3# of b { DEFAULT -> rhs }
 2229   If we applied the transformation here we would (stupidly) get
 2230      case a of b' { DEFAULT -> let b = e +# 3# in rhs }
 2231   and now the process may repeat, because that let will really
 2232   be a case.
 2233 
 2234 * The type of the scrutinee might change.  E.g.
 2235         case tagToEnum (x :: Int#) of (b::Bool)
 2236           False -> e1
 2237           True -> e2
 2238   ==>
 2239         case x of (b'::Int#)
 2240           DEFAULT -> e1
 2241           1#      -> e2
 2242 
 2243 * The case binder may be used in the right hand sides, so we need
 2244   to make a local binding for it, if it is alive.  e.g.
 2245          case e +# 10# of b
 2246            DEFAULT -> blah...b...
 2247            44#     -> blah2...b...
 2248   ===>
 2249          case e of b'
 2250            DEFAULT -> let b = b' +# 10# in blah...b...
 2251            34#     -> let b = 44# in blah2...b...
 2252 
 2253   Note that in the non-DEFAULT cases we know what to bind 'b' to,
 2254   whereas in the DEFAULT case we must reconstruct the original value.
 2255   But NB: we use b'; we do not duplicate 'e'.
 2256 
 2257 * In dataToTag we might need to make up some fake binders;
 2258   see Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold
 2259 -}
 2260 
 2261 mkCase, mkCase1, mkCase2, mkCase3
 2262    :: DynFlags
 2263    -> OutExpr -> OutId
 2264    -> OutType -> [OutAlt]               -- Alternatives in standard (increasing) order
 2265    -> SimplM OutExpr
 2266 
 2267 --------------------------------------------------
 2268 --      1. Merge Nested Cases
 2269 --------------------------------------------------
 2270 
 2271 mkCase dflags scrut outer_bndr alts_ty (Alt DEFAULT _ deflt_rhs : outer_alts)
 2272   | gopt Opt_CaseMerge dflags
 2273   , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
 2274        <- stripTicksTop tickishFloatable deflt_rhs
 2275   , inner_scrut_var == outer_bndr
 2276   = do  { tick (CaseMerge outer_bndr)
 2277 
 2278         ; let wrap_alt (Alt con args rhs) = assert (outer_bndr `notElem` args)
 2279                                             (Alt con args (wrap_rhs rhs))
 2280                 -- Simplifier's no-shadowing invariant should ensure
 2281                 -- that outer_bndr is not shadowed by the inner patterns
 2282               wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
 2283                 -- The let is OK even for unboxed binders,
 2284 
 2285               wrapped_alts | isDeadBinder inner_bndr = inner_alts
 2286                            | otherwise               = map wrap_alt inner_alts
 2287 
 2288               merged_alts = mergeAlts outer_alts wrapped_alts
 2289                 -- NB: mergeAlts gives priority to the left
 2290                 --      case x of
 2291                 --        A -> e1
 2292                 --        DEFAULT -> case x of
 2293                 --                      A -> e2
 2294                 --                      B -> e3
 2295                 -- When we merge, we must ensure that e1 takes
 2296                 -- precedence over e2 as the value for A!
 2297 
 2298         ; fmap (mkTicks ticks) $
 2299           mkCase1 dflags scrut outer_bndr alts_ty merged_alts
 2300         }
 2301         -- Warning: don't call mkCase recursively!
 2302         -- Firstly, there's no point, because inner alts have already had
 2303         -- mkCase applied to them, so they won't have a case in their default
 2304         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
 2305         -- in munge_rhs may put a case into the DEFAULT branch!
 2306 
 2307 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
 2308 
 2309 --------------------------------------------------
 2310 --      2. Eliminate Identity Case
 2311 --------------------------------------------------
 2312 
 2313 mkCase1 _dflags scrut case_bndr _ alts@(Alt _ _ rhs1 : _)      -- Identity case
 2314   | all identity_alt alts
 2315   = do { tick (CaseIdentity case_bndr)
 2316        ; return (mkTicks ticks $ re_cast scrut rhs1) }
 2317   where
 2318     ticks = concatMap (\(Alt _ _ rhs) -> stripTicksT tickishFloatable rhs) (tail alts)
 2319     identity_alt (Alt con args rhs) = check_eq rhs con args
 2320 
 2321     check_eq (Cast rhs co) con args        -- See Note [RHS casts]
 2322       = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
 2323     check_eq (Tick t e) alt args
 2324       = tickishFloatable t && check_eq e alt args
 2325 
 2326     check_eq (Lit lit) (LitAlt lit') _     = lit == lit'
 2327     check_eq (Var v) _ _  | v == case_bndr = True
 2328     check_eq (Var v)   (DataAlt con) args
 2329       | null arg_tys, null args            = v == dataConWorkId con
 2330                                              -- Optimisation only
 2331     check_eq rhs        (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
 2332                                              mkConApp2 con arg_tys args
 2333     check_eq _          _             _    = False
 2334 
 2335     arg_tys = tyConAppArgs (idType case_bndr)
 2336 
 2337         -- Note [RHS casts]
 2338         -- ~~~~~~~~~~~~~~~~
 2339         -- We've seen this:
 2340         --      case e of x { _ -> x `cast` c }
 2341         -- And we definitely want to eliminate this case, to give
 2342         --      e `cast` c
 2343         -- So we throw away the cast from the RHS, and reconstruct
 2344         -- it at the other end.  All the RHS casts must be the same
 2345         -- if (all identity_alt alts) holds.
 2346         --
 2347         -- Don't worry about nested casts, because the simplifier combines them
 2348 
 2349     re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
 2350     re_cast scrut _             = scrut
 2351 
 2352 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
 2353 
 2354 --------------------------------------------------
 2355 --      2. Scrutinee Constant Folding
 2356 --------------------------------------------------
 2357 
 2358 mkCase2 dflags scrut bndr alts_ty alts
 2359   | -- See Note [Scrutinee Constant Folding]
 2360     case alts of  -- Not if there is just a DEFAULT alternative
 2361       [Alt DEFAULT _ _] -> False
 2362       _                 -> True
 2363   , gopt Opt_CaseFolding dflags
 2364   , Just (scrut', tx_con, mk_orig) <- caseRules (targetPlatform dflags) scrut
 2365   = do { bndr' <- newId (fsLit "lwild") Many (exprType scrut')
 2366 
 2367        ; alts' <- mapMaybeM (tx_alt tx_con mk_orig bndr') alts
 2368                   -- mapMaybeM: discard unreachable alternatives
 2369                   -- See Note [Unreachable caseRules alternatives]
 2370                   -- in GHC.Core.Opt.ConstantFold
 2371 
 2372        ; mkCase3 dflags scrut' bndr' alts_ty $
 2373          add_default (re_sort alts')
 2374        }
 2375 
 2376   | otherwise
 2377   = mkCase3 dflags scrut bndr alts_ty alts
 2378   where
 2379     -- We need to keep the correct association between the scrutinee and its
 2380     -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
 2381     -- "let bndr = ... in":
 2382     --
 2383     --     case v + 10 of y        =====> case v of y
 2384     --        20      -> e1                 10      -> let y = 20     in e1
 2385     --        DEFAULT -> e2                 DEFAULT -> let y = v + 10 in e2
 2386     --
 2387     -- Other transformations give: =====> case v of y'
 2388     --                                      10      -> let y = 20      in e1
 2389     --                                      DEFAULT -> let y = y' + 10 in e2
 2390     --
 2391     -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
 2392     -- to construct an expression equivalent to the original one, for use
 2393     -- in the DEFAULT case
 2394 
 2395     tx_alt :: (AltCon -> Maybe AltCon) -> (Id -> CoreExpr) -> Id
 2396            -> CoreAlt -> SimplM (Maybe CoreAlt)
 2397     tx_alt tx_con mk_orig new_bndr (Alt con bs rhs)
 2398       = case tx_con con of
 2399           Nothing   -> return Nothing
 2400           Just con' -> do { bs' <- mk_new_bndrs new_bndr con'
 2401                           ; return (Just (Alt con' bs' rhs')) }
 2402       where
 2403         rhs' | isDeadBinder bndr = rhs
 2404              | otherwise         = bindNonRec bndr orig_val rhs
 2405 
 2406         orig_val = case con of
 2407                       DEFAULT    -> mk_orig new_bndr
 2408                       LitAlt l   -> Lit l
 2409                       DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
 2410 
 2411     mk_new_bndrs new_bndr (DataAlt dc)
 2412       | not (isNullaryRepDataCon dc)
 2413       = -- For non-nullary data cons we must invent some fake binders
 2414         -- See Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold
 2415         do { us <- getUniquesM
 2416            ; let (ex_tvs, arg_ids) = dataConRepInstPat us (idMult new_bndr) dc
 2417                                         (tyConAppArgs (idType new_bndr))
 2418            ; return (ex_tvs ++ arg_ids) }
 2419     mk_new_bndrs _ _ = return []
 2420 
 2421     re_sort :: [CoreAlt] -> [CoreAlt]
 2422     -- Sort the alternatives to re-establish
 2423     -- GHC.Core Note [Case expression invariants]
 2424     re_sort alts = sortBy cmpAlt alts
 2425 
 2426     add_default :: [CoreAlt] -> [CoreAlt]
 2427     -- See Note [Literal cases]
 2428     add_default (Alt (LitAlt {}) bs rhs : alts) = Alt DEFAULT bs rhs : alts
 2429     add_default alts                            = alts
 2430 
 2431 {- Note [Literal cases]
 2432 ~~~~~~~~~~~~~~~~~~~~~~~
 2433 If we have
 2434   case tagToEnum (a ># b) of
 2435      False -> e1
 2436      True  -> e2
 2437 
 2438 then caseRules for TagToEnum will turn it into
 2439   case tagToEnum (a ># b) of
 2440      0# -> e1
 2441      1# -> e2
 2442 
 2443 Since the case is exhaustive (all cases are) we can convert it to
 2444   case tagToEnum (a ># b) of
 2445      DEFAULT -> e1
 2446      1#      -> e2
 2447 
 2448 This may generate sligthtly better code (although it should not, since
 2449 all cases are exhaustive) and/or optimise better.  I'm not certain that
 2450 it's necessary, but currently we do make this change.  We do it here,
 2451 NOT in the TagToEnum rules (see "Beware" in Note [caseRules for tagToEnum]
 2452 in GHC.Core.Opt.ConstantFold)
 2453 -}
 2454 
 2455 --------------------------------------------------
 2456 --      Catch-all
 2457 --------------------------------------------------
 2458 mkCase3 _dflags scrut bndr alts_ty alts
 2459   = return (Case scrut bndr alts_ty alts)
 2460 
 2461 -- See Note [Exitification] and Note [Do not inline exit join points] in
 2462 -- GHC.Core.Opt.Exitify
 2463 -- This lives here (and not in Id) because occurrence info is only valid on
 2464 -- InIds, so it's crucial that isExitJoinId is only called on freshly
 2465 -- occ-analysed code. It's not a generic function you can call anywhere.
 2466 isExitJoinId :: Var -> Bool
 2467 isExitJoinId id
 2468   = isJoinId id
 2469   && isOneOcc (idOccInfo id)
 2470   && occ_in_lam (idOccInfo id) == IsInsideLam
 2471 
 2472 {-
 2473 Note [Dead binders]
 2474 ~~~~~~~~~~~~~~~~~~~~
 2475 Note that dead-ness is maintained by the simplifier, so that it is
 2476 accurate after simplification as well as before.
 2477 
 2478 
 2479 Note [Cascading case merge]
 2480 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2481 Case merging should cascade in one sweep, because it
 2482 happens bottom-up
 2483 
 2484       case e of a {
 2485         DEFAULT -> case a of b
 2486                       DEFAULT -> case b of c {
 2487                                      DEFAULT -> e
 2488                                      A -> ea
 2489                       B -> eb
 2490         C -> ec
 2491 ==>
 2492       case e of a {
 2493         DEFAULT -> case a of b
 2494                       DEFAULT -> let c = b in e
 2495                       A -> let c = b in ea
 2496                       B -> eb
 2497         C -> ec
 2498 ==>
 2499       case e of a {
 2500         DEFAULT -> let b = a in let c = b in e
 2501         A -> let b = a in let c = b in ea
 2502         B -> let b = a in eb
 2503         C -> ec
 2504 
 2505 
 2506 However here's a tricky case that we still don't catch, and I don't
 2507 see how to catch it in one pass:
 2508 
 2509   case x of c1 { I# a1 ->
 2510   case a1 of c2 ->
 2511     0 -> ...
 2512     DEFAULT -> case x of c3 { I# a2 ->
 2513                case a2 of ...
 2514 
 2515 After occurrence analysis (and its binder-swap) we get this
 2516 
 2517   case x of c1 { I# a1 ->
 2518   let x = c1 in         -- Binder-swap addition
 2519   case a1 of c2 ->
 2520     0 -> ...
 2521     DEFAULT -> case x of c3 { I# a2 ->
 2522                case a2 of ...
 2523 
 2524 When we simplify the inner case x, we'll see that
 2525 x=c1=I# a1.  So we'll bind a2 to a1, and get
 2526 
 2527   case x of c1 { I# a1 ->
 2528   case a1 of c2 ->
 2529     0 -> ...
 2530     DEFAULT -> case a1 of ...
 2531 
 2532 This is correct, but we can't do a case merge in this sweep
 2533 because c2 /= a1.  Reason: the binding c1=I# a1 went inwards
 2534 without getting changed to c1=I# c2.
 2535 
 2536 I don't think this is worth fixing, even if I knew how. It'll
 2537 all come out in the next pass anyway.
 2538 -}