never executed always true always false

    1 {-
    2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
    3 
    4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
    5 -}
    6 
    7 
    8 module GHC.Core.Opt.WorkWrap ( wwTopBinds ) where
    9 
   10 import GHC.Prelude
   11 
   12 import GHC.Driver.Session
   13 
   14 import GHC.Core.Opt.Arity  ( manifestArity )
   15 import GHC.Core
   16 import GHC.Core.Unfold.Make
   17 import GHC.Core.Utils  ( exprType, exprIsHNF )
   18 import GHC.Core.Type
   19 import GHC.Core.Opt.WorkWrap.Utils
   20 import GHC.Core.FamInstEnv
   21 import GHC.Core.SimpleOpt
   22 
   23 import GHC.Types.Var
   24 import GHC.Types.Id
   25 import GHC.Types.Id.Info
   26 import GHC.Types.Unique.Supply
   27 import GHC.Types.Basic
   28 import GHC.Types.Demand
   29 import GHC.Types.Cpr
   30 import GHC.Types.SourceText
   31 import GHC.Types.Unique
   32 
   33 import GHC.Utils.Misc
   34 import GHC.Utils.Outputable
   35 import GHC.Utils.Panic
   36 import GHC.Utils.Panic.Plain
   37 import GHC.Utils.Monad
   38 import GHC.Utils.Trace
   39 import GHC.Unit.Types
   40 
   41 {-
   42 We take Core bindings whose binders have:
   43 
   44 \begin{enumerate}
   45 
   46 \item Strictness attached (by the front-end of the strictness
   47 analyser), and / or
   48 
   49 \item Constructed Product Result information attached by the CPR
   50 analysis pass.
   51 
   52 \end{enumerate}
   53 
   54 and we return some ``plain'' bindings which have been
   55 worker/wrapper-ified, meaning:
   56 
   57 \begin{enumerate}
   58 
   59 \item Functions have been split into workers and wrappers where
   60 appropriate.  If a function has both strictness and CPR properties
   61 then only one worker/wrapper doing both transformations is produced;
   62 
   63 \item Binders' @IdInfos@ have been updated to reflect the existence of
   64 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
   65 info for exported values).
   66 \end{enumerate}
   67 -}
   68 
   69 wwTopBinds :: Module -> DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram
   70 
   71 wwTopBinds this_mod dflags fam_envs us top_binds
   72   = initUs_ us $ do
   73     top_binds' <- mapM (wwBind ww_opts) top_binds
   74     return (concat top_binds')
   75   where
   76     ww_opts = initWwOpts this_mod dflags fam_envs
   77 
   78 {-
   79 ************************************************************************
   80 *                                                                      *
   81 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
   82 *                                                                      *
   83 ************************************************************************
   84 
   85 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
   86 turn.  Non-recursive case first, then recursive...
   87 -}
   88 
   89 wwBind  :: WwOpts
   90         -> CoreBind
   91         -> UniqSM [CoreBind]    -- returns a WwBinding intermediate form;
   92                                 -- the caller will convert to Expr/Binding,
   93                                 -- as appropriate.
   94 
   95 wwBind ww_opts (NonRec binder rhs) = do
   96     new_rhs   <- wwExpr ww_opts rhs
   97     new_pairs <- tryWW ww_opts NonRecursive binder new_rhs
   98     return [NonRec b e | (b,e) <- new_pairs]
   99       -- Generated bindings must be non-recursive
  100       -- because the original binding was.
  101 
  102 wwBind ww_opts (Rec pairs)
  103   = return . Rec <$> concatMapM do_one pairs
  104   where
  105     do_one (binder, rhs) = do new_rhs <- wwExpr ww_opts rhs
  106                               tryWW ww_opts Recursive binder new_rhs
  107 
  108 {-
  109 @wwExpr@ basically just walks the tree, looking for appropriate
  110 annotations that can be used. Remember it is @wwBind@ that does the
  111 matching by looking for strict arguments of the correct type.
  112 @wwExpr@ is a version that just returns the ``Plain'' Tree.
  113 -}
  114 
  115 wwExpr :: WwOpts -> CoreExpr -> UniqSM CoreExpr
  116 
  117 wwExpr _ e@(Type {}) = return e
  118 wwExpr _ e@(Coercion {}) = return e
  119 wwExpr _ e@(Lit  {}) = return e
  120 wwExpr _ e@(Var  {}) = return e
  121 
  122 wwExpr ww_opts (Lam binder expr)
  123   = Lam new_binder <$> wwExpr ww_opts expr
  124   where new_binder | isId binder = zapIdUsedOnceInfo binder
  125                    | otherwise   = binder
  126   -- See Note [Zapping Used Once info in WorkWrap]
  127 
  128 wwExpr ww_opts (App f a)
  129   = App <$> wwExpr ww_opts f <*> wwExpr ww_opts a
  130 
  131 wwExpr ww_opts (Tick note expr)
  132   = Tick note <$> wwExpr ww_opts expr
  133 
  134 wwExpr ww_opts (Cast expr co) = do
  135     new_expr <- wwExpr ww_opts expr
  136     return (Cast new_expr co)
  137 
  138 wwExpr ww_opts (Let bind expr)
  139   = mkLets <$> wwBind ww_opts bind <*> wwExpr ww_opts expr
  140 
  141 wwExpr ww_opts (Case expr binder ty alts) = do
  142     new_expr <- wwExpr ww_opts expr
  143     new_alts <- mapM ww_alt alts
  144     let new_binder = zapIdUsedOnceInfo binder
  145       -- See Note [Zapping Used Once info in WorkWrap]
  146     return (Case new_expr new_binder ty new_alts)
  147   where
  148     ww_alt (Alt con binders rhs) = do
  149         new_rhs <- wwExpr ww_opts rhs
  150         let new_binders = [ if isId b then zapIdUsedOnceInfo b else b
  151                           | b <- binders ]
  152            -- See Note [Zapping Used Once info in WorkWrap]
  153         return (Alt con new_binders new_rhs)
  154 
  155 {-
  156 ************************************************************************
  157 *                                                                      *
  158 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
  159 *                                                                      *
  160 ************************************************************************
  161 
  162 @tryWW@ just accumulates arguments, converts strictness info from the
  163 front-end into the proper form, then calls @mkWwBodies@ to do
  164 the business.
  165 
  166 The only reason this is monadised is for the unique supply.
  167 
  168 Note [Don't w/w INLINE things]
  169 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  170 It's very important to refrain from w/w-ing an INLINE function (ie one
  171 with a stable unfolding) because the wrapper will then overwrite the
  172 old stable unfolding with the wrapper code.
  173 
  174 Furthermore, if the programmer has marked something as INLINE,
  175 we may lose by w/w'ing it.
  176 
  177 If the strictness analyser is run twice, this test also prevents
  178 wrappers (which are INLINEd) from being re-done.  (You can end up with
  179 several liked-named Ids bouncing around at the same time---absolute
  180 mischief.)
  181 
  182 Notice that we refrain from w/w'ing an INLINE function even if it is
  183 in a recursive group.  It might not be the loop breaker.  (We could
  184 test for loop-breaker-hood, but I'm not sure that ever matters.)
  185 
  186 Note [Worker/wrapper for INLINABLE functions]
  187 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  188 If we have
  189   {-# INLINABLE f #-}
  190   f :: Ord a => [a] -> Int -> a
  191   f x y = ....f....
  192 
  193 where f is strict in y, we might get a more efficient loop by w/w'ing
  194 f.  But that would make a new unfolding which would overwrite the old
  195 one! So the function would no longer be INLINABLE, and in particular
  196 will not be specialised at call sites in other modules.
  197 
  198 This comes up in practice (#6056).
  199 
  200 Solution: do the w/w for strictness analysis, but transfer the Stable
  201 unfolding to the *worker*.  So we will get something like this:
  202 
  203   {-# INLINE[2] f #-}
  204   f :: Ord a => [a] -> Int -> a
  205   f d x y = case y of I# y' -> fw d x y'
  206 
  207   {-# INLINABLE[2] fw #-}
  208   fw :: Ord a => [a] -> Int# -> a
  209   fw d x y' = let y = I# y' in ...f...
  210 
  211 How do we "transfer the unfolding"? Easy: by using the old one, wrapped
  212 in work_fn! See GHC.Core.Unfold.Make.mkWorkerUnfolding.
  213 
  214 Note [No worker/wrapper for record selectors]
  215 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  216 We sometimes generate a lot of record selectors, and generally the
  217 don't benefit from worker/wrapper.  Yes, mkWwBodies would find a w/w split,
  218 but it is then suppressed by the certainlyWillInline test in splitFun.
  219 
  220 The wasted effort in mkWwBodies makes a measurable difference in
  221 compile time (see MR !2873), so although it's a terribly ad-hoc test,
  222 we just check here for record selectors, and do a no-op in that case.
  223 
  224 I did look for a generalisation, so that it's not just record
  225 selectors that benefit.  But you'd need a cheap test for "this
  226 function will definitely get a w/w split" and that's hard to predict
  227 in advance...the logic in mkWwBodies is complex. So I've left the
  228 super-simple test, with this Note to explain.
  229 
  230 NB: record selectors are ordinary functions, inlined iff GHC wants to,
  231 so won't be caught by the preceding isInlineUnfolding test in tryWW.
  232 
  233 Note [Worker/wrapper for NOINLINE functions]
  234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  235 We used to disable worker/wrapper for NOINLINE things, but it turns out
  236 this can cause unnecessary reboxing of values. Consider
  237 
  238   {-# NOINLINE f #-}
  239   f :: Int -> a
  240   f x = error (show x)
  241 
  242   g :: Bool -> Bool -> Int -> Int
  243   g True  True  p = f p
  244   g False True  p = p + 1
  245   g b     False p = g b True p
  246 
  247 the strictness analysis will discover f and g are strict, but because f
  248 has no wrapper, the worker for g will rebox p. So we get
  249 
  250   $wg x y p# =
  251     let p = I# p# in  -- Yikes! Reboxing!
  252     case x of
  253       False ->
  254         case y of
  255           False -> $wg False True p#
  256           True -> +# p# 1#
  257       True ->
  258         case y of
  259           False -> $wg True True p#
  260           True -> case f p of { }
  261 
  262   g x y p = case p of (I# p#) -> $wg x y p#
  263 
  264 Now, in this case the reboxing will float into the True branch, and so
  265 the allocation will only happen on the error path. But it won't float
  266 inwards if there are multiple branches that call (f p), so the reboxing
  267 will happen on every call of g. Disaster.
  268 
  269 Solution: do worker/wrapper even on NOINLINE things; but move the
  270 NOINLINE pragma to the worker.
  271 
  272 (See #13143 for a real-world example.)
  273 
  274 It is crucial that we do this for *all* NOINLINE functions. #10069
  275 demonstrates what happens when we promise to w/w a (NOINLINE) leaf
  276 function, but fail to deliver:
  277 
  278   data C = C Int# Int#
  279 
  280   {-# NOINLINE c1 #-}
  281   c1 :: C -> Int#
  282   c1 (C _ n) = n
  283 
  284   {-# NOINLINE fc #-}
  285   fc :: C -> Int#
  286   fc c = 2 *# c1 c
  287 
  288 Failing to w/w `c1`, but still w/wing `fc` leads to the following code:
  289 
  290   c1 :: C -> Int#
  291   c1 (C _ n) = n
  292 
  293   $wfc :: Int# -> Int#
  294   $wfc n = let c = C 0# n in 2 #* c1 c
  295 
  296   fc :: C -> Int#
  297   fc (C _ n) = $wfc n
  298 
  299 Yikes! The reboxed `C` in `$wfc` can't cancel out, so we are in a bad place.
  300 This generalises to any function that derives its strictness signature from
  301 its callees, so we have to make sure that when a function announces particular
  302 strictness properties, we have to w/w them accordingly, even if it means
  303 splitting a NOINLINE function.
  304 
  305 Note [Worker activation]
  306 ~~~~~~~~~~~~~~~~~~~~~~~~
  307 Follows on from Note [Worker/wrapper for INLINABLE functions]
  308 
  309 It is *vital* that if the worker gets an INLINABLE pragma (from the
  310 original function), then the worker has the same phase activation as
  311 the wrapper (or later).  That is necessary to allow the wrapper to
  312 inline into the worker's unfolding: see GHC.Core.Opt.Simplify.Utils
  313 Note [Simplifying inside stable unfoldings].
  314 
  315 If the original is NOINLINE, it's important that the worker inherits the
  316 original activation. Consider
  317 
  318   {-# NOINLINE expensive #-}
  319   expensive x = x + 1
  320 
  321   f y = let z = expensive y in ...
  322 
  323 If expensive's worker inherits the wrapper's activation,
  324 we'll get this (because of the compromise in point (2) of
  325 Note [Wrapper activation])
  326 
  327   {-# NOINLINE[Final] $wexpensive #-}
  328   $wexpensive x = x + 1
  329   {-# INLINE[Final] expensive #-}
  330   expensive x = $wexpensive x
  331 
  332   f y = let z = expensive y in ...
  333 
  334 and $wexpensive will be immediately inlined into expensive, followed by
  335 expensive into f. This effectively removes the original NOINLINE!
  336 
  337 Otherwise, nothing is lost by giving the worker the same activation as the
  338 wrapper, because the worker won't have any chance of inlining until the
  339 wrapper does; there's no point in giving it an earlier activation.
  340 
  341 Note [Don't w/w inline small non-loop-breaker things]
  342 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  343 In general, we refrain from w/w-ing *small* functions, which are not
  344 loop breakers, because they'll inline anyway.  But we must take care:
  345 it may look small now, but get to be big later after other inlining
  346 has happened.  So we take the precaution of adding a StableUnfolding
  347 for any such functions.
  348 
  349 I made this change when I observed a big function at the end of
  350 compilation with a useful strictness signature but no w-w.  (It was
  351 small during demand analysis, we refrained from w/w, and then got big
  352 when something was inlined in its rhs.) When I measured it on nofib,
  353 it didn't make much difference; just a few percent improved allocation
  354 on one benchmark (bspt/Euclid.space).  But nothing got worse.
  355 
  356 There is an infelicity though.  We may get something like
  357       f = g val
  358 ==>
  359       g x = case gw x of r -> I# r
  360 
  361       f {- InlineStable, Template = g val -}
  362       f = case gw x of r -> I# r
  363 
  364 The code for f duplicates that for g, without any real benefit. It
  365 won't really be executed, because calls to f will go via the inlining.
  366 
  367 Note [Don't w/w join points for CPR]
  368 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  369 There's no point in exploiting CPR info on a join point. If the whole function
  370 is getting CPR'd, then the case expression around the worker function will get
  371 pushed into the join point by the simplifier, which will have the same effect
  372 that w/w'ing for CPR would have - the result will be returned in an unboxed
  373 tuple.
  374 
  375   f z = let join j x y = (x+1, y+1)
  376         in case z of A -> j 1 2
  377                      B -> j 2 3
  378 
  379   =>
  380 
  381   f z = case $wf z of (# a, b #) -> (a, b)
  382   $wf z = case (let join j x y = (x+1, y+1)
  383                 in case z of A -> j 1 2
  384                              B -> j 2 3) of (a, b) -> (# a, b #)
  385 
  386   =>
  387 
  388   f z = case $wf z of (# a, b #) -> (a, b)
  389   $wf z = let join j x y = (# x+1, y+1 #)
  390           in case z of A -> j 1 2
  391                        B -> j 2 3
  392 
  393 Note that we still want to give `j` the CPR property, so that `f` has it. So
  394 CPR *analyse* join points as regular functions, but don't *transform* them.
  395 
  396 We could retain the CPR /signature/ on the worker after W/W, but it would
  397 become outright wrong if the Simplifier pushes a non-trivial continuation
  398 into it. For example:
  399     case (let $j x = (x,x) in ...) of alts
  400     ==>
  401     let $j x = case (x,x) of alts in case ... of alts
  402 Before pushing the case in, `$j` has the CPR property, but not afterwards.
  403 
  404 So we simply zap the CPR signature for join pints as part of the W/W pass.
  405 The signature served its purpose during CPR analysis in propagating the
  406 CPR property of `$j`.
  407 
  408 Doing W/W for returned products on a join point would be tricky anyway, as the
  409 worker could not be a join point because it would not be tail-called. However,
  410 doing the *argument* part of W/W still works for join points, since the wrapper
  411 body will make a tail call:
  412 
  413   f z = let join j x y = x + y
  414         in ...
  415 
  416   =>
  417 
  418   f z = let join $wj x# y# = x# +# y#
  419                  j x y = case x of I# x# ->
  420                          case y of I# y# ->
  421                          $wj x# y#
  422         in ...
  423 
  424 Note [Wrapper activation]
  425 ~~~~~~~~~~~~~~~~~~~~~~~~~
  426 When should the wrapper inlining be active?
  427 
  428 1. It must not be active earlier than the current Activation of the Id,
  429    because we must give rewrite rules mentioning the wrapper and
  430    specialisation a chance to fire.
  431    See Note [Worker/wrapper for INLINABLE functions]
  432    and Note [Worker activation]
  433 
  434 2. It should be active at some point, despite (1) because of
  435    Note [Worker/wrapper for NOINLINE functions]
  436 
  437 3. For ordinary functions with no pragmas we want to inline the
  438    wrapper as early as possible (#15056).  Suppose another module
  439    defines    f !x xs = ... foldr k z xs ...
  440    and suppose we have the usual foldr/build RULE.  Then if we have
  441    a call `f x [1..x]`, we'd expect to inline f and the RULE will fire.
  442    But if f is w/w'd (which it might be), we want the inlining to
  443    occur just as if it hadn't been.
  444 
  445    (This only matters if f's RHS is big enough to w/w, but small
  446    enough to inline given the call site, but that can happen.)
  447 
  448 4. We do not want to inline the wrapper before specialisation.
  449          module Foo where
  450            f :: Num a => a -> Int -> a
  451            f n 0 = n              -- Strict in the Int, hence wrapper
  452            f n x = f (n+n) (x-1)
  453 
  454            g :: Int -> Int
  455            g x = f x x            -- Provokes a specialisation for f
  456 
  457          module Bar where
  458            import Foo
  459 
  460            h :: Int -> Int
  461            h x = f 3 x
  462 
  463    In module Bar we want to give specialisations a chance to fire
  464    before inlining f's wrapper.
  465 
  466    Historical note: At one stage I tried making the wrapper inlining
  467    always-active, and that had a very bad effect on nofib/imaginary/x2n1;
  468    a wrapper was inlined before the specialisation fired.
  469 
  470 Reminder: Note [Don't w/w INLINE things], so we don't need to worry
  471           about INLINE things here.
  472 
  473 Conclusion:
  474   - If the user said NOINLINE[n] or INLINABLE[n], respect that by putting
  475     INLINE[n] on the wrapper (and NOINLINE[n]/INLINABLE[n] on the worker).
  476 
  477   - If the user said NOINLINE, inline the wrapper only in
  478     FinalPhase, which is after all the numbered, user-visible phases (and put
  479     the original pragma on the worker). That means that all rules will have had
  480     a chance to fire.
  481     NB: Similar to InitialPhase, users can't write INLINE[Final] f;
  482     it's syntactically illegal. See Note [Compiler phases].
  483 
  484   - Otherwise (no pragma or INLINABLE) inline the wrapper in the first phase
  485     *after* InitialPhase. We run InitialPhase before the specialiser so that
  486     will not inline the wrapper before specialisation; but it will do so
  487     immediately afterwards.
  488 
  489 Note [Wrapper NoUserInlinePrag]
  490 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  491 We use NoUserInlinePrag on the wrapper, to say that there is no
  492 user-specified inline pragma. (The worker inherits that; see Note
  493 [Worker/wrapper for INLINABLE functions].)  The wrapper has no pragma
  494 given by the user.
  495 
  496 (Historical note: we used to give the wrapper an INLINE pragma, but
  497 CSE will not happen if there is a user-specified pragma, but should
  498 happen for w/w’ed things (#14186).  We don't need a pragma, because
  499 everything we needs is expressed by (a) the stable unfolding and (b)
  500 the inl_act activation.)
  501 
  502 Note [Drop absent bindings]
  503 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  504 Consider (#19824):
  505    let t = ...big...
  506    in ...(f t x)...
  507 
  508 were `f` ignores its first argument.  With luck f's wrapper will inline
  509 thereby dropping `t`, but maybe not: the arguments to f all look boring.
  510 
  511 So we pre-empt the problem by replacing t's RHS with an absent filler.
  512 Simple and effective.
  513 -}
  514 
  515 tryWW   :: WwOpts
  516         -> RecFlag
  517         -> Id                           -- The fn binder
  518         -> CoreExpr                     -- The bound rhs; its innards
  519                                         --   are already ww'd
  520         -> UniqSM [(Id, CoreExpr)]      -- either *one* or *two* pairs;
  521                                         -- if one, then no worker (only
  522                                         -- the orig "wrapper" lives on);
  523                                         -- if two, then a worker and a
  524                                         -- wrapper.
  525 tryWW ww_opts is_rec fn_id rhs
  526   -- Do this even if there is a NOINLINE pragma
  527   -- See Note [Worker/wrapper for NOINLINE functions]
  528 
  529   -- See Note [Drop absent bindings]
  530   | isAbsDmd (demandInfo fn_info)
  531   , not (isJoinId fn_id)
  532   , Just filler <- mkAbsentFiller ww_opts fn_id
  533   = return [(new_fn_id, filler)]
  534 
  535   -- See Note [Don't w/w INLINE things]
  536   | hasInlineUnfolding fn_info
  537   = return [(new_fn_id, rhs)]
  538 
  539   -- See Note [No worker/wrapper for record selectors]
  540   | isRecordSelector fn_id
  541   = return [ (new_fn_id, rhs ) ]
  542 
  543   | is_fun && is_eta_exp
  544   = splitFun ww_opts new_fn_id rhs
  545 
  546   -- See Note [Thunk splitting]
  547   | isNonRec is_rec, is_thunk
  548   = splitThunk ww_opts is_rec new_fn_id rhs
  549 
  550   | otherwise
  551   = return [ (new_fn_id, rhs) ]
  552 
  553   where
  554     fn_info        = idInfo fn_id
  555     (wrap_dmds, _) = splitDmdSig (dmdSigInfo fn_info)
  556     new_fn_id      = zap_join_cpr $ zap_usage fn_id
  557 
  558     zap_usage = zapIdUsedOnceInfo . zapIdUsageEnvInfo
  559         -- See Note [Zapping DmdEnv after Demand Analyzer] and
  560         -- See Note [Zapping Used Once info in WorkWrap]
  561 
  562     zap_join_cpr id
  563       | isJoinId id = id `setIdCprSig` topCprSig
  564       | otherwise   = id
  565         -- See Note [Don't w/w join points for CPR]
  566 
  567     -- is_eta_exp: see Note [Don't eta expand in w/w]
  568     is_eta_exp = length wrap_dmds == manifestArity rhs
  569     is_fun     = notNull wrap_dmds || isJoinId fn_id
  570     is_thunk   = not is_fun && not (exprIsHNF rhs) && not (isJoinId fn_id)
  571                             && not (isUnliftedType (idType fn_id))
  572 
  573 {-
  574 Note [Zapping DmdEnv after Demand Analyzer]
  575 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  576 In the worker-wrapper pass we zap the DmdEnv.  Why?
  577  (a) it is never used again
  578  (b) it wastes space
  579  (c) it becomes incorrect as things are cloned, because
  580      we don't push the substitution into it
  581 
  582 Why here?
  583  * Because we don’t want to do it in the Demand Analyzer, as we never know
  584    there when we are doing the last pass.
  585  * We want them to be still there at the end of DmdAnal, so that
  586    -ddump-str-anal contains them.
  587  * We don’t want a second pass just for that.
  588  * WorkWrap looks at all bindings anyway.
  589 
  590 We also need to do it in TidyCore.tidyLetBndr to clean up after the
  591 final, worker/wrapper-less run of the demand analyser (see
  592 Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal).
  593 
  594 Note [Zapping Used Once info in WorkWrap]
  595 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  596 During the work/wrap pass, using zapIdUsedOnceInfo, we zap the "used once" info
  597 * on every binder (let binders, case binders, lambda binders)
  598 * in both demands and in strictness signatures
  599 * recursively
  600 
  601 Why?
  602  * The simplifier may happen to transform code in a way that invalidates the
  603    data (see #11731 for an example).
  604  * It is not used in later passes, up to code generation.
  605 
  606 At first it's hard to see how the simplifier might invalidate it (and
  607 indeed for a while I thought it couldn't: #19482), but it's not quite
  608 as simple as I thought.  Consider this:
  609   {-# STRICTNESS SIG <SP(M,A)> #-}
  610   f p = let v = case p of (a,b) -> a
  611         in p `seq` (v,v)
  612 
  613 I think we'll give `f` the strictness signature `<SP(M,A)>`, where the
  614 `M` sayd that we'll evaluate the first component of the pair at most
  615 once.  Why?  Because the RHS of the thunk `v` is evaluated at most
  616 once.
  617 
  618 But now let's worker/wrapper f:
  619   {-# STRICTNESS SIG <M> #-}
  620   $wf p1 = let p2 = absentError "urk" in
  621            let p = (p1,p2) in
  622            let v = case p of (a,b) -> a
  623            in p `seq` (v,v)
  624 
  625 where I've gotten the demand on `p1` by decomposing the P(M,A) argument demand.
  626 This rapidly simplifies to
  627   {-# STRICTNESS SIG <M> #-}
  628   $wf p1 = let v = p1 in
  629            (v,v)
  630 
  631 and thence to `(p1,p1)` by inlining the trivial let. Now the demand on `p1` should
  632 not be at most once!!
  633 
  634 Conclusion: used-once info is fragile to simplification, because of
  635 the non-monotonic behaviour of let's, which turn used-many into
  636 used-once.  So indeed we should zap this info in worker/wrapper.
  637 
  638 Conclusion: kill it during worker/wrapper, using `zapUsedOnceInfo`.
  639 Both the *demand signature* of the binder, and the *demand-info* of
  640 the binder.  Moreover, do so recursively.
  641 
  642 You might wonder: why do we generate used-once info if we then throw
  643 it away.  The main reason is that we do a final run of the demand analyser,
  644 immediately before CoreTidy, which is /not/ followed by worker/wrapper; it
  645 is there only to generate used-once info for single-entry thunks.
  646 
  647 Note [Don't eta expand in w/w]
  648 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  649 A binding where the manifestArity of the RHS is less than idArity of the binder
  650 means GHC.Core.Opt.Arity didn't eta expand that binding. When this happens, it does so
  651 for a reason (see Note [exprArity invariant] in GHC.Core.Opt.Arity) and we probably have
  652 a PAP, cast or trivial expression as RHS.
  653 
  654 Below is a historical account of what happened when w/w still did eta expansion.
  655 Nowadays, it doesn't do that, but will simply w/w for the wrong arity, unleashing
  656 a demand signature meant for e.g. 2 args to be unleashed for e.g. 1 arg
  657 (manifest arity). That's at least as terrible as doing eta expansion, so don't
  658 do it.
  659 ---
  660 When worker/wrapper did eta expansion, it implictly eta expanded the binding to
  661 idArity, overriding GHC.Core.Opt.Arity's decision. Other than playing fast and loose with
  662 divergence, it's also broken for newtypes:
  663 
  664   f = (\xy.blah) |> co
  665     where
  666       co :: (Int -> Int -> Char) ~ T
  667 
  668 Then idArity is 2 (despite the type T), and it can have a DmdSig based on a
  669 threshold of 2. But we can't w/w it without a type error.
  670 
  671 The situation is less grave for PAPs, but the implicit eta expansion caused a
  672 compiler allocation regression in T15164, where huge recursive instance method
  673 groups, mostly consisting of PAPs, got w/w'd. This caused great churn in the
  674 simplifier, when simply waiting for the PAPs to inline arrived at the same
  675 output program.
  676 
  677 Note there is the worry here that such PAPs and trivial RHSs might not *always*
  678 be inlined. That would lead to reboxing, because the analysis tacitly assumes
  679 that we W/W'd for idArity and will propagate analysis information under that
  680 assumption. So far, this doesn't seem to matter in practice.
  681 See https://gitlab.haskell.org/ghc/ghc/merge_requests/312#note_192064.
  682 
  683 Note [Inline pragma for certainlyWillInline]
  684 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  685 Consider this (#19824 comment on 15 May 21):
  686   f _ (x,y) = ...big...
  687   v = ...big...
  688   g x = f v x + 1
  689 
  690 So `f` will generate a worker/wrapper split; and `g` (since it is small)
  691 will trigger the certainlyWillInline case of splitFun.  The danger is that
  692 we end up with
  693   g {- StableUnfolding = \x -> f v x + 1 -}
  694     = ...blah...
  695 
  696 Since (a) that unfolding for g is AlwaysActive
  697       (b) the unfolding for f's wrapper is ActiveAfterInitial
  698 the call of f will never inline in g's stable unfolding, thereby
  699 keeping `v` alive.
  700 
  701 I thought of changing g's unfolding to be ActiveAfterInitial, but that
  702 too is bad: it delays g's inlining into other modules, which makes fewer
  703 specialisations happen. Example in perf/should_run/DeriveNull.
  704 
  705 So I decided to live with the problem.  In fact v's RHS will be replaced
  706 by LitRubbish (see Note [Drop absent bindings]) so there is no great harm.
  707 -}
  708 
  709 
  710 ---------------------
  711 splitFun :: WwOpts -> Id -> CoreExpr -> UniqSM [(Id, CoreExpr)]
  712 splitFun ww_opts fn_id rhs
  713   = warnPprTrace (not (wrap_dmds `lengthIs` (arityInfo fn_info)))
  714                  (ppr fn_id <+> (ppr wrap_dmds $$ ppr cpr)) $
  715     do { mb_stuff <- mkWwBodies ww_opts fn_id arg_vars (exprType body) wrap_dmds cpr
  716        ; case mb_stuff of
  717             Nothing -> -- No useful wrapper; leave the binding alone
  718                        return [(fn_id, rhs)]
  719 
  720             Just stuff
  721               | let opt_wwd_rhs = simpleOptExpr (wo_simple_opts ww_opts) rhs
  722                   -- We need to stabilise the WW'd (and optimised) RHS below
  723               , Just stable_unf <- certainlyWillInline uf_opts fn_info opt_wwd_rhs
  724                 -- We could make a w/w split, but in fact the RHS is small
  725                 -- See Note [Don't w/w inline small non-loop-breaker things]
  726               , let id_w_unf = fn_id `setIdUnfolding` stable_unf
  727                 -- See Note [Inline pragma for certainlyWillInline]
  728               ->  return [ (id_w_unf, rhs) ]
  729 
  730               | otherwise
  731               -> do { work_uniq <- getUniqueM
  732                     ; return (mkWWBindPair ww_opts fn_id fn_info arg_vars body
  733                                            work_uniq div stuff) } }
  734   where
  735     uf_opts = so_uf_opts (wo_simple_opts ww_opts)
  736     fn_info = idInfo fn_id
  737     (arg_vars, body) = collectBinders rhs
  738 
  739     (wrap_dmds, div) = splitDmdSig (dmdSigInfo fn_info)
  740 
  741     cpr_ty = getCprSig (cprSigInfo fn_info)
  742     -- Arity of the CPR sig should match idArity when it's not a join point.
  743     -- See Note [Arity trimming for CPR signatures] in GHC.Core.Opt.CprAnal
  744     cpr = assertPpr (isJoinId fn_id || cpr_ty == topCprType || ct_arty cpr_ty == arityInfo fn_info)
  745                     (ppr fn_id <> colon <+> text "ct_arty:" <+> int (ct_arty cpr_ty)
  746                       <+> text "arityInfo:" <+> ppr (arityInfo fn_info)) $
  747           ct_cpr cpr_ty
  748 
  749 mkWWBindPair :: WwOpts -> Id -> IdInfo
  750              -> [Var] -> CoreExpr -> Unique -> Divergence
  751              -> ([Demand], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
  752              -> [(Id, CoreExpr)]
  753 mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
  754              (work_demands, join_arity, wrap_fn, work_fn)
  755   = [(work_id, work_rhs), (wrap_id, wrap_rhs)]
  756      -- Worker first, because wrapper mentions it
  757   where
  758     arity = arityInfo fn_info
  759             -- The arity is set by the simplifier using exprEtaExpandArity
  760             -- So it may be more than the number of top-level-visible lambdas
  761 
  762     simpl_opts = wo_simple_opts ww_opts
  763 
  764     work_rhs = work_fn (mkLams fn_args fn_body)
  765     work_act = case fn_inline_spec of  -- See Note [Worker activation]
  766                    NoInline _  -> inl_act fn_inl_prag
  767                    _           -> inl_act wrap_prag
  768 
  769     work_prag = InlinePragma { inl_src = SourceText "{-# INLINE"
  770                              , inl_inline = fn_inline_spec
  771                              , inl_sat    = Nothing
  772                              , inl_act    = work_act
  773                              , inl_rule   = FunLike }
  774       -- inl_inline: copy from fn_id; see Note [Worker/wrapper for INLINABLE functions]
  775       -- inl_act:    see Note [Worker activation]
  776       -- inl_rule:   it does not make sense for workers to be constructorlike.
  777 
  778     work_join_arity | isJoinId fn_id = Just join_arity
  779                     | otherwise      = Nothing
  780       -- worker is join point iff wrapper is join point
  781       -- (see Note [Don't w/w join points for CPR])
  782 
  783     work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
  784                 `setIdOccInfo` occInfo fn_info
  785                         -- Copy over occurrence info from parent
  786                         -- Notably whether it's a loop breaker
  787                         -- Doesn't matter much, since we will simplify next, but
  788                         -- seems right-er to do so
  789 
  790                 `setInlinePragma` work_prag
  791 
  792                 `setIdUnfolding` mkWorkerUnfolding simpl_opts work_fn fn_unfolding
  793                         -- See Note [Worker/wrapper for INLINABLE functions]
  794 
  795                 `setIdDmdSig` mkClosedDmdSig work_demands div
  796                         -- Even though we may not be at top level,
  797                         -- it's ok to give it an empty DmdEnv
  798 
  799                 `setIdCprSig` topCprSig
  800 
  801                 `setIdDemandInfo` worker_demand
  802 
  803                 `setIdArity` work_arity
  804                         -- Set the arity so that the Core Lint check that the
  805                         -- arity is consistent with the demand type goes
  806                         -- through
  807                 `asJoinId_maybe` work_join_arity
  808 
  809     work_arity = length work_demands
  810 
  811     -- See Note [Demand on the Worker]
  812     single_call = saturatedByOneShots arity (demandInfo fn_info)
  813     worker_demand | single_call = mkWorkerDemand work_arity
  814                   | otherwise   = topDmd
  815 
  816     wrap_rhs  = wrap_fn work_id
  817     wrap_prag = mkStrWrapperInlinePrag fn_inl_prag
  818     wrap_unf  = mkWrapperUnfolding simpl_opts wrap_rhs arity
  819 
  820     wrap_id   = fn_id `setIdUnfolding`  wrap_unf
  821                       `setInlinePragma` wrap_prag
  822                       `setIdOccInfo`    noOccInfo
  823                         -- Zap any loop-breaker-ness, to avoid bleating from Lint
  824                         -- about a loop breaker with an INLINE rule
  825 
  826     fn_inl_prag     = inlinePragInfo fn_info
  827     fn_inline_spec  = inl_inline fn_inl_prag
  828     fn_unfolding    = realUnfoldingInfo fn_info
  829 
  830 mkStrWrapperInlinePrag :: InlinePragma -> InlinePragma
  831 mkStrWrapperInlinePrag (InlinePragma { inl_act = act, inl_rule = rule_info })
  832   = InlinePragma { inl_src    = SourceText "{-# INLINE"
  833                  , inl_inline = NoUserInlinePrag -- See Note [Wrapper NoUserInline]
  834                  , inl_sat    = Nothing
  835                  , inl_act    = wrap_act
  836                  , inl_rule   = rule_info }  -- RuleMatchInfo is (and must be) unaffected
  837   where
  838     wrap_act  = case act of  -- See Note [Wrapper activation]
  839                    NeverActive     -> activateDuringFinal
  840                    FinalActive     -> act
  841                    ActiveAfter {}  -> act
  842                    ActiveBefore {} -> activateAfterInitial
  843                    AlwaysActive    -> activateAfterInitial
  844       -- For the last two cases, see (4) in Note [Wrapper activation]
  845       -- NB: the (ActiveBefore n) isn't quite right. We really want
  846       -- it to be active *after* Initial but *before* n.  We don't have
  847       -- a way to say that, alas.
  848 
  849 {-
  850 Note [Demand on the worker]
  851 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  852 
  853 If the original function is called once, according to its demand info, then
  854 so is the worker. This is important so that the occurrence analyser can
  855 attach OneShot annotations to the worker’s lambda binders.
  856 
  857 
  858 Example:
  859 
  860   -- Original function
  861   f [Demand=<L,1*C1(U)>] :: (a,a) -> a
  862   f = \p -> ...
  863 
  864   -- Wrapper
  865   f [Demand=<L,1*C1(U)>] :: a -> a -> a
  866   f = \p -> case p of (a,b) -> $wf a b
  867 
  868   -- Worker
  869   $wf [Demand=<L,1*C1(C1(U))>] :: Int -> Int
  870   $wf = \a b -> ...
  871 
  872 We need to check whether the original function is called once, with
  873 sufficiently many arguments. This is done using saturatedByOneShots, which
  874 takes the arity of the original function (resp. the wrapper) and the demand on
  875 the original function.
  876 
  877 The demand on the worker is then calculated using mkWorkerDemand, and always of
  878 the form [Demand=<L,1*(C1(...(C1(U))))>]
  879 
  880 
  881 Note [Do not split void functions]
  882 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  883 Consider this rather common form of binding:
  884         $j = \x:Void# -> ...no use of x...
  885 
  886 Since x is not used it'll be marked as absent.  But there is no point
  887 in w/w-ing because we'll simply add (\y:Void#), see GHC.Core.Opt.WorkWrap.Utils.mkWorerArgs.
  888 
  889 If x has a more interesting type (eg Int, or Int#), there *is* a point
  890 in w/w so that we don't pass the argument at all.
  891 
  892 Note [Thunk splitting]
  893 ~~~~~~~~~~~~~~~~~~~~~~
  894 Suppose x is used strictly (never mind whether it has the CPR
  895 property).
  896 
  897       let
  898         x* = x-rhs
  899       in body
  900 
  901 splitThunk transforms like this:
  902 
  903       let
  904         x* = case x-rhs of { I# a -> I# a }
  905       in body
  906 
  907 Now simplifier will transform to
  908 
  909       case x-rhs of
  910         I# a -> let x* = I# a
  911                 in body
  912 
  913 which is what we want. Now suppose x-rhs is itself a case:
  914 
  915         x-rhs = case e of { T -> I# a; F -> I# b }
  916 
  917 The join point will abstract over a, rather than over (which is
  918 what would have happened before) which is fine.
  919 
  920 Notice that x certainly has the CPR property now!
  921 
  922 In fact, splitThunk uses the function argument w/w splitting
  923 function, so that if x's demand is deeper (say U(U(L,L),L))
  924 then the splitting will go deeper too.
  925 
  926 NB: For recursive thunks, the Simplifier is unable to float `x-rhs` out of
  927 `x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it
  928 back to the original definition, so we just split non-recursive thunks.
  929 
  930 Note [Thunk splitting for top-level binders]
  931 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  932 Top-level bindings are never strict. Yet they can be absent, as T14270 shows:
  933 
  934   module T14270 (mkTrApp) where
  935   mkTrApp x y
  936     | Just ... <- ... typeRepKind x ...
  937     = undefined
  938     | otherwise
  939     = undefined
  940   typeRepKind = Tick scc undefined
  941 
  942 (T19180 is a profiling-free test case for this)
  943 Note that `typeRepKind` is not exported and its only use site in
  944 `mkTrApp` guards a bottoming expression. Thus, demand analysis
  945 figures out that `typeRepKind` is absent and splits the thunk to
  946 
  947   typeRepKind =
  948     let typeRepKind = Tick scc undefined in
  949     let typeRepKind = absentError in
  950     typeRepKind
  951 
  952 But now we have a local binding with an External Name
  953 (See Note [About the NameSorts]). That will trigger a CoreLint error, which we
  954 get around by localising the Id for the auxiliary bindings in 'splitThunk'.
  955 -}
  956 
  957 -- | See Note [Thunk splitting].
  958 --
  959 -- splitThunk converts the *non-recursive* binding
  960 --      x = e
  961 -- into
  962 --      x = let x' = e in
  963 --          case x' of I# y -> let x' = I# y in x'
  964 -- See comments above. Is it not beautifully short?
  965 -- Moreover, it works just as well when there are
  966 -- several binders, and if the binders are lifted
  967 -- E.g.     x = e
  968 --     -->  x = let x' = e in
  969 --              case x' of (a,b) -> let x' = (a,b)  in x'
  970 -- Here, x' is a localised version of x, in case x is a
  971 -- top-level Id with an External Name, because Lint rejects local binders with
  972 -- External Names; see Note [About the NameSorts] in GHC.Types.Name.
  973 --
  974 -- How can we do thunk-splitting on a top-level binder?  See
  975 -- Note [Thunk splitting for top-level binders].
  976 splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
  977 splitThunk ww_opts is_rec x rhs
  978   = assert (not (isJoinId x)) $
  979     do { let x' = localiseId x -- See comment above
  980        ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x'
  981        ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
  982        ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
  983                    return res
  984                    else return [(x, rhs)] }