never executed always true always false
    1 {-
    2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
    3 
    4 A library for the ``worker\/wrapper'' back-end to the strictness analyser
    5 -}
    6 
    7 
    8 {-# LANGUAGE ViewPatterns #-}
    9 
   10 module GHC.Core.Opt.WorkWrap.Utils
   11    ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs
   12    , DataConPatContext(..)
   13    , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg
   14    , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity
   15    , mkAbsentFiller
   16    , isWorkerSmallEnough
   17    )
   18 where
   19 
   20 import GHC.Prelude
   21 
   22 import GHC.Driver.Session
   23 import GHC.Driver.Config (initSimpleOpts)
   24 
   25 import GHC.Core
   26 import GHC.Core.Utils
   27 import GHC.Core.DataCon
   28 import GHC.Core.Make
   29 import GHC.Core.Subst
   30 import GHC.Core.Type
   31 import GHC.Core.Multiplicity
   32 import GHC.Core.Predicate ( isClassPred )
   33 import GHC.Core.Coercion
   34 import GHC.Core.Reduction
   35 import GHC.Core.FamInstEnv
   36 import GHC.Core.TyCon
   37 import GHC.Core.TyCon.RecWalk
   38 import GHC.Core.SimpleOpt( SimpleOpts )
   39 
   40 import GHC.Types.Id
   41 import GHC.Types.Id.Info
   42 import GHC.Types.Demand
   43 import GHC.Types.Cpr
   44 import GHC.Types.Id.Make ( voidArgId, voidPrimId )
   45 import GHC.Types.Var.Env
   46 import GHC.Types.Basic
   47 import GHC.Types.Unique.Supply
   48 import GHC.Types.Name ( getOccFS )
   49 
   50 import GHC.Data.FastString
   51 import GHC.Data.Maybe
   52 import GHC.Data.OrdList
   53 import GHC.Data.List.SetOps
   54 
   55 import GHC.Builtin.Types ( tupleDataCon )
   56 
   57 import GHC.Utils.Misc
   58 import GHC.Utils.Outputable
   59 import GHC.Utils.Panic
   60 import GHC.Utils.Panic.Plain
   61 import GHC.Utils.Trace
   62 
   63 import Control.Applicative ( (<|>) )
   64 import Control.Monad ( zipWithM )
   65 import Data.List ( unzip4 )
   66 
   67 import GHC.Types.RepType
   68 import GHC.Unit.Types
   69 
   70 {-
   71 ************************************************************************
   72 *                                                                      *
   73 \subsection[mkWrapperAndWorker]{@mkWrapperAndWorker@}
   74 *                                                                      *
   75 ************************************************************************
   76 
   77 Here's an example.  The original function is:
   78 
   79 \begin{verbatim}
   80 g :: forall a . Int -> [a] -> a
   81 
   82 g = \/\ a -> \ x ys ->
   83         case x of
   84           0 -> head ys
   85           _ -> head (tail ys)
   86 \end{verbatim}
   87 
   88 From this, we want to produce:
   89 \begin{verbatim}
   90 -- wrapper (an unfolding)
   91 g :: forall a . Int -> [a] -> a
   92 
   93 g = \/\ a -> \ x ys ->
   94         case x of
   95           I# x# -> $wg a x# ys
   96             -- call the worker; don't forget the type args!
   97 
   98 -- worker
   99 $wg :: forall a . Int# -> [a] -> a
  100 
  101 $wg = \/\ a -> \ x# ys ->
  102         let
  103             x = I# x#
  104         in
  105             case x of               -- note: body of g moved intact
  106               0 -> head ys
  107               _ -> head (tail ys)
  108 \end{verbatim}
  109 
  110 Something we have to be careful about:  Here's an example:
  111 
  112 \begin{verbatim}
  113 -- "f" strictness: U(P)U(P)
  114 f (I# a) (I# b) = a +# b
  115 
  116 g = f   -- "g" strictness same as "f"
  117 \end{verbatim}
  118 
  119 \tr{f} will get a worker all nice and friendly-like; that's good.
  120 {\em But we don't want a worker for \tr{g}}, even though it has the
  121 same strictness as \tr{f}.  Doing so could break laziness, at best.
  122 
  123 Consequently, we insist that the number of strictness-info items is
  124 exactly the same as the number of lambda-bound arguments.  (This is
  125 probably slightly paranoid, but OK in practice.)  If it isn't the
  126 same, we ``revise'' the strictness info, so that we won't propagate
  127 the unusable strictness-info into the interfaces.
  128 
  129 
  130 ************************************************************************
  131 *                                                                      *
  132 \subsection{The worker wrapper core}
  133 *                                                                      *
  134 ************************************************************************
  135 
  136 @mkWwBodies@ is called when doing the worker\/wrapper split inside a module.
  137 -}
  138 
  139 data WwOpts
  140   = MkWwOpts
  141   { wo_fam_envs          :: !FamInstEnvs
  142   , wo_simple_opts       :: !SimpleOpts
  143   , wo_cpr_anal          :: !Bool
  144   , wo_fun_to_thunk      :: !Bool
  145   , wo_max_worker_args   :: !Int
  146   -- Used for absent argument error message
  147   , wo_module            :: !Module
  148   }
  149 
  150 initWwOpts :: Module -> DynFlags -> FamInstEnvs -> WwOpts
  151 initWwOpts this_mod dflags fam_envs = MkWwOpts
  152   { wo_fam_envs          = fam_envs
  153   , wo_simple_opts       = initSimpleOpts dflags
  154   , wo_cpr_anal          = gopt Opt_CprAnal dflags
  155   , wo_fun_to_thunk      = gopt Opt_FunToThunk dflags
  156   , wo_max_worker_args   = maxWorkerArgs dflags
  157   , wo_module            = this_mod
  158   }
  159 
  160 type WwResult
  161   = ([Demand],              -- Demands for worker (value) args
  162      JoinArity,             -- Number of worker (type OR value) args
  163      Id -> CoreExpr,        -- Wrapper body, lacking only the worker Id
  164      CoreExpr -> CoreExpr)  -- Worker body, lacking the original function rhs
  165 
  166 nop_fn :: CoreExpr -> CoreExpr
  167 nop_fn body = body
  168 
  169 
  170 mkWwBodies :: WwOpts
  171            -> Id             -- ^ The original function
  172            -> [Var]          -- ^ Manifest args of original function
  173            -> Type           -- ^ Result type of the original function,
  174                              --   after being stripped of args
  175            -> [Demand]       -- ^ Strictness of original function
  176            -> Cpr            -- ^ Info about function result
  177            -> UniqSM (Maybe WwResult)
  178 -- ^ Given a function definition
  179 --
  180 -- > data T = MkT Int Bool Char
  181 -- > f :: (a, b) -> Int -> T
  182 -- > f = \x y -> E
  183 --
  184 -- @mkWwBodies _ 'f' ['x::(a,b)','y::Int'] '(a,b)' ['1P(L,L)', '1P(L)'] '1'@
  185 -- returns
  186 --
  187 --   * The wrapper body context for the call to the worker function, lacking
  188 --     only the 'Id' for the worker function:
  189 --
  190 --     > W[_] :: Id -> CoreExpr
  191 --     > W[work_fn] = \x y ->          -- args of the wrapper    (cloned_arg_vars)
  192 --     >   case x of (a, b) ->         -- unbox wrapper args     (wrap_fn_str)
  193 --     >   case y of I# n ->           --
  194 --     >   case <work_fn> a b n of     -- call to the worker fun (call_work)
  195 --     >   (# i, b, c #) -> MkT i b c  -- rebox result           (wrap_fn_cpr)
  196 --
  197 --   * The worker body context that wraps around its hole reboxing defns for x
  198 --     and y, as well as returning CPR transit variables of the unboxed MkT
  199 --     result in an unboxed tuple:
  200 --
  201 --     > w[_] :: CoreExpr -> CoreExpr
  202 --     > w[fn_rhs] = \a b n ->              -- args of the worker       (work_lam_args)
  203 --     >   let { y = I# n; x = (a, b) } in  -- reboxing wrapper args    (work_fn_str)
  204 --     >   case <fn_rhs> x y of             -- call to the original RHS (call_rhs)
  205 --     >   MkT i b c -> (# i, b, c #)       -- return CPR transit vars  (work_fn_cpr)
  206 --
  207 --     NB: The wrap_rhs hole is to be filled with the original wrapper RHS
  208 --     @\x y -> E@. This is so that we can also use @w@ to transform stable
  209 --     unfoldings, the lambda args of which may be different than x and y.
  210 --
  211 --   * Id details for the worker function like demands on arguments and its join
  212 --     arity.
  213 --
  214 -- All without looking at E (except for beta reduction, see Note [Join points
  215 -- and beta-redexes]), which allows us to apply the same split to function body
  216 -- and its unfolding(s) alike.
  217 --
  218 mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
  219   = do  { massertPpr (filter isId arg_vars `equalLength` demands)
  220                      (text "wrong wrapper arity" $$ ppr fun_id $$ ppr arg_vars $$ ppr res_ty $$ ppr demands)
  221 
  222         -- Clone and prepare arg_vars of the original fun RHS
  223         -- See Note [Freshen WW arguments]
  224         -- and Note [Zap IdInfo on worker args]
  225         ; uniq_supply <- getUniqueSupplyM
  226         ; let args_free_tcvs = tyCoVarsOfTypes (res_ty : map varType arg_vars)
  227               empty_subst = mkEmptySubst (mkInScopeSet args_free_tcvs)
  228               zapped_arg_vars = map zap_var arg_vars
  229               (subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars
  230               res_ty' = GHC.Core.Subst.substTy subst res_ty
  231 
  232         ; (useful1, work_args, wrap_fn_str, fn_args)
  233              <- mkWWstr opts cloned_arg_vars
  234 
  235         -- Do CPR w/w.  See Note [Always do CPR w/w]
  236         ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
  237               <- mkWWcpr_entry opts res_ty' res_cpr
  238 
  239         ; let (work_lam_args, work_call_args) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
  240                                                              work_args cpr_res_ty
  241               call_work work_fn  = mkVarApps (Var work_fn) work_call_args
  242               call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args
  243                                   -- See Note [Join points and beta-redexes]
  244               wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work
  245               worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs
  246               worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v]
  247 
  248         ; if isWorkerSmallEnough (wo_max_worker_args opts) (length demands) work_args
  249              && not (too_many_args_for_join_point arg_vars)
  250              && ((useful1 && not only_one_void_argument) || useful2)
  251           then return (Just (worker_args_dmds, length work_call_args,
  252                        wrapper_body, worker_body))
  253           else return Nothing
  254         }
  255         -- We use an INLINE unconditionally, even if the wrapper turns out to be
  256         -- something trivial like
  257         --      fw = ...
  258         --      f = __inline__ (coerce T fw)
  259         -- The point is to propagate the coerce to f's call sites, so even though
  260         -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent
  261         -- fw from being inlined into f's RHS
  262   where
  263     zap_var v | isTyVar v = v
  264               | otherwise = modifyIdInfo zap_info v
  265     zap_info info -- See Note [Zap IdInfo on worker args]
  266       = info `setOccInfo`       noOccInfo
  267 
  268     mb_join_arity = isJoinId_maybe fun_id
  269 
  270     -- Note [Do not split void functions]
  271     only_one_void_argument
  272       | [d] <- demands
  273       , [v] <- filter isId arg_vars
  274       , isAbsDmd d && isVoidTy (idType v)
  275       = True
  276       | otherwise
  277       = False
  278 
  279     -- Note [Join points returning functions]
  280     too_many_args_for_join_point wrap_args
  281       | Just join_arity <- mb_join_arity
  282       , wrap_args `lengthExceeds` join_arity
  283       = warnPprTrace True (text "Unable to worker/wrapper join point with arity " <+>
  284                      int join_arity <+> text "but" <+>
  285                      int (length wrap_args) <+> text "args") $
  286         True
  287       | otherwise
  288       = False
  289 
  290 -- | Version of 'GHC.Core.mkApps' that does beta reduction on-the-fly.
  291 -- PRECONDITION: The arg expressions are not free in any of the lambdas binders.
  292 mkAppsBeta :: CoreExpr -> [CoreArg] -> CoreExpr
  293 -- The precondition holds for our call site in mkWwBodies, because all the FVs
  294 -- of as are either cloned_arg_vars (and thus fresh) or fresh worker args.
  295 mkAppsBeta (Lam b body) (a:as) = bindNonRec b a $! mkAppsBeta body as
  296 mkAppsBeta f            as     = mkApps f as
  297 
  298 -- See Note [Limit w/w arity]
  299 isWorkerSmallEnough :: Int -> Int -> [Var] -> Bool
  300 isWorkerSmallEnough max_worker_args old_n_args vars
  301   = count isId vars <= max old_n_args max_worker_args
  302     -- We count only Free variables (isId) to skip Type, Kind
  303     -- variables which have no runtime representation.
  304     -- Also if the function took 82 arguments before (old_n_args), it's fine if
  305     -- it takes <= 82 arguments afterwards.
  306 
  307 {-
  308 Note [Always do CPR w/w]
  309 ~~~~~~~~~~~~~~~~~~~~~~~~
  310 At one time we refrained from doing CPR w/w for thunks, on the grounds that
  311 we might duplicate work.  But that is already handled by the demand analyser,
  312 which doesn't give the CPR property if w/w might waste work: see
  313 Note [CPR for thunks] in GHC.Core.Opt.DmdAnal.
  314 
  315 And if something *has* been given the CPR property and we don't w/w, it's
  316 a disaster, because then the enclosing function might say it has the CPR
  317 property, but now doesn't and there a cascade of disaster.  A good example
  318 is #5920.
  319 
  320 Note [Limit w/w arity]
  321 ~~~~~~~~~~~~~~~~~~~~~~~~
  322 Guard against high worker arity as it generates a lot of stack traffic.
  323 A simplified example is #11565#comment:6
  324 
  325 Current strategy is very simple: don't perform w/w transformation at all
  326 if the result produces a wrapper with arity higher than -fmax-worker-args
  327 and the number arguments before w/w (see #18122).
  328 
  329 It is a bit all or nothing, consider
  330 
  331         f (x,y) (a,b,c,d,e ... , z) = rhs
  332 
  333 Currently we will remove all w/w ness entirely. But actually we could
  334 w/w on the (x,y) pair... it's the huge product that is the problem.
  335 
  336 Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
  337 solve f. But we can get a lot of args from deeply-nested products:
  338 
  339         g (a, (b, (c, (d, ...)))) = rhs
  340 
  341 This is harder to spot on an arg-by-arg basis. Previously mkWwStr was
  342 given some "fuel" saying how many arguments it could add; when we ran
  343 out of fuel it would stop w/wing.
  344 
  345 Still not very clever because it had a left-right bias.
  346 
  347 Note [Zap IdInfo on worker args]
  348 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  349 We have to zap the following IdInfo when re-using arg variables of the original
  350 function for the worker:
  351 
  352   * OccInfo: Dead wrapper args now occur in Apps of the worker's call to the
  353     original fun body. Those occurrences will quickly cancel away with the lambdas
  354     of the fun body in the next run of the Simplifier, but CoreLint will complain
  355     in the meantime, so zap it.
  356 
  357 We zap in mkWwBodies because we need the zapped variables both when binding them
  358 in mkWWstr (mkAbsentFiller, specifically) and in mkWorkerArgs, where we produce
  359 the call to the fun body.
  360 
  361 ************************************************************************
  362 *                                                                      *
  363 \subsection{Making wrapper args}
  364 *                                                                      *
  365 ************************************************************************
  366 
  367 During worker-wrapper stuff we may end up with an unlifted thing
  368 which we want to let-bind without losing laziness.  So we
  369 add a void argument.  E.g.
  370 
  371         f = /\a -> \x y z -> E::Int#    -- E does not mention x,y,z
  372 ==>
  373         fw = /\ a -> \void -> E
  374         f  = /\ a -> \x y z -> fw realworld
  375 
  376 We use the state-token type which generates no code.
  377 -}
  378 
  379 mkWorkerArgs :: Id      -- The wrapper Id
  380              -> Bool
  381              -> [Var]
  382              -> Type    -- Type of body
  383              -> ([Var], -- Lambda bound args
  384                  [Var]) -- Args at call site
  385 mkWorkerArgs wrap_id fun_to_thunk args res_ty
  386     | not (isJoinId wrap_id) -- Join Ids never need an extra arg
  387     , not (any isId args)    -- No existing value lambdas
  388     , needs_a_value_lambda   -- and we need to add one
  389     = (args ++ [voidArgId], args ++ [voidPrimId])
  390 
  391     | otherwise
  392     = (args, args)
  393     where
  394       -- If fun_to_thunk is False we always keep at least one value
  395       --   argument: see Note [Protecting the last value argument]
  396       -- If it is True, we only need to keep a value argument if
  397       --   the result type is (or might be) unlifted, in which case
  398       --   dropping the last arg would mean we wrongly used call-by-value
  399       needs_a_value_lambda
  400         = not fun_to_thunk
  401           || might_be_unlifted
  402 
  403       -- Might the result be lifted?
  404       --     False => definitely lifted
  405       --     True  => might be unlifted
  406       -- We may encounter a representation-polymorphic result, in which case we
  407       -- conservatively assume that we have laziness that needs
  408       -- preservation. See #15186.
  409       might_be_unlifted = case isLiftedType_maybe res_ty of
  410                             Just lifted -> not lifted
  411                             Nothing     -> True
  412 
  413 {-
  414 Note [Protecting the last value argument]
  415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  416 If the user writes (\_ -> E), they might be intentionally disallowing
  417 the sharing of E. Since absence analysis and worker-wrapper are keen
  418 to remove such unused arguments, we add in a void argument to prevent
  419 the function from becoming a thunk.
  420 
  421 The user can avoid adding the void argument with the -ffun-to-thunk
  422 flag. However, this can create sharing, which may be bad in two ways. 1) It can
  423 create a space leak. 2) It can prevent inlining *under a lambda*. If w/w
  424 removes the last argument from a function f, then f now looks like a thunk, and
  425 so f can't be inlined *under a lambda*.
  426 
  427 Note [Join points and beta-redexes]
  428 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  429 Originally, the worker would invoke the original function by calling it with
  430 arguments, thus producing a beta-redex for the simplifier to munch away:
  431 
  432   \x y z -> e => (\x y z -> e) wx wy wz
  433 
  434 Now that we have special rules about join points, however, this is Not Good if
  435 the original function is itself a join point, as then it may contain invocations
  436 of other join points:
  437 
  438   join j1 x = ...
  439   join j2 y = if y == 0 then 0 else j1 y
  440 
  441   =>
  442 
  443   join j1 x = ...
  444   join $wj2 y# = let wy = I# y# in (\y -> if y == 0 then 0 else jump j1 y) wy
  445   join j2 y = case y of I# y# -> jump $wj2 y#
  446 
  447 There can't be an intervening lambda between a join point's declaration and its
  448 occurrences, so $wj2 here is wrong. But of course, this is easy enough to fix:
  449 
  450   ...
  451   let join $wj2 y# = let wy = I# y# in let y = wy in if y == 0 then 0 else j1 y
  452   ...
  453 
  454 Hence we simply do the beta-reduction here. (This would be harder if we had to
  455 worry about hygiene, but luckily wy is freshly generated.)
  456 
  457 Note [Join points returning functions]
  458 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  459 It is crucial that the arity of a join point depends on its *callers,* not its
  460 own syntax. What this means is that a join point can have "extra lambdas":
  461 
  462 f :: Int -> Int -> (Int, Int) -> Int
  463 f x y = join j (z, w) = \(u, v) -> ...
  464         in jump j (x, y)
  465 
  466 Typically this happens with functions that are seen as computing functions,
  467 rather than being curried. (The real-life example was GHC.Data.Graph.Ops.addConflicts.)
  468 
  469 When we create the wrapper, it *must* be in "eta-contracted" form so that the
  470 jump has the right number of arguments:
  471 
  472 f x y = join $wj z' w' = \u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ...
  473              j (z, w)  = jump $wj z w
  474 
  475 (See Note [Join points and beta-redexes] for where the lets come from.) If j
  476 were a function, we would instead say
  477 
  478 f x y = let $wj = \z' w' u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ...
  479             j (z, w) (u, v) = $wj z w u v
  480 
  481 Notice that the worker ends up with the same lambdas; it's only the wrapper we
  482 have to be concerned about.
  483 
  484 FIXME Currently the functionality to produce "eta-contracted" wrappers is
  485 unimplemented; we simply give up.
  486 
  487 Note [Freshen WW arguments]
  488 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  489 When we do a worker/wrapper split, we must freshen the arg vars of the original
  490 fun RHS because they might shadow each other. E.g.
  491 
  492   f :: forall a. Maybe a -> forall a. Maybe a -> Int -> Int
  493   f @a x @a y z = case x <|> y of
  494     Nothing -> z
  495     Just _  -> z + 1
  496 
  497   ==> {WW split unboxing the Int}
  498 
  499   $wf :: forall a. Maybe a -> forall a. Maybe a -> Int# -> Int
  500   $wf @a x @a y wz = (\@a x @a y z -> case x <|> y of ...) ??? x @a y (I# wz)
  501 
  502 (Notice that the code we actually emit will sort-of ANF-ise the lambda args,
  503 leading to even more shadowing issues. The above demonstrates that even if we
  504 try harder we'll still get shadowing issues.)
  505 
  506 What should we put in place for ??? ? Certainly not @a, because that would
  507 reference the wrong, inner a. A similar situation occurred in #12562, we even
  508 saw a type variable in the worker shadowing an outer term-variable binding.
  509 
  510 We avoid the issue by freshening the argument variables from the original fun
  511 RHS through 'cloneBndrs', which will also take care of subsitution in binder
  512 types. Fortunately, it's sufficient to pick the FVs of the arg vars as in-scope
  513 set, so that we don't need to do a FV traversal over the whole body of the
  514 original function.
  515 
  516 At the moment, #12562 has no regression test. As such, this Note is not covered
  517 by any test logic or when bootstrapping the compiler. Yet we clearly want to
  518 freshen the binders, as the example above demonstrates.
  519 Adding a Core pass that maximises shadowing for testing purposes might help,
  520 see #17478.
  521 -}
  522 
  523 {-
  524 ************************************************************************
  525 *                                                                      *
  526 \subsection{Unboxing Decision for Strictness and CPR}
  527 *                                                                      *
  528 ************************************************************************
  529 -}
  530 
  531 -- | The information needed to build a pattern for a DataCon to be unboxed.
  532 -- The pattern can be generated from 'dcpc_dc' and 'dcpc_tc_args' via
  533 -- 'GHC.Core.Utils.dataConRepInstPat'. The coercion 'dcpc_co' is for newtype
  534 -- wrappers.
  535 --
  536 -- If we get @DataConPatContext dc tys co@ for some type @ty@
  537 -- and @dataConRepInstPat ... dc tys = (exs, flds)@, then
  538 --
  539 --   * @dc @exs flds :: T tys@
  540 --   * @co :: T tys ~ ty@
  541 data DataConPatContext
  542   = DataConPatContext
  543   { dcpc_dc      :: !DataCon
  544   , dcpc_tc_args :: ![Type]
  545   , dcpc_co      :: !Coercion
  546   }
  547 
  548 -- | Describes the outer shape of an argument to be unboxed or left as-is
  549 -- Depending on how @s@ is instantiated (e.g., 'Demand' or 'Cpr').
  550 data UnboxingDecision s
  551   = StopUnboxing
  552   -- ^ We ran out of strictness info. Leave untouched.
  553   | DropAbsent
  554   -- ^ The argument/field was absent. Drop it.
  555   | Unbox !DataConPatContext [s]
  556   -- ^ The argument is used strictly or the returned product was constructed, so
  557   -- unbox it.
  558   -- The 'DataConPatContext' carries the bits necessary for
  559   -- instantiation with 'dataConRepInstPat'.
  560   -- The @[s]@ carries the bits of information with which we can continue
  561   -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'.
  562 
  563 -- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns
  564 -- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon'
  565 -- to unbox.
  566 wantToUnboxArg
  567   :: FamInstEnvs
  568   -> Type                -- ^ Type of the argument
  569   -> Demand              -- ^ How the arg was used
  570   -> UnboxingDecision Demand
  571 -- See Note [Which types are unboxed?]
  572 wantToUnboxArg fam_envs ty (n :* sd)
  573   | isAbs n
  574   = DropAbsent
  575 
  576   | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
  577   , Just dc <- tyConSingleAlgDataCon_maybe tc
  578   , let arity = dataConRepArity dc
  579   , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis]
  580   -- NB: No strictness or evaluatedness checks here. That is done by
  581   -- 'finaliseBoxity'!
  582   = Unbox (DataConPatContext dc tc_args co) ds
  583 
  584   | otherwise
  585   = StopUnboxing
  586 
  587 
  588 -- | Unboxing strategy for constructed results.
  589 wantToUnboxResult :: FamInstEnvs -> Type -> Cpr -> UnboxingDecision Cpr
  590 -- See Note [Which types are unboxed?]
  591 wantToUnboxResult fam_envs ty cpr
  592   | Just (con_tag, arg_cprs) <- asConCpr cpr
  593   , Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
  594   , Just dcs <- tyConAlgDataCons_maybe tc <|> open_body_ty_warning
  595   , dcs `lengthAtLeast` con_tag -- This might not be true if we import the
  596                                 -- type constructor via a .hs-boot file (#8743)
  597   , let dc = dcs `getNth` (con_tag - fIRST_TAG)
  598   , null (dataConExTyCoVars dc) -- no existentials;
  599                                 -- See Note [Which types are unboxed?]
  600                                 -- and GHC.Core.Opt.CprAnal.argCprType
  601                                 -- where we also check this.
  602   , all isLinear (dataConInstArgTys dc tc_args)
  603   -- Deactivates CPR worker/wrapper splits on constructors with non-linear
  604   -- arguments, for the moment, because they require unboxed tuple with variable
  605   -- multiplicity fields.
  606   = Unbox (DataConPatContext dc tc_args co) arg_cprs
  607 
  608   | otherwise
  609   = StopUnboxing
  610 
  611   where
  612     -- | See Note [non-algebraic or open body type warning]
  613     open_body_ty_warning = warnPprTrace True (text "wantToUnboxResult: non-algebraic or open body type" <+> ppr ty) Nothing
  614 
  615 isLinear :: Scaled a -> Bool
  616 isLinear (Scaled w _ ) =
  617   case w of
  618     One -> True
  619     _ -> False
  620 
  621 
  622 {- Note [Which types are unboxed?]
  623 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  624 Worker/wrapper will unbox
  625 
  626   1. A strict data type argument, that
  627        * is an algebraic data type (not a newtype)
  628        * has a single constructor (thus is a "product")
  629        * that may bind existentials
  630      We can transform
  631      > data D a = forall b. D a b
  632      > f (D @ex a b) = e
  633      to
  634      > $wf @ex a b = e
  635      via 'mkWWstr'.
  636 
  637   2. The constructed result of a function, if
  638        * its type is an algebraic data type (not a newtype)
  639        * (might have multiple constructors, in contrast to (1))
  640        * the applied data constructor *does not* bind existentials
  641      We can transform
  642      > f x y = let ... in D a b
  643      to
  644      > $wf x y = let ... in (# a, b #)
  645      via 'mkWWcpr'.
  646 
  647      NB: We don't allow existentials for CPR W/W, because we don't have unboxed
  648      dependent tuples (yet?). Otherwise, we could transform
  649      > f x y = let ... in D @ex (a :: ..ex..) (b :: ..ex..)
  650      to
  651      > $wf x y = let ... in (# @ex, (a :: ..ex..), (b :: ..ex..) #)
  652 
  653 The respective tests are in 'wantToUnboxArg' and
  654 'wantToUnboxResult', respectively.
  655 
  656 Note that the data constructor /can/ have evidence arguments: equality
  657 constraints, type classes etc.  So it can be GADT.  These evidence
  658 arguments are simply value arguments, and should not get in the way.
  659 
  660 Note [Do not unbox class dictionaries]
  661 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  662 If we have
  663    f :: Ord a => [a] -> Int -> a
  664    {-# INLINABLE f #-}
  665 and we worker/wrapper f, we'll get a worker with an INLINABLE pragma
  666 (see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap),
  667 which can still be specialised by the type-class specialiser, something like
  668    fw :: Ord a => [a] -> Int# -> a
  669 
  670 BUT if f is strict in the Ord dictionary, we might unpack it, to get
  671    fw :: (a->a->Bool) -> [a] -> Int# -> a
  672 and the type-class specialiser can't specialise that. An example is #6056.
  673 
  674 But in any other situation, a dictionary is just an ordinary value,
  675 and can be unpacked.  So we track the INLINABLE pragma, and discard the boxity
  676 flag in finaliseBoxity (see the isClassPred test).
  677 
  678 Historical note: #14955 describes how I got this fix wrong the first time.
  679 
  680 Note that the simplicity of this fix implies that INLINE functions (such as
  681 wrapper functions after the WW run) will never say that they unbox class
  682 dictionaries. That's not ideal, but not worth losing sleep over, as INLINE
  683 functions will have been inlined by the time we run demand analysis so we'll
  684 see the unboxing around the worker in client modules. I got aware of the issue
  685 in T5075 by the change in boxity of loop between demand analysis runs.
  686 
  687 Note [mkWWstr and unsafeCoerce]
  688 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  689 By using unsafeCoerce, it is possible to make the number of demands fail to
  690 match the number of constructor arguments; this happened in #8037.
  691 If so, the worker/wrapper split doesn't work right and we get a Core Lint
  692 bug.  The fix here is simply to decline to do w/w if that happens.
  693 
  694 Note [Unboxing evaluated arguments]
  695 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  696 Consider this program (due to Roman):
  697 
  698     data X a = X !a
  699 
  700     foo :: X Int -> Int -> Int
  701     foo x@(X a) n = go 0
  702      where
  703        go i | i < n     = a + go (i+1)
  704             | otherwise = 0
  705 
  706 We want the worker for 'foo' too look like this:
  707 
  708     $wfoo :: Int# -> Int# -> Int#
  709 
  710 with the first argument unboxed, so that it is not eval'd each time around the
  711 'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It
  712 is sound for the wrapper to pass an unboxed arg because X is strict
  713 (see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument
  714 must be evaluated. And if we *don't* pass an unboxed argument, we can't even
  715 repair it by adding a `seq` thus:
  716 
  717     foo (X a) n = a `seq` go 0
  718 
  719 because the seq is discarded (very early) since X is strict!
  720 
  721 So here's what we do
  722 
  723 * Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis
  724   alone, but account for the additional evaluatedness when annotating the binder
  725   in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity
  726   on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used
  727   lazily but unboxed nonetheless. This seems to contradict
  728   Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is
  729   evaluated and thus can be unboxed.
  730 
  731 * When 'finaliseBoxity' decides to unbox a record, it will zip the field demands
  732   together with the respective 'StrictnessMark'. In case of 'x', it will pair
  733   up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for
  734   the strict field.
  735 
  736 * Said 'StrictnessMark' is passed to the recursive invocation of
  737   'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but
  738   since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'.
  739 
  740 * Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will
  741   /not/ look at the strictness bits of the demand, only at Boxity flags. As such,
  742   it will happily unbox 'a' despite the lazy demand on it.
  743 
  744 The net effect is that boxity analysis and the w/w transformation are more
  745 aggressive about unboxing the strict arguments of a data constructor than when
  746 looking at strictness info exclusively. It is very much like (Nested) CPR, which
  747 needs its nested fields to be evaluated in order for it to unbox nestedly.
  748 
  749 There is the usual danger of reboxing, which as usual we ignore. But
  750 if X is monomorphic, and has an UNPACK pragma, then this optimisation
  751 is even more important.  We don't want the wrapper to rebox an unboxed
  752 argument, and pass an Int to $wfoo!
  753 
  754 This works in nested situations like T10482
  755 
  756     data family Bar a
  757     data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
  758     newtype instance Bar Int = Bar Int
  759 
  760     foo :: Bar ((Int, Int), Int) -> Int -> Int
  761     foo f k = case f of BarPair x y ->
  762               case burble of
  763                  True -> case x of
  764                            BarPair p q -> ...
  765                  False -> ...
  766 
  767 The extra eagerness lets us produce a worker of type:
  768      $wfoo :: Int# -> Int# -> Int# -> Int -> Int
  769      $wfoo p# q# y# = ...
  770 
  771 even though the `case x` is only lazily evaluated.
  772 
  773 --------- Historical note ------------
  774 We used to add data-con strictness demands when demand analysing case
  775 expression. However, it was noticed in #15696 that this misses some cases. For
  776 instance, consider the program (from T10482)
  777 
  778     data family Bar a
  779     data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
  780     newtype instance Bar Int = Bar Int
  781 
  782     foo :: Bar ((Int, Int), Int) -> Int -> Int
  783     foo f k =
  784       case f of
  785         BarPair x y -> case burble of
  786                           True -> case x of
  787                                     BarPair p q -> ...
  788                           False -> ...
  789 
  790 We really should be able to assume that `p` is already evaluated since it came
  791 from a strict field of BarPair. This strictness would allow us to produce a
  792 worker of type:
  793 
  794     $wfoo :: Int# -> Int# -> Int# -> Int -> Int
  795     $wfoo p# q# y# = ...
  796 
  797 even though the `case x` is only lazily evaluated
  798 
  799 Indeed before we fixed #15696 this would happen since we would float the inner
  800 `case x` through the `case burble` to get:
  801 
  802     foo f k =
  803       case f of
  804         BarPair x y -> case x of
  805                           BarPair p q -> case burble of
  806                                           True -> ...
  807                                           False -> ...
  808 
  809 However, after fixing #15696 this could no longer happen (for the reasons
  810 discussed in ticket:15696#comment:76). This means that the demand placed on `f`
  811 would then be significantly weaker (since the False branch of the case on
  812 `burble` is not strict in `p` or `q`).
  813 
  814 Consequently, we now instead account for data-con strictness in mkWWstr_one,
  815 applying the strictness demands to the final result of DmdAnal. The result is
  816 that we get the strict demand signature we wanted even if we can't float
  817 the case on `x` up through the case on `burble`.
  818 
  819 Note [No nested Unboxed inside Boxed in demand signature]
  820 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  821 Consider
  822 ```
  823 f p@(x,y)
  824   | even (x+y) = []
  825   | otherwise  = [p]
  826 ```
  827 Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)`
  828 on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper
  829 can't unbox the pair components without unboxing the pair! So we better say
  830 `1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info.
  831 That happens in 'finaliseBoxity'.
  832 
  833 Note [No lazy, Unboxed demands in demand signature]
  834 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  835 Consider T19407:
  836 
  837   data Huge = Huge Bool () ... () -- think: DynFlags
  838   data T = T { h :: Huge, n :: Int }
  839   f t@(T h _) = g h t
  840   g (H b _ ... _) t = if b then 1 else n t
  841 
  842 The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better
  843 not put that demand in `g`'s demand signature, because worker/wrapper will not
  844 in general unbox a lazy-and-unboxed demand like `L!P(..)`.
  845 (The exception are known-to-be-evaluated arguments like strict fields,
  846 see Note [Unboxing evaluated arguments].)
  847 
  848 The program above is an example where spreading misinformed boxity through the
  849 signature is particularly egregious. If we give `g` that signature, then `f`
  850 puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and
  851 we get
  852 
  853   f (T (H b _ ... _) n) = $wf b n
  854   $wf b n = $wg b (T (H b x ... x) n)
  855   $wg = ...
  856 
  857 Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in
  858 'finaliseBoxity', modulo Note [Unboxing evaluated arguments].
  859 
  860 Note [Finalising boxity for demand signature]
  861 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  862 The worker/wrapper pass must strictly adhere to the boxity decisions encoded
  863 in the demand signature, because that is the information that demand analysis
  864 propagates throughout the program. Failing to implement the strategy laid out
  865 in the signature can result in reboxing in unexpected places. Hence, we must
  866 completely anticipate unboxing decisions during demand analysis and reflect
  867 these decicions in demand annotations. That is the job of 'finaliseBoxity',
  868 which is defined here and called from demand analysis.
  869 
  870 Here is a list of different Notes it has to take care of:
  871 
  872   * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in
  873     general, but still allow Note [Unboxing evaluated arguments]
  874   * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)`
  875   * Implement fixes for corner cases Note [Do not unbox class dictionaries]
  876     and Note [mkWWstr and unsafeCoerce]
  877 
  878 Then, in worker/wrapper blindly trusts the boxity info in the demand signature
  879 and will not look at strictness info *at all*, in 'wantToUnboxArg'.
  880 
  881 Note [non-algebraic or open body type warning]
  882 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  883 There are a few cases where the W/W transformation is told that something
  884 returns a constructor, but the type at hand doesn't really match this. One
  885 real-world example involves unsafeCoerce:
  886   foo = IO a
  887   foo = unsafeCoerce c_exit
  888   foreign import ccall "c_exit" c_exit :: IO ()
  889 Here CPR will tell you that `foo` returns a () constructor for sure, but trying
  890 to create a worker/wrapper for type `a` obviously fails.
  891 (This was a real example until ee8e792  in libraries/base.)
  892 
  893 It does not seem feasible to avoid all such cases already in the analyser (and
  894 after all, the analysis is not really wrong), so we simply do nothing here in
  895 mkWWcpr. But we still want to emit warning with -DDEBUG, to hopefully catch
  896 other cases where something went avoidably wrong.
  897 
  898 This warning also triggers for the stream fusion library within `text`.
  899 We can'easily W/W constructed results like `Stream` because we have no simple
  900 way to express existential types in the worker's type signature.
  901 -}
  902 
  903 {-
  904 ************************************************************************
  905 *                                                                      *
  906 \subsection{Worker/wrapper for Strictness and Absence}
  907 *                                                                      *
  908 ************************************************************************
  909 -}
  910 
  911 mkWWstr :: WwOpts
  912         -> [Var]                         -- Wrapper args; have their demand info on them
  913                                          --  *Includes type variables*
  914         -> UniqSM (Bool,                 -- Is this useful
  915                    [Var],                -- Worker args
  916                    CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call
  917                                          -- and without its lambdas
  918                                          -- This fn adds the unboxing
  919                    [CoreExpr])           -- Reboxed args for the call to the
  920                                          -- original RHS. Corresponds one-to-one
  921                                          -- with the wrapper arg vars
  922 mkWWstr opts args
  923   = go args
  924   where
  925     go_one arg = mkWWstr_one opts arg
  926 
  927     go []           = return (False, [], nop_fn, [])
  928     go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg)  <- go_one arg
  929                          ; (useful2, args2, wrap_fn2, wrap_args) <- go args
  930                          ; return ( useful1 || useful2
  931                                   , args1 ++ args2
  932                                   , wrap_fn1 . wrap_fn2
  933                                   , wrap_arg:wrap_args ) }
  934 
  935 ----------------------
  936 -- mkWWstr_one wrap_var = (useful, work_args, wrap_fn, wrap_arg)
  937 --   *  wrap_fn assumes wrap_var is in scope,
  938 --        brings into scope work_args (via cases)
  939 --   * wrap_arg assumes work_args are in scope, and builds a ConApp that
  940 --        reconstructs the RHS of wrap_var that we pass to the original RHS
  941 -- See Note [Worker/wrapper for Strictness and Absence]
  942 mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
  943 mkWWstr_one opts arg =
  944   case wantToUnboxArg fam_envs arg_ty arg_dmd of
  945     _ | isTyVar arg -> do_nothing
  946 
  947     DropAbsent
  948       | Just absent_filler <- mkAbsentFiller opts arg
  949          -- Absent case.  We can't always handle absence for arbitrary
  950          -- unlifted types, so we need to choose just the cases we can
  951          -- (that's what mkAbsentFiller does)
  952       -> return (True, [], nop_fn, absent_filler)
  953 
  954     Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc
  955 
  956     _ -> do_nothing -- Other cases, like StopUnboxing
  957 
  958   where
  959     fam_envs   = wo_fam_envs opts
  960     arg_ty     = idType arg
  961     arg_dmd    = idDemandInfo arg
  962     do_nothing = return (False, [arg], nop_fn, varToCoreExpr arg)
  963 
  964 unbox_one_arg :: WwOpts
  965           -> Var
  966           -> [Demand]
  967           -> DataConPatContext
  968           -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
  969 unbox_one_arg opts arg_var ds
  970           DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
  971                             , dcpc_co = co }
  972   = do { pat_bndrs_uniqs <- getUniquesM
  973        ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc
  974              (ex_tvs', arg_ids) =
  975                dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
  976              arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
  977              unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
  978                                      dc (ex_tvs' ++ arg_ids')
  979        ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids')
  980        ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
  981        ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) }
  982                           -- Don't pass the arg, rebox instead
  983 
  984 -- | Tries to find a suitable absent filler to bind the given absent identifier
  985 -- to. See Note [Absent fillers].
  986 --
  987 -- If @mkAbsentFiller _ id == Just e@, then @e@ is an absent filler with the
  988 -- same type as @id@. Otherwise, no suitable filler could be found.
  989 mkAbsentFiller :: WwOpts -> Id -> Maybe CoreExpr
  990 mkAbsentFiller opts arg
  991   -- The lifted case: Bind 'absentError' for a nice panic message if we are
  992   -- wrong (like we were in #11126). See (1) in Note [Absent fillers]
  993   | not (isUnliftedType arg_ty)
  994   , not is_strict, not is_evald -- See (2) in Note [Absent fillers]
  995   = Just (mkAbsentErrorApp arg_ty msg)
  996 
  997   -- The default case for mono rep: Bind `RUBBISH[rr] arg_ty`
  998   -- See Note [Absent fillers], the main part
  999   | otherwise
 1000   = mkLitRubbish arg_ty
 1001 
 1002   where
 1003     arg_ty    = idType arg
 1004     is_strict = isStrictDmd (idDemandInfo arg)
 1005     is_evald  = isEvaldUnfolding $ idUnfolding arg
 1006 
 1007     msg = renderWithContext
 1008             (defaultSDocContext { sdocSuppressUniques = True })
 1009             (vcat
 1010               [ text "Arg:" <+> ppr arg
 1011               , text "Type:" <+> ppr arg_ty
 1012               , file_msg ])
 1013               -- We need to suppress uniques here because otherwise they'd
 1014               -- end up in the generated code as strings. This is bad for
 1015               -- determinism, because with different uniques the strings
 1016               -- will have different lengths and hence different costs for
 1017               -- the inliner leading to different inlining.
 1018               -- See also Note [Unique Determinism] in GHC.Types.Unique
 1019     file_msg = text "In module" <+> quotes (ppr $ wo_module opts)
 1020 
 1021 {- Note [Worker/wrapper for Strictness and Absence]
 1022 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1023 The worker/wrapper transformation, mkWWstr_one, takes concrete action
 1024 based on the 'UnboxingDescision' returned by 'wantToUnboxArg'.
 1025 The latter takes into account several possibilities to decide if the
 1026 function is worthy for splitting:
 1027 
 1028 1. If an argument is absent, it would be silly to pass it to
 1029    the worker.  Hence the DropAbsent case.  This case must come
 1030    first because the bottom demand B is also strict.
 1031    E.g. B comes from a function like
 1032        f x = error "urk"
 1033    and the absent demand A can come from Note [Unboxing evaluated arguments]
 1034 
 1035 2. If the argument is evaluated strictly (or known to be eval'd),
 1036    we can take a view into the product demand ('viewProd'). In accordance
 1037    with Note [Boxity analysis], 'wantToUnboxArg' will say 'Unbox'.
 1038    'mkWWstr_one' then follows suit it and recurses into the fields of the
 1039    product demand. For example
 1040 
 1041      f :: (Int, Int) -> Int
 1042      f p = (case p of (a,b) -> a) + 1
 1043    is split to
 1044      f :: (Int, Int) -> Int
 1045      f p = case p of (a,b) -> $wf a
 1046 
 1047      $wf :: Int -> Int
 1048      $wf a = a + 1
 1049 
 1050    and
 1051      g :: Bool -> (Int, Int) -> Int
 1052      g c p = case p of (a,b) ->
 1053                 if c then a else b
 1054    is split to
 1055      g c p = case p of (a,b) -> $gw c a b
 1056      $gw c a b = if c then a else b
 1057 
 1058 2a But do /not/ split if Boxity Analysis said "Boxed".
 1059    In this case, 'wantToUnboxArg' returns 'StopUnboxing'.
 1060    Otherwise we risk decomposing and reboxing a massive
 1061    tuple which is barely used. Example:
 1062 
 1063         f :: ((Int,Int) -> String) -> (Int,Int) -> a
 1064         f g pr = error (g pr)
 1065 
 1066         main = print (f fst (1, error "no"))
 1067 
 1068    Here, f does not take 'pr' apart, and it's stupid to do so.
 1069    Imagine that it had millions of fields. This actually happened
 1070    in GHC itself where the tuple was DynFlags
 1071 
 1072 3. In all other cases (e.g., lazy, used demand and not eval'd),
 1073    'finaliseBoxity' will have cleared the Boxity flag to 'Boxed'
 1074    (see Note [Finalising boxity for demand signature]) and
 1075    'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one'
 1076    stops unboxing.
 1077 
 1078 Note [Worker/wrapper for bottoming functions]
 1079 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1080 We used not to split if the result is bottom.
 1081 [Justification:  there's no efficiency to be gained.]
 1082 
 1083 But it's sometimes bad not to make a wrapper.  Consider
 1084         fw = \x# -> let x = I# x# in case e of
 1085                                         p1 -> error_fn x
 1086                                         p2 -> error_fn x
 1087                                         p3 -> the real stuff
 1088 The re-boxing code won't go away unless error_fn gets a wrapper too.
 1089 [We don't do reboxing now, but in general it's better to pass an
 1090 unboxed thing to f, and have it reboxed in the error cases....]
 1091 
 1092 Note [Record evaluated-ness in worker/wrapper]
 1093 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1094 Suppose we have
 1095 
 1096    data T = MkT !Int Int
 1097 
 1098    f :: T -> T
 1099    f x = e
 1100 
 1101 and f's is strict, and has the CPR property.  The we are going to generate
 1102 this w/w split
 1103 
 1104    f x = case x of
 1105            MkT x1 x2 -> case $wf x1 x2 of
 1106                            (# r1, r2 #) -> MkT r1 r2
 1107 
 1108    $wfw x1 x2 = let x = MkT x1 x2 in
 1109                 case e of
 1110                   MkT r1 r2 -> (# r1, r2 #)
 1111 
 1112 Note that
 1113 
 1114 * In the worker $wf, inside 'e' we can be sure that x1 will be
 1115   evaluated (it came from unpacking the argument MkT.  But that's no
 1116   immediately apparent in $wf
 1117 
 1118 * In the wrapper 'f', which we'll inline at call sites, we can be sure
 1119   that 'r1' has been evaluated (because it came from unpacking the result
 1120   MkT.  But that is not immediately apparent from the wrapper code.
 1121 
 1122 Missing these facts isn't unsound, but it loses possible future
 1123 opportunities for optimisation.
 1124 
 1125 Solution: use setCaseBndrEvald when creating
 1126  (A) The arg binders x1,x2 in mkWstr_one via mkUnpackCase
 1127          See #13077, test T13077
 1128  (B) The result binders r1,r2 in mkWWcpr_entry
 1129          See Trace #13077, test T13077a
 1130          And #13027 comment:20, item (4)
 1131 to record that the relevant binder is evaluated.
 1132 
 1133 Note [Absent fillers]
 1134 ~~~~~~~~~~~~~~~~~~~~~
 1135 Consider
 1136 
 1137   data T = MkT [Int] [Int] ![Int]  -- NB: last field is strict
 1138   f :: T -> Int# -> blah
 1139   f ps w = case ps of MkT xs ys zs -> <body mentioning xs>
 1140 
 1141 Then f gets a strictness sig of <S(L,A,A)><A>. We make a worker $wf thus:
 1142 
 1143   $wf :: [Int] -> blah
 1144   $wf xs = case ps of MkT xs _ _ -> <body mentioning xs>
 1145     where
 1146       ys = absentError "ys :: [Int]"
 1147       zs = RUBBISH[LiftedRep] @[Int]
 1148       ps = MkT xs ys zs
 1149       w  = RUBBISH[IntRep] @Int#
 1150 
 1151 The absent arguments 'ys', 'zs' and 'w' aren't even passed to the worker.
 1152 And neither should they! They are never used, their value is irrelevant (hence
 1153 they are *dead code*) and they are probably discarded after the next run of the
 1154 Simplifier (when they are in fact *unreachable code*). Yet, we have to come up
 1155 with "filler" values that we bind the absent arg Ids to.
 1156 
 1157 That is exactly what Note [Rubbish literals] are for: A convenient way to
 1158 conjure filler values at any type (and any representation or levity!).
 1159 
 1160 Needless to say, there are some wrinkles:
 1161 
 1162   1. In case we have a absent, /lazy/, and /lifted/ arg, we use an error-thunk
 1163      instead. If absence analysis was wrong (e.g., #11126) and the binding
 1164      in fact is used, then we get a nice panic message instead of undefined
 1165      runtime behavior (See Modes of failure from Note [Rubbish literals]).
 1166 
 1167      Obviously, we can't use an error-thunk if the value is of unlifted rep
 1168      (like 'Int#' or 'MutVar#'), because we'd immediately evaluate the panic.
 1169 
 1170   2. We also mustn't put an error-thunk (that fills in for an absent value of
 1171      lifted rep) in a strict field, because #16970 establishes the invariant
 1172      that strict fields are always evaluated, by (re-)evaluating what is put in
 1173      a strict field. That's the reason why 'zs' binds a rubbish literal instead
 1174      of an error-thunk, see #19133.
 1175 
 1176      How do we detect when we are about to put an error-thunk in a strict field?
 1177      Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but
 1178      it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'.
 1179      So we rather look out for a necessary condition for strict fields:
 1180      Note [Unboxing evaluated arguments] makes it so that the demand on
 1181      'zs' is absent and /strict/: It will get cardinality 'C_10', the empty
 1182      interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees
 1183      we never fill in an error-thunk for an absent strict field.
 1184      But that also means we emit a rubbish lit for other args that have
 1185      cardinality 'C_10' (say, the arg to a bottoming function) where we could've
 1186      used an error-thunk, but that's a small price to pay for simplicity.
 1187 
 1188      In #19766, we discovered that even if the binder has eval cardinality
 1189      'C_00', it may end up in a strict field, with no surrounding seq
 1190      whatsoever! That happens if the calling code has already evaluated
 1191      said lambda binder, which will then have an evaluated unfolding
 1192      ('isEvaldUnfolding'). That in turn tells the Simplifier it is free to drop
 1193      the seq. So we better don't fill in an error-thunk for eval'd arguments
 1194      either, just in case it ends up in a strict field!
 1195 
 1196   3. We can only emit a LitRubbish if the arg's type @arg_ty@ is mono-rep, e.g.
 1197      of the form @TYPE rep@ where @rep@ is not (and doesn't contain) a variable.
 1198      Why? Because if we don't know its representation (e.g. size in memory,
 1199      register class), we don't know what or how much rubbish to emit in codegen.
 1200      'typeMonoPrimRep_maybe' returns 'Nothing' in this case and we simply fall
 1201      back to passing the original parameter to the worker.
 1202 
 1203      Note that currently this case should not occur, because binders always
 1204      have to be representation monomorphic. But in the future, we might allow
 1205      levity polymorphism, e.g. a polymorphic levity variable in 'BoxedRep'.
 1206 
 1207 While (1) and (2) are simply an optimisation in terms of compiler debugging
 1208 experience, (3) should be irrelevant in most programs, if not all.
 1209 
 1210 Historical note: I did try the experiment of using an error thunk for unlifted
 1211 things too, relying on the simplifier to drop it as dead code.  But this is
 1212 fragile
 1213 
 1214  - It fails when profiling is on, which disables various optimisations
 1215 
 1216  - It fails when reboxing happens. E.g.
 1217       data T = MkT Int Int#
 1218       f p@(MkT a _) = ...g p....
 1219    where g is /lazy/ in 'p', but only uses the first component.  Then
 1220    'f' is /strict/ in 'p', and only uses the first component.  So we only
 1221    pass that component to the worker for 'f', which reconstructs 'p' to
 1222    pass it to 'g'.  Alas we can't say
 1223        ...f (MkT a (absentError Int# "blah"))...
 1224    because `MkT` is strict in its Int# argument, so we get an absentError
 1225    exception when we shouldn't.  Very annoying!
 1226 
 1227 ************************************************************************
 1228 *                                                                      *
 1229          Type scrutiny that is specific to demand analysis
 1230 *                                                                      *
 1231 ************************************************************************
 1232 -}
 1233 
 1234 -- | Exactly 'dataConInstArgTys', but lacks the (ASSERT'ed) precondition that
 1235 -- the 'DataCon' may not have existentials. The lack of cloning the existentials
 1236 -- compared to 'dataConInstExAndArgVars' makes this function \"dubious\";
 1237 -- only use it where type variables aren't substituted for!
 1238 dubiousDataConInstArgTys :: DataCon -> [Type] -> [Type]
 1239 dubiousDataConInstArgTys dc tc_args = arg_tys
 1240   where
 1241     univ_tvs = dataConUnivTyVars dc
 1242     ex_tvs   = dataConExTyCoVars dc
 1243     subst    = extendTCvInScopeList (zipTvSubst univ_tvs tc_args) ex_tvs
 1244     arg_tys  = map (GHC.Core.Type.substTy subst . scaledThing) (dataConRepArgTys dc)
 1245 
 1246 findTypeShape :: FamInstEnvs -> Type -> TypeShape
 1247 -- Uncover the arrow and product shape of a type
 1248 -- The data type TypeShape is defined in GHC.Types.Demand
 1249 -- See Note [Trimming a demand to a type] in GHC.Core.Opt.DmdAnal
 1250 findTypeShape fam_envs ty
 1251   = go (setRecTcMaxBound 2 initRecTc) ty
 1252        -- You might think this bound of 2 is low, but actually
 1253        -- I think even 1 would be fine.  This only bites for recursive
 1254        -- product types, which are rare, and we really don't want
 1255        -- to look deep into such products -- see #18034
 1256   where
 1257     go rec_tc ty
 1258        | Just (_, _, res) <- splitFunTy_maybe ty
 1259        = TsFun (go rec_tc res)
 1260 
 1261        | Just (tc, tc_args)  <- splitTyConApp_maybe ty
 1262        = go_tc rec_tc tc tc_args
 1263 
 1264        | Just (_, ty') <- splitForAllTyCoVar_maybe ty
 1265        = go rec_tc ty'
 1266 
 1267        | otherwise
 1268        = TsUnk
 1269 
 1270     go_tc rec_tc tc tc_args
 1271        | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe fam_envs tc tc_args
 1272        = go rec_tc rhs
 1273 
 1274        | Just con <- tyConSingleAlgDataCon_maybe tc
 1275        , Just rec_tc <- if isTupleTyCon tc
 1276                         then Just rec_tc
 1277                         else checkRecTc rec_tc tc
 1278          -- We treat tuples specially because they can't cause loops.
 1279          -- Maybe we should do so in checkRecTc.
 1280          -- The use of 'dubiousDataConInstArgTys' is OK, since this
 1281          -- function performs no substitution at all, hence the uniques
 1282          -- don't matter.
 1283          -- We really do encounter existentials here, see
 1284          -- Note [Which types are unboxed?] for an example.
 1285        = TsProd (map (go rec_tc) (dubiousDataConInstArgTys con tc_args))
 1286 
 1287        | Just (ty', _) <- instNewTyCon_maybe tc tc_args
 1288        , Just rec_tc <- checkRecTc rec_tc tc
 1289        = go rec_tc ty'
 1290 
 1291        | otherwise
 1292        = TsUnk
 1293 
 1294 -- | Returned by 'isRecDataCon'.
 1295 -- See also Note [Detecting recursive data constructors].
 1296 data IsRecDataConResult
 1297   = DefinitelyRecursive  -- ^ The algorithm detected a loop
 1298   | NonRecursiveOrUnsure -- ^ The algorithm detected no loop, went out of fuel
 1299                          -- or hit an .hs-boot file
 1300   deriving (Eq, Show)
 1301 
 1302 instance Outputable IsRecDataConResult where
 1303   ppr = text . show
 1304 
 1305 combineIRDCR :: IsRecDataConResult -> IsRecDataConResult -> IsRecDataConResult
 1306 combineIRDCR DefinitelyRecursive _                   = DefinitelyRecursive
 1307 combineIRDCR _                   DefinitelyRecursive = DefinitelyRecursive
 1308 combineIRDCR _                   _                   = NonRecursiveOrUnsure
 1309 
 1310 combineIRDCRs :: [IsRecDataConResult] -> IsRecDataConResult
 1311 combineIRDCRs = foldl' combineIRDCR NonRecursiveOrUnsure
 1312 {-# INLINE combineIRDCRs #-}
 1313 
 1314 -- | @isRecDataCon _ fuel dc@, where @tc = dataConTyCon dc@ returns
 1315 --
 1316 --   * @Just Recursive@ if the analysis found that @tc@ is reachable through one
 1317 --     of @dc@'s fields
 1318 --   * @Just NonRecursive@ if the analysis found that @tc@ is not reachable
 1319 --     through one of @dc@'s fields
 1320 --   * @Nothing@ is returned in two cases. The first is when @fuel /= Infinity@
 1321 --     and @f@ expansions of nested data TyCons were not enough to prove
 1322 --     non-recursivenss, nor arrive at an occurrence of @tc@ thus proving
 1323 --     recursiveness. The other is when we hit an abstract TyCon (one without
 1324 --     visible DataCons), such as those imported from .hs-boot files.
 1325 --
 1326 -- If @fuel = 'Infinity'@ and there are no boot files involved, then the result
 1327 -- is never @Nothing@ and the analysis is a depth-first search. If @fuel = 'Int'
 1328 -- f@, then the analysis behaves like a depth-limited DFS and returns @Nothing@
 1329 -- if the search was inconclusive.
 1330 --
 1331 -- See Note [Detecting recursive data constructors] for which recursive DataCons
 1332 -- we want to flag.
 1333 isRecDataCon :: FamInstEnvs -> IntWithInf -> DataCon -> IsRecDataConResult
 1334 isRecDataCon fam_envs fuel dc
 1335   | isTupleDataCon dc || isUnboxedSumDataCon dc
 1336   = NonRecursiveOrUnsure
 1337   | otherwise
 1338   = -- pprTrace "isRecDataCon" (ppr dc <+> dcolon <+> ppr (dataConRepType dc) $$ ppr fuel $$ ppr answer)
 1339     answer
 1340   where
 1341     answer = go_dc fuel (setRecTcMaxBound 1 initRecTc) dc
 1342     (<||>) = combineIRDCR
 1343 
 1344     go_dc :: IntWithInf -> RecTcChecker -> DataCon -> IsRecDataConResult
 1345     go_dc fuel rec_tc dc =
 1346       combineIRDCRs [ go_arg_ty fuel rec_tc (scaledThing arg_ty)
 1347                     | arg_ty <- dataConRepArgTys dc ]
 1348 
 1349     go_arg_ty :: IntWithInf -> RecTcChecker -> Type -> IsRecDataConResult
 1350     go_arg_ty fuel rec_tc ty
 1351       --- | pprTrace "arg_ty" (ppr ty) False = undefined
 1352 
 1353       | Just (_, _arg_ty, _res_ty) <- splitFunTy_maybe ty
 1354       -- = go_arg_ty fuel rec_tc _arg_ty <||> go_arg_ty fuel rec_tc _res_ty
 1355           -- Plausible, but unnecessary for CPR.
 1356           -- See Note [Detecting recursive data constructors], point (1)
 1357       = NonRecursiveOrUnsure
 1358 
 1359       | Just (_tcv, ty') <- splitForAllTyCoVar_maybe ty
 1360       = go_arg_ty fuel rec_tc ty'
 1361           -- See Note [Detecting recursive data constructors], point (2)
 1362 
 1363       | Just (tc, tc_args) <- splitTyConApp_maybe ty
 1364       = combineIRDCRs (map (go_arg_ty fuel rec_tc) tc_args)
 1365         <||> go_tc_app fuel rec_tc tc tc_args
 1366 
 1367       | otherwise
 1368       = NonRecursiveOrUnsure
 1369 
 1370     -- | PRECONDITION: tc_args has no recursive occs
 1371     -- See Note [Detecting recursive data constructors], point (5)
 1372     go_tc_app :: IntWithInf -> RecTcChecker -> TyCon -> [Type] -> IsRecDataConResult
 1373     go_tc_app fuel rec_tc tc tc_args
 1374       --- | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False = undefined
 1375 
 1376       | tc == dataConTyCon dc
 1377       = DefinitelyRecursive -- loop found!
 1378 
 1379       | isPrimTyCon tc
 1380       = NonRecursiveOrUnsure
 1381 
 1382       | not $ tcIsRuntimeTypeKind $ tyConResKind tc
 1383       = NonRecursiveOrUnsure
 1384 
 1385       | isAbstractTyCon tc   -- When tc has no DataCons, from an hs-boot file
 1386       = NonRecursiveOrUnsure -- See Note [Detecting recursive data constructors], point (7)
 1387 
 1388       | isFamilyTyCon tc
 1389       -- This is the only place where we look at tc_args
 1390       -- See Note [Detecting recursive data constructors], point (5)
 1391       = case topReduceTyFamApp_maybe fam_envs tc tc_args of
 1392           Just (HetReduction (Reduction _ rhs) _) -> go_arg_ty fuel rec_tc rhs
 1393           Nothing                                 -> DefinitelyRecursive -- we hit this case for 'Any'
 1394 
 1395       | otherwise
 1396       = assertPpr (isAlgTyCon tc) (ppr tc <+> ppr dc) $
 1397         case checkRecTc rec_tc tc of
 1398           Nothing -> NonRecursiveOrUnsure
 1399             -- we expanded this TyCon once already, no need to test it multiple times
 1400 
 1401           Just rec_tc'
 1402             | Just (_tvs, rhs, _co) <- unwrapNewTyCon_maybe tc
 1403                 -- See Note [Detecting recursive data constructors], points (2) and (3)
 1404             -> go_arg_ty fuel rec_tc' rhs
 1405 
 1406             | fuel < 0
 1407             -> NonRecursiveOrUnsure -- that's why we track fuel!
 1408 
 1409             | let dcs = expectJust "isRecDataCon:go_tc_app" $ tyConDataCons_maybe tc
 1410             -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs)
 1411                 -- See Note [Detecting recursive data constructors], point (4)
 1412 
 1413 -- | A specialised Bool for an argument to 'finaliseBoxity'.
 1414 -- See Note [Do not unbox class dictionaries].
 1415 data InsideInlineableFun
 1416   = NotInsideInlineableFun -- ^ Not in an inlineable fun.
 1417   | InsideInlineableFun    -- ^ We are in an inlineable fun, so we won't
 1418                            -- unbox dictionary args.
 1419   deriving Eq
 1420 
 1421 -- | This function makes sure that the demand only says 'Unboxed' where
 1422 -- worker/wrapper should actually unbox and trims any boxity beyond that.
 1423 -- Called for every demand annotation during DmdAnal.
 1424 --
 1425 -- > data T a = T !a
 1426 -- > f :: (T (Int,Int), Int) -> ()
 1427 -- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L))
 1428 --
 1429 -- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)).
 1430 -- This is done when annotating lambdas and thunk bindings.
 1431 -- See Note [Finalising boxity for demand signature]
 1432 finaliseBoxity
 1433   :: FamInstEnvs
 1434   -> InsideInlineableFun    -- ^ See the haddocks on 'InsideInlineableFun'
 1435   -> Type                   -- ^ Type of the argument
 1436   -> Demand                 -- ^ How the arg was used
 1437   -> Demand
 1438 finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd
 1439   where
 1440     go mark ty dmd@(n :* _) =
 1441       case wantToUnboxArg env ty dmd of
 1442         DropAbsent -> dmd
 1443         Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds
 1444           -- See Note [No lazy, Unboxed demands in demand signature]
 1445           -- See Note [Unboxing evaluated arguments]
 1446           | isStrict n || isMarkedStrict mark
 1447           -- See Note [Do not unbox class dictionaries]
 1448           , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty)
 1449           -- See Note [mkWWstr and unsafeCoerce]
 1450           , ds `lengthIs` dataConRepArity dc
 1451           , let arg_tys = dubiousDataConInstArgTys dc tc_args
 1452           -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $
 1453              n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds)
 1454         -- See Note [No nested Unboxed inside Boxed in demand signature]
 1455         _ -> trimBoxity dmd
 1456 
 1457     -- See Note [Unboxing evaluated arguments]
 1458     zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of
 1459       Nothing -> strictZipWith  (go NotMarkedStrict)          arg_tys ds
 1460                     -- Shortcut when DataCon worker=wrapper
 1461       Just _  -> strictZipWith3 go  (dataConRepStrictness dc) arg_tys ds
 1462 
 1463 {-
 1464 ************************************************************************
 1465 *                                                                      *
 1466 \subsection{Worker/wrapper for CPR}
 1467 *                                                                      *
 1468 ************************************************************************
 1469 See Note [Worker/wrapper for CPR] for an overview.
 1470 -}
 1471 
 1472 mkWWcpr_entry
 1473   :: WwOpts
 1474   -> Type                              -- function body
 1475   -> Cpr                               -- CPR analysis results
 1476   -> UniqSM (Bool,                     -- Is w/w'ing useful?
 1477              CoreExpr -> CoreExpr,     -- New wrapper. 'nop_fn' if not useful
 1478              CoreExpr -> CoreExpr,     -- New worker.  'nop_fn' if not useful
 1479              Type)                     -- Type of worker's body.
 1480                                        -- Just the input body_ty if not useful
 1481 -- ^ Entrypoint to CPR W/W. See Note [Worker/wrapper for CPR] for an overview.
 1482 mkWWcpr_entry opts body_ty body_cpr
 1483   | not (wo_cpr_anal opts) = return (False, nop_fn, nop_fn, body_ty)
 1484   | otherwise = do
 1485     -- Part (1)
 1486     res_bndr <- mk_res_bndr body_ty
 1487     let bind_res_bndr body scope = mkDefaultCase body res_bndr scope
 1488 
 1489     -- Part (2)
 1490     (useful, fromOL -> transit_vars, rebuilt_result, work_unpack_res) <-
 1491       mkWWcpr_one opts res_bndr body_cpr
 1492 
 1493     -- Part (3)
 1494     let (unbox_transit_tup, transit_tup) = move_transit_vars transit_vars
 1495 
 1496     -- Stacking unboxer (work_fn) and builder (wrap_fn) together
 1497     let wrap_fn      = unbox_transit_tup rebuilt_result                 -- 3 2
 1498         work_fn body = bind_res_bndr body (work_unpack_res transit_tup) -- 1 2 3
 1499         work_body_ty = exprType transit_tup
 1500     return $ if not useful
 1501                 then (False, nop_fn, nop_fn, body_ty)
 1502                 else (True, wrap_fn, work_fn, work_body_ty)
 1503 
 1504 -- | Part (1) of Note [Worker/wrapper for CPR].
 1505 mk_res_bndr :: Type -> UniqSM Id
 1506 mk_res_bndr body_ty = do
 1507   -- See Note [Linear types and CPR]
 1508   bndr <- mkSysLocalOrCoVarM ww_prefix cprCaseBndrMult body_ty
 1509   -- See Note [Record evaluated-ness in worker/wrapper]
 1510   pure (setCaseBndrEvald MarkedStrict bndr)
 1511 
 1512 -- | What part (2) of Note [Worker/wrapper for CPR] collects.
 1513 --
 1514 --   1. A Bool capturing whether the transformation did anything useful.
 1515 --   2. The list of transit variables (see the Note).
 1516 --   3. The result builder expression for the wrapper.  The original case binder if not useful.
 1517 --   4. The result unpacking expression for the worker. 'nop_fn' if not useful.
 1518 type CprWwResultOne  = (Bool, OrdList Var,  CoreExpr , CoreExpr -> CoreExpr)
 1519 type CprWwResultMany = (Bool, OrdList Var, [CoreExpr], CoreExpr -> CoreExpr)
 1520 
 1521 mkWWcpr :: WwOpts -> [Id] -> [Cpr] -> UniqSM CprWwResultMany
 1522 mkWWcpr _opts vars []   =
 1523   -- special case: No CPRs means all top (for example from FlatConCpr),
 1524   -- hence stop WW.
 1525   return (False, toOL vars, map varToCoreExpr vars, nop_fn)
 1526 mkWWcpr opts  vars cprs = do
 1527   -- No existentials in 'vars'. 'wantToUnboxResult' should have checked that.
 1528   massertPpr (not (any isTyVar vars)) (ppr vars $$ ppr cprs)
 1529   massertPpr (equalLength vars cprs) (ppr vars $$ ppr cprs)
 1530   (usefuls, varss, rebuilt_results, work_unpack_ress) <-
 1531     unzip4 <$> zipWithM (mkWWcpr_one opts) vars cprs
 1532   return ( or usefuls
 1533          , concatOL varss
 1534          , rebuilt_results
 1535          , foldl' (.) nop_fn work_unpack_ress )
 1536 
 1537 mkWWcpr_one :: WwOpts -> Id -> Cpr -> UniqSM CprWwResultOne
 1538 -- ^ See if we want to unbox the result and hand off to 'unbox_one_result'.
 1539 mkWWcpr_one opts res_bndr cpr
 1540   | assert (not (isTyVar res_bndr) ) True
 1541   , Unbox dcpc arg_cprs <- wantToUnboxResult (wo_fam_envs opts) (idType res_bndr) cpr
 1542   = unbox_one_result opts res_bndr arg_cprs dcpc
 1543   | otherwise
 1544   = return (False, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
 1545 
 1546 unbox_one_result
 1547   :: WwOpts -> Id -> [Cpr] -> DataConPatContext -> UniqSM CprWwResultOne
 1548 -- ^ Implements the main bits of part (2) of Note [Worker/wrapper for CPR]
 1549 unbox_one_result opts res_bndr arg_cprs
 1550                  DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
 1551                                    , dcpc_co = co } = do
 1552   -- unboxer (free in `res_bndr`):       |   builder (where <i> builds what was
 1553   --   ( case res_bndr of (i, j) -> )    |            bound to i)
 1554   --   ( case i of I# a ->          )    |
 1555   --   ( case j of I# b ->          )    |     (      (<i>, <j>)      )
 1556   --   ( <hole>                     )    |
 1557   pat_bndrs_uniqs <- getUniquesM
 1558   let (_exs, arg_ids) =
 1559         dataConRepFSInstPat (repeat ww_prefix) pat_bndrs_uniqs cprCaseBndrMult dc tc_args
 1560   massert (null _exs) -- Should have been caught by wantToUnboxResult
 1561 
 1562   (nested_useful, transit_vars, con_args, work_unbox_res) <-
 1563     mkWWcpr opts arg_ids arg_cprs
 1564 
 1565   let -- rebuilt_result = (C a b |> sym co)
 1566       rebuilt_result = mkConApp dc (map Type tc_args ++ con_args) `mkCast` mkSymCo co
 1567       -- this_work_unbox_res alt = (case res_bndr |> co of C a b -> <alt>[a,b])
 1568       this_work_unbox_res = mkUnpackCase (Var res_bndr) co cprCaseBndrMult dc arg_ids
 1569 
 1570   -- Don't try to WW an unboxed tuple return type when there's nothing inside
 1571   -- to unbox further.
 1572   return $ if isUnboxedTupleDataCon dc && not nested_useful
 1573               then ( False, unitOL res_bndr, Var res_bndr, nop_fn )
 1574               else ( True
 1575                    , transit_vars
 1576                    , rebuilt_result
 1577                    , this_work_unbox_res . work_unbox_res
 1578                    )
 1579 
 1580 -- | Implements part (3) of Note [Worker/wrapper for CPR].
 1581 --
 1582 -- If `move_transit_vars [a,b] = (unbox, tup)` then
 1583 --     * `a` and `b` are the *transit vars* to be returned from the worker
 1584 --       to the wrapper
 1585 --     * `unbox scrut alt = (case <scrut> of (# a, b #) -> <alt>)`
 1586 --     * `tup = (# a, b #)`
 1587 -- There is a special case for when there's 1 transit var,
 1588 -- see Note [No unboxed tuple for single, unlifted transit var].
 1589 move_transit_vars :: [Id] -> (CoreExpr -> CoreExpr -> CoreExpr, CoreExpr)
 1590 move_transit_vars vars
 1591   | [var] <- vars
 1592   , let var_ty = idType var
 1593   , isUnliftedType var_ty || exprIsHNF (Var var)
 1594   -- See Note [No unboxed tuple for single, unlifted transit var]
 1595   --   * Wrapper: `unbox scrut alt = (case <scrut> of a -> <alt>)`
 1596   --   * Worker:  `tup = a`
 1597   = ( \build_res wkr_call -> mkDefaultCase wkr_call var build_res
 1598     , varToCoreExpr var ) -- varToCoreExpr important here: var can be a coercion
 1599                           -- Lacking this caused #10658
 1600   | otherwise
 1601   -- The general case: Just return an unboxed tuple from the worker
 1602   --   * Wrapper: `unbox scrut alt = (case <scrut> of (# a, b #) -> <alt>)`
 1603   --   * Worker:  `tup = (# a, b #)`
 1604   = ( \build_res wkr_call -> mkSingleAltCase wkr_call case_bndr
 1605                                     (DataAlt tup_con) vars build_res
 1606     , ubx_tup_app )
 1607    where
 1608     ubx_tup_app = mkCoreUbxTup (map idType vars) (map varToCoreExpr vars)
 1609     tup_con     = tupleDataCon Unboxed (length vars)
 1610     -- See also Note [Linear types and CPR]
 1611     case_bndr   = mkWildValBinder cprCaseBndrMult (exprType ubx_tup_app)
 1612 
 1613 
 1614 {- Note [Worker/wrapper for CPR]
 1615 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1616 'mkWWcpr_entry' is the entry-point to the worker/wrapper transformation that
 1617 exploits CPR info. Here's an example:
 1618 ```
 1619   f :: ... -> (Int, Int)
 1620   f ... = <body>
 1621 ```
 1622 Let's assume the CPR info `body_cpr` for the body of `f` says
 1623 "unbox the pair and its components" and `body_ty` is the type of the function
 1624 body `body` (i.e., `(Int, Int)`). Then `mkWWcpr_entry body_ty body_cpr` returns
 1625 
 1626   * A result-unpacking expression for the worker, with a hole for the fun body:
 1627     ```
 1628       unpack body = ( case <body> of r __DEFAULT -> )    -- (1)
 1629                     ( case r of (i, j) ->           )    -- (2)
 1630                     ( case i of I# a ->             )    -- (2)
 1631                     ( case j of I# b ->             )    -- (2)
 1632                     ( (# a, b #)                    )    -- (3)
 1633     ```
 1634   * A result-building expression for the wrapper, with a hole for the worker call:
 1635     ```
 1636       build wkr_call = ( case <wkr_call> of (# a, b #) -> )    -- (3)
 1637                        ( (I# a, I# b)                     )    -- (2)
 1638     ```
 1639   * The result type of the worker, e.g., `(# Int#, Int# #)` above.
 1640 
 1641 To achieve said transformation, 'mkWWcpr_entry'
 1642 
 1643   1. First allocates a fresh result binder `r`, giving a name to the `body`
 1644      expression and contributing part (1) of the unpacker and builder.
 1645   2. Then it delegates to 'mkWWcpr_one', which recurses into all result fields
 1646      to unbox, contributing the parts marked with (2). Crucially, it knows
 1647      what belongs in the case scrutinee of the unpacker through the communicated
 1648      Id `r`: The unpacking expression will be free in that variable.
 1649      (This is a similar contract as that of 'mkWWstr_one' for strict args.)
 1650   3. 'mkWWstr_one' produces a bunch of *transit vars*: Those result variables
 1651      that have to be transferred from the worker to the wrapper, where the
 1652      constructed result can be rebuilt, `a` and `b` above. Part (3) is
 1653      responsible for tupling them up in the worker and taking the tuple apart
 1654      in the wrapper. This is implemented in 'move_transit_vars'.
 1655 
 1656 Note [No unboxed tuple for single, unlifted transit var]
 1657 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1658 When there's only a single, unlifted transit var (Note [Worker/wrapper for CPR]),
 1659 we don't wrap an unboxed singleton tuple around it (which otherwise would be
 1660 needed to suspend evaluation) and return the unlifted thing directly. E.g.
 1661 ```
 1662   f :: Int -> Int
 1663   f x = x+1
 1664 ```
 1665 We certainly want `$wf :: Int# -> Int#`, not `$wf :: Int# -> (# Int# #)`.
 1666 This is OK as long as we know that evaluation of the returned thing terminates
 1667 quickly, as is the case for fields of unlifted type like `Int#`.
 1668 
 1669 But more generally, this should also be true for *lifted* types that terminate
 1670 quickly! Consider from `T18109`:
 1671 ```
 1672   data F = F (Int -> Int)
 1673   f :: Int -> F
 1674   f n = F (+n)
 1675 
 1676   data T = T (Int, Int)
 1677   g :: T -> T
 1678   g t@(T p) = p `seq` t
 1679 
 1680   data U = U ![Int]
 1681   h :: Int -> U
 1682   h n = U [0..n]
 1683 ```
 1684 All of the nested fields are actually ok-for-speculation and thus OK to
 1685 return unboxed instead of in an unboxed singleton tuple:
 1686 
 1687  1. The field of `F` is a HNF.
 1688     We want `$wf :: Int -> Int -> Int`.
 1689     We get  `$wf :: Int -> (# Int -> Int #)`.
 1690  2. The field of `T` is `seq`'d in `g`.
 1691     We want `$wg :: (Int, Int) -> (Int, Int)`.
 1692     We get  `$wg :: (Int, Int) -> (# (Int, Int) #)`.
 1693  3. The field of `U` is strict and thus always evaluated.
 1694     We want  `$wh :: Int# -> [Int]`.
 1695     We'd get `$wh :: Int# -> (# [Int] #)`.
 1696 
 1697 By considering vars as unlifted that satsify 'exprIsHNF', we catch (3).
 1698 Why not check for 'exprOkForSpeculation'? Quite perplexingly, evaluated vars
 1699 are not ok-for-spec, see Note [exprOkForSpeculation and evaluated variables].
 1700 For (1) and (2) we would have to look at the term. WW only looks at the
 1701 type and the CPR signature, so the only way to fix (1) and (2) would be to
 1702 have a nested termination signature, like in MR !1866.
 1703 
 1704 Note [Linear types and CPR]
 1705 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1706 Remark on linearity: in both the case of the wrapper and the worker,
 1707 we build a linear case to unpack constructed products. All the
 1708 multiplicity information is kept in the constructors (both C and (#, #)).
 1709 In particular (#,#) is parametrised by the multiplicity of its fields.
 1710 Specifically, in this instance, the multiplicity of the fields of (#,#)
 1711 is chosen to be the same as those of C.
 1712 
 1713 
 1714 ************************************************************************
 1715 *                                                                      *
 1716 \subsection{Utilities}
 1717 *                                                                      *
 1718 ************************************************************************
 1719 -}
 1720 
 1721 mkUnpackCase ::  CoreExpr -> Coercion -> Mult -> DataCon -> [Id] -> CoreExpr -> CoreExpr
 1722 -- (mkUnpackCase e co Con args body)
 1723 --      returns
 1724 -- case e |> co of _dead { Con args -> body }
 1725 mkUnpackCase (Tick tickish e) co mult con args body   -- See Note [Profiling and unpacking]
 1726   = Tick tickish (mkUnpackCase e co mult con args body)
 1727 mkUnpackCase scrut co mult boxing_con unpk_args body
 1728   = mkSingleAltCase casted_scrut bndr
 1729                     (DataAlt boxing_con) unpk_args body
 1730   where
 1731     casted_scrut = scrut `mkCast` co
 1732     bndr = mkWildValBinder mult (exprType casted_scrut)
 1733 
 1734 -- | The multiplicity of a case binder unboxing a constructed result.
 1735 -- See Note [Linear types and CPR]
 1736 cprCaseBndrMult :: Mult
 1737 cprCaseBndrMult = One
 1738 
 1739 ww_prefix :: FastString
 1740 ww_prefix = fsLit "ww"
 1741 
 1742 {- Note [Profiling and unpacking]
 1743 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1744 If the original function looked like
 1745         f = \ x -> {-# SCC "foo" #-} E
 1746 
 1747 then we want the CPR'd worker to look like
 1748         \ x -> {-# SCC "foo" #-} (case E of I# x -> x)
 1749 and definitely not
 1750         \ x -> case ({-# SCC "foo" #-} E) of I# x -> x)
 1751 
 1752 This transform doesn't move work or allocation
 1753 from one cost centre to another.
 1754 
 1755 Later [SDM]: presumably this is because we want the simplifier to
 1756 eliminate the case, and the scc would get in the way?  I'm ok with
 1757 including the case itself in the cost centre, since it is morally
 1758 part of the function (post transformation) anyway.
 1759 -}