never executed always true always false
    1 {-# LANGUAGE TypeFamilies #-}
    2 {-# LANGUAGE DataKinds #-}
    3 
    4 -- | Provides the heuristics for when it's beneficial to lambda lift bindings.
    5 -- Most significantly, this employs a cost model to estimate impact on heap
    6 -- allocations, by looking at an STG expression's 'Skeleton'.
    7 module GHC.Stg.Lift.Analysis (
    8     -- * #when# When to lift
    9     -- $when
   10 
   11     -- * #clogro# Estimating closure growth
   12     -- $clogro
   13 
   14     -- * AST annotation
   15     Skeleton(..), BinderInfo(..), binderInfoBndr,
   16     LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt, tagSkeletonTopBind,
   17     -- * Lifting decision
   18     goodToLift,
   19     closureGrowth -- Exported just for the docs
   20   ) where
   21 
   22 import GHC.Prelude
   23 
   24 import GHC.Platform
   25 import GHC.Platform.Profile
   26 
   27 import GHC.Types.Basic
   28 import GHC.Types.Demand
   29 import GHC.Driver.Session
   30 import GHC.Types.Id
   31 import GHC.Runtime.Heap.Layout ( WordOff )
   32 import GHC.Stg.Syntax
   33 import qualified GHC.StgToCmm.ArgRep  as StgToCmm.ArgRep
   34 import qualified GHC.StgToCmm.Closure as StgToCmm.Closure
   35 import qualified GHC.StgToCmm.Layout  as StgToCmm.Layout
   36 import GHC.Utils.Outputable
   37 import GHC.Utils.Misc
   38 import GHC.Types.Var.Set
   39 
   40 import Data.Maybe ( mapMaybe )
   41 
   42 -- Note [When to lift]
   43 -- ~~~~~~~~~~~~~~~~~~~
   44 -- $when
   45 -- The analysis proceeds in two steps:
   46 --
   47 --   1. It tags the syntax tree with analysis information in the form of
   48 --      'BinderInfo' at each binder and 'Skeleton's at each let-binding
   49 --      by 'tagSkeletonTopBind' and friends.
   50 --   2. The resulting syntax tree is treated by the "GHC.Stg.Lift"
   51 --      module, calling out to 'goodToLift' to decide if a binding is worthwhile
   52 --      to lift.
   53 --      'goodToLift' consults argument occurrence information in 'BinderInfo'
   54 --      and estimates 'closureGrowth', for which it needs the 'Skeleton'.
   55 --
   56 -- So the annotations from 'tagSkeletonTopBind' ultimately fuel 'goodToLift',
   57 -- which employs a number of heuristics to identify and exclude lambda lifting
   58 -- opportunities deemed non-beneficial:
   59 --
   60 --  [Top-level bindings] can't be lifted.
   61 --  [Thunks] and data constructors shouldn't be lifted in order not to destroy
   62 --    sharing.
   63 --  [Argument occurrences] #arg_occs# of binders prohibit them to be lifted.
   64 --    Doing the lift would re-introduce the very allocation at call sites that
   65 --    we tried to get rid off in the first place. We capture analysis
   66 --    information in 'BinderInfo'. Note that we also consider a nullary
   67 --    application as argument occurrence, because it would turn into an n-ary
   68 --    partial application created by a generic apply function. This occurs in
   69 --    CPS-heavy code like the CS benchmark.
   70 --  [Join points] should not be lifted, simply because there's no reduction in
   71 --    allocation to be had.
   72 --  [Abstracting over join points] destroys join points, because they end up as
   73 --    arguments to the lifted function.
   74 --  [Abstracting over known local functions] turns a known call into an unknown
   75 --    call (e.g. some @stg_ap_*@), which is generally slower. Can be turned off
   76 --    with @-fstg-lift-lams-known@.
   77 --  [Calling convention] Don't lift when the resulting function would have a
   78 --    higher arity than available argument registers for the calling convention.
   79 --    Can be influenced with @-fstg-lift-(non)rec-args(-any)@.
   80 --  [Closure growth] introduced when former free variables have to be available
   81 --    at call sites may actually lead to an increase in overall allocations
   82 --  resulting from a lift. Estimating closure growth is described in
   83 --  "GHC.Stg.Lift.Analysis#clogro" and is what most of this module is ultimately
   84 --  concerned with.
   85 --
   86 -- There's a <https://gitlab.haskell.org/ghc/ghc/wikis/late-lam-lift wiki page> with
   87 -- some more background and history.
   88 
   89 -- Note [Estimating closure growth]
   90 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   91 -- $clogro
   92 -- We estimate closure growth by abstracting the syntax tree into a 'Skeleton',
   93 -- capturing only syntactic details relevant to 'closureGrowth', such as
   94 --
   95 --   * 'ClosureSk', representing closure allocation.
   96 --   * 'RhsSk', representing a RHS of a binding and how many times it's called
   97 --     by an appropriate 'Card'.
   98 --   * 'AltSk', 'BothSk' and 'NilSk' for choice, sequence and empty element.
   99 --
  100 -- This abstraction is mostly so that the main analysis function 'closureGrowth'
  101 -- can stay simple and focused. Also, skeletons tend to be much smaller than
  102 -- the syntax tree they abstract, so it makes sense to construct them once and
  103 -- and operate on them instead of the actual syntax tree.
  104 --
  105 -- A more detailed treatment of computing closure growth, including examples,
  106 -- can be found in the paper referenced from the
  107 -- <https://gitlab.haskell.org/ghc/ghc/wikis/late-lam-lift wiki page>.
  108 
  109 llTrace :: String -> SDoc -> a -> a
  110 llTrace _ _ c = c
  111 -- llTrace a b c = pprTrace a b c
  112 
  113 type instance BinderP      'LiftLams = BinderInfo
  114 type instance XRhsClosure  'LiftLams = DIdSet
  115 type instance XLet         'LiftLams = Skeleton
  116 type instance XLetNoEscape 'LiftLams = Skeleton
  117 
  118 
  119 -- | Captures details of the syntax tree relevant to the cost model, such as
  120 -- closures, multi-shot lambdas and case expressions.
  121 data Skeleton
  122   = ClosureSk !Id !DIdSet {- ^ free vars -} !Skeleton
  123   | RhsSk !Card {- ^ how often the RHS was entered -} !Skeleton
  124   | AltSk !Skeleton !Skeleton
  125   | BothSk !Skeleton !Skeleton
  126   | NilSk
  127 
  128 bothSk :: Skeleton -> Skeleton -> Skeleton
  129 bothSk NilSk b = b
  130 bothSk a NilSk = a
  131 bothSk a b     = BothSk a b
  132 
  133 altSk :: Skeleton -> Skeleton -> Skeleton
  134 altSk NilSk b = b
  135 altSk a NilSk = a
  136 altSk a b     = AltSk a b
  137 
  138 rhsSk :: Card -> Skeleton -> Skeleton
  139 rhsSk _        NilSk = NilSk
  140 rhsSk body_dmd skel  = RhsSk body_dmd skel
  141 
  142 -- | The type used in binder positions in 'GenStgExpr's.
  143 data BinderInfo
  144   = BindsClosure !Id !Bool -- ^ Let(-no-escape)-bound thing with a flag
  145                            --   indicating whether it occurs as an argument
  146                            --   or in a nullary application
  147                            --   (see "GHC.Stg.Lift.Analysis#arg_occs").
  148   | BoringBinder !Id       -- ^ Every other kind of binder
  149 
  150 -- | Gets the bound 'Id' out a 'BinderInfo'.
  151 binderInfoBndr :: BinderInfo -> Id
  152 binderInfoBndr (BoringBinder bndr)   = bndr
  153 binderInfoBndr (BindsClosure bndr _) = bndr
  154 
  155 -- | Returns 'Nothing' for 'BoringBinder's and 'Just' the flag indicating
  156 -- occurrences as argument or in a nullary applications otherwise.
  157 binderInfoOccursAsArg :: BinderInfo -> Maybe Bool
  158 binderInfoOccursAsArg BoringBinder{}     = Nothing
  159 binderInfoOccursAsArg (BindsClosure _ b) = Just b
  160 
  161 instance Outputable Skeleton where
  162   ppr NilSk = text ""
  163   ppr (AltSk l r) = vcat
  164     [ text "{ " <+> ppr l
  165     , text "ALT"
  166     , text "  " <+> ppr r
  167     , text "}"
  168     ]
  169   ppr (BothSk l r) = ppr l $$ ppr r
  170   ppr (ClosureSk f fvs body) = ppr f <+> ppr fvs $$ nest 2 (ppr body)
  171   ppr (RhsSk card body) = hcat
  172     [ lambda
  173     , ppr card
  174     , dot
  175     , ppr body
  176     ]
  177 
  178 instance Outputable BinderInfo where
  179   ppr = ppr . binderInfoBndr
  180 
  181 instance OutputableBndr BinderInfo where
  182   pprBndr b = pprBndr b . binderInfoBndr
  183   pprPrefixOcc = pprPrefixOcc . binderInfoBndr
  184   pprInfixOcc = pprInfixOcc . binderInfoBndr
  185   bndrIsJoin_maybe = bndrIsJoin_maybe . binderInfoBndr
  186 
  187 mkArgOccs :: [StgArg] -> IdSet
  188 mkArgOccs = mkVarSet . mapMaybe stg_arg_var
  189   where
  190     stg_arg_var (StgVarArg occ) = Just occ
  191     stg_arg_var _               = Nothing
  192 
  193 -- | Tags every binder with its 'BinderInfo' and let bindings with their
  194 -- 'Skeleton's.
  195 tagSkeletonTopBind :: CgStgBinding -> LlStgBinding
  196 -- NilSk is OK when tagging top-level bindings. Also, top-level things are never
  197 -- lambda-lifted, so no need to track their argument occurrences. They can also
  198 -- never be let-no-escapes (thus we pass False).
  199 tagSkeletonTopBind bind = bind'
  200   where
  201     (_, _, _, bind') = tagSkeletonBinding False NilSk emptyVarSet bind
  202 
  203 -- | Tags binders of an 'StgExpr' with its 'BinderInfo' and let bindings with
  204 -- their 'Skeleton's. Additionally, returns its 'Skeleton' and the set of binder
  205 -- occurrences in argument and nullary application position
  206 -- (cf. "GHC.Stg.Lift.Analysis#arg_occs").
  207 tagSkeletonExpr :: CgStgExpr -> (Skeleton, IdSet, LlStgExpr)
  208 tagSkeletonExpr (StgLit lit)
  209   = (NilSk, emptyVarSet, StgLit lit)
  210 tagSkeletonExpr (StgConApp con mn args tys)
  211   = (NilSk, mkArgOccs args, StgConApp con mn args tys)
  212 tagSkeletonExpr (StgOpApp op args ty)
  213   = (NilSk, mkArgOccs args, StgOpApp op args ty)
  214 tagSkeletonExpr (StgApp f args)
  215   = (NilSk, arg_occs, StgApp f args)
  216   where
  217     arg_occs
  218       -- This checks for nullary applications, which we treat the same as
  219       -- argument occurrences, see "GHC.Stg.Lift.Analysis#arg_occs".
  220       | null args = unitVarSet f
  221       | otherwise = mkArgOccs args
  222 tagSkeletonExpr (StgCase scrut bndr ty alts)
  223   = (skel, arg_occs, StgCase scrut' bndr' ty alts')
  224   where
  225     (scrut_skel, scrut_arg_occs, scrut') = tagSkeletonExpr scrut
  226     (alt_skels, alt_arg_occss, alts') = mapAndUnzip3 tagSkeletonAlt alts
  227     skel = bothSk scrut_skel (foldr altSk NilSk alt_skels)
  228     arg_occs = unionVarSets (scrut_arg_occs:alt_arg_occss) `delVarSet` bndr
  229     bndr' = BoringBinder bndr
  230 tagSkeletonExpr (StgTick t e)
  231   = (skel, arg_occs, StgTick t e')
  232   where
  233     (skel, arg_occs, e') = tagSkeletonExpr e
  234 tagSkeletonExpr (StgLet _ bind body) = tagSkeletonLet False body bind
  235 tagSkeletonExpr (StgLetNoEscape _ bind body) = tagSkeletonLet True body bind
  236 
  237 mkLet :: Bool -> Skeleton -> LlStgBinding -> LlStgExpr -> LlStgExpr
  238 mkLet True = StgLetNoEscape
  239 mkLet _    = StgLet
  240 
  241 tagSkeletonLet
  242   :: Bool
  243   -- ^ Is the binding a let-no-escape?
  244   -> CgStgExpr
  245   -- ^ Let body
  246   -> CgStgBinding
  247   -- ^ Binding group
  248   -> (Skeleton, IdSet, LlStgExpr)
  249   -- ^ RHS skeletons, argument occurrences and annotated binding
  250 tagSkeletonLet is_lne body bind
  251   = (let_skel, arg_occs, mkLet is_lne scope bind' body')
  252   where
  253     (body_skel, body_arg_occs, body') = tagSkeletonExpr body
  254     (let_skel, arg_occs, scope, bind')
  255       = tagSkeletonBinding is_lne body_skel body_arg_occs bind
  256 
  257 tagSkeletonBinding
  258   :: Bool
  259   -- ^ Is the binding a let-no-escape?
  260   -> Skeleton
  261   -- ^ Let body skeleton
  262   -> IdSet
  263   -- ^ Argument occurrences in the body
  264   -> CgStgBinding
  265   -- ^ Binding group
  266   -> (Skeleton, IdSet, Skeleton, LlStgBinding)
  267   -- ^ Let skeleton, argument occurrences, scope skeleton of binding and
  268   --   the annotated binding
  269 tagSkeletonBinding is_lne body_skel body_arg_occs (StgNonRec bndr rhs)
  270   = (let_skel, arg_occs, scope, bind')
  271   where
  272     (rhs_skel, rhs_arg_occs, rhs') = tagSkeletonRhs bndr rhs
  273     arg_occs = (body_arg_occs `unionVarSet` rhs_arg_occs) `delVarSet` bndr
  274     bind_skel
  275       | is_lne    = rhs_skel -- no closure is allocated for let-no-escapes
  276       | otherwise = ClosureSk bndr (freeVarsOfRhs rhs) rhs_skel
  277     let_skel = bothSk body_skel bind_skel
  278     occurs_as_arg = bndr `elemVarSet` body_arg_occs
  279     -- Compared to the recursive case, this exploits the fact that @bndr@ is
  280     -- never free in @rhs@.
  281     scope = body_skel
  282     bind' = StgNonRec (BindsClosure bndr occurs_as_arg) rhs'
  283 tagSkeletonBinding is_lne body_skel body_arg_occs (StgRec pairs)
  284   = (let_skel, arg_occs, scope, StgRec pairs')
  285   where
  286     (bndrs, _) = unzip pairs
  287     -- Local recursive STG bindings also regard the defined binders as free
  288     -- vars. We want to delete those for our cost model, as these are known
  289     -- calls anyway when we add them to the same top-level recursive group as
  290     -- the top-level binding currently being analysed.
  291     skel_occs_rhss' = map (uncurry tagSkeletonRhs) pairs
  292     rhss_arg_occs = map sndOf3 skel_occs_rhss'
  293     scope_occs = unionVarSets (body_arg_occs:rhss_arg_occs)
  294     arg_occs = scope_occs `delVarSetList` bndrs
  295     -- @skel_rhss@ aren't yet wrapped in closures. We'll do that in a moment,
  296     -- but we also need the un-wrapped skeletons for calculating the @scope@
  297     -- of the group, as the outer closures don't contribute to closure growth
  298     -- when we lift this specific binding.
  299     scope = foldr (bothSk . fstOf3) body_skel skel_occs_rhss'
  300     -- Now we can build the actual Skeleton for the expression just by
  301     -- iterating over each bind pair.
  302     (bind_skels, pairs') = unzip (zipWith single_bind bndrs skel_occs_rhss')
  303     let_skel = foldr bothSk body_skel bind_skels
  304     single_bind bndr (skel_rhs, _, rhs') = (bind_skel, (bndr', rhs'))
  305       where
  306         -- Here, we finally add the closure around each @skel_rhs@.
  307         bind_skel
  308           | is_lne    = skel_rhs -- no closure is allocated for let-no-escapes
  309           | otherwise = ClosureSk bndr fvs skel_rhs
  310         fvs = freeVarsOfRhs rhs' `dVarSetMinusVarSet` mkVarSet bndrs
  311         bndr' = BindsClosure bndr (bndr `elemVarSet` scope_occs)
  312 
  313 tagSkeletonRhs :: Id -> CgStgRhs -> (Skeleton, IdSet, LlStgRhs)
  314 tagSkeletonRhs _ (StgRhsCon ccs dc mn ts args)
  315   = (NilSk, mkArgOccs args, StgRhsCon ccs dc mn ts args)
  316 tagSkeletonRhs bndr (StgRhsClosure fvs ccs upd bndrs body)
  317   = (rhs_skel, body_arg_occs, StgRhsClosure fvs ccs upd bndrs' body')
  318   where
  319     bndrs' = map BoringBinder bndrs
  320     (body_skel, body_arg_occs, body') = tagSkeletonExpr body
  321     rhs_skel = rhsSk (rhsCard bndr) body_skel
  322 
  323 -- | How many times will the lambda body of the RHS bound to the given
  324 -- identifier be evaluated, relative to its defining context? This function
  325 -- computes the answer in form of a 'Card'.
  326 rhsCard :: Id -> Card
  327 rhsCard bndr
  328   | is_thunk  = oneifyCard n
  329   | otherwise = peelManyCalls (idArity bndr) cd
  330   where
  331     is_thunk = idArity bndr == 0
  332     -- Let's pray idDemandInfo is still OK after unarise...
  333     n :* cd = idDemandInfo bndr
  334 
  335 tagSkeletonAlt :: CgStgAlt -> (Skeleton, IdSet, LlStgAlt)
  336 tagSkeletonAlt (con, bndrs, rhs)
  337   = (alt_skel, arg_occs, (con, map BoringBinder bndrs, rhs'))
  338   where
  339     (alt_skel, alt_arg_occs, rhs') = tagSkeletonExpr rhs
  340     arg_occs = alt_arg_occs `delVarSetList` bndrs
  341 
  342 -- | Combines several heuristics to decide whether to lambda-lift a given
  343 -- @let@-binding to top-level. See "GHC.Stg.Lift.Analysis#when" for details.
  344 goodToLift
  345   :: DynFlags
  346   -> TopLevelFlag
  347   -> RecFlag
  348   -> (DIdSet -> DIdSet) -- ^ An expander function, turning 'InId's into
  349                         -- 'OutId's. See 'GHC.Stg.Lift.Monad.liftedIdsExpander'.
  350   -> [(BinderInfo, LlStgRhs)]
  351   -> Skeleton
  352   -> Maybe DIdSet       -- ^ @Just abs_ids@ <=> This binding is beneficial to
  353                         -- lift and @abs_ids@ are the variables it would
  354                         -- abstract over
  355 goodToLift dflags top_lvl rec_flag expander pairs scope = decide
  356   [ ("top-level", isTopLevel top_lvl) -- keep in sync with Note [When to lift]
  357   , ("memoized", any_memoized)
  358   , ("argument occurrences", arg_occs)
  359   , ("join point", is_join_point)
  360   , ("abstracts join points", abstracts_join_ids)
  361   , ("abstracts known local function", abstracts_known_local_fun)
  362   , ("args spill on stack", args_spill_on_stack)
  363   , ("increases allocation", inc_allocs)
  364   ] where
  365       profile  = targetProfile dflags
  366       platform = profilePlatform profile
  367       decide deciders
  368         | not (fancy_or deciders)
  369         = llTrace "stgLiftLams:lifting"
  370                   (ppr bndrs <+> ppr abs_ids $$
  371                    ppr allocs $$
  372                    ppr scope) $
  373           Just abs_ids
  374         | otherwise
  375         = Nothing
  376       ppr_deciders = vcat . map (text . fst) . filter snd
  377       fancy_or deciders
  378         = llTrace "stgLiftLams:goodToLift" (ppr bndrs $$ ppr_deciders deciders) $
  379           any snd deciders
  380 
  381       bndrs = map (binderInfoBndr . fst) pairs
  382       bndrs_set = mkVarSet bndrs
  383       rhss = map snd pairs
  384 
  385       -- First objective: Calculate @abs_ids@, e.g. the former free variables
  386       -- the lifted binding would abstract over. We have to merge the free
  387       -- variables of all RHS to get the set of variables that will have to be
  388       -- passed through parameters.
  389       fvs = unionDVarSets (map freeVarsOfRhs rhss)
  390       -- To lift the binding to top-level, we want to delete the lifted binders
  391       -- themselves from the free var set. Local let bindings track recursive
  392       -- occurrences in their free variable set. We neither want to apply our
  393       -- cost model to them (see 'tagSkeletonRhs'), nor pass them as parameters
  394       -- when lifted, as these are known calls. We call the resulting set the
  395       -- identifiers we abstract over, thus @abs_ids@. These are all 'OutId's.
  396       -- We will save the set in 'LiftM.e_expansions' for each of the variables
  397       -- if we perform the lift.
  398       abs_ids = expander (delDVarSetList fvs bndrs)
  399 
  400       -- We don't lift updatable thunks or constructors
  401       any_memoized = any is_memoized_rhs rhss
  402       is_memoized_rhs StgRhsCon{} = True
  403       is_memoized_rhs (StgRhsClosure _ _ upd _ _) = isUpdatable upd
  404 
  405       -- Don't lift binders occurring as arguments. This would result in complex
  406       -- argument expressions which would have to be given a name, reintroducing
  407       -- the very allocation at each call site that we wanted to get rid off in
  408       -- the first place.
  409       arg_occs = or (mapMaybe (binderInfoOccursAsArg . fst) pairs)
  410 
  411       -- These don't allocate anyway.
  412       is_join_point = any isJoinId bndrs
  413 
  414       -- Abstracting over join points/let-no-escapes spoils them.
  415       abstracts_join_ids = any isJoinId (dVarSetElems abs_ids)
  416 
  417       -- Abstracting over known local functions that aren't floated themselves
  418       -- turns a known, fast call into an unknown, slow call:
  419       --
  420       --    let f x = ...
  421       --        g y = ... f x ... -- this was a known call
  422       --    in g 4
  423       --
  424       -- After lifting @g@, but not @f@:
  425       --
  426       --    l_g f y = ... f y ... -- this is now an unknown call
  427       --    let f x = ...
  428       --    in l_g f 4
  429       --
  430       -- We can abuse the results of arity analysis for this:
  431       -- idArity f > 0 ==> known
  432       known_fun id = idArity id > 0
  433       abstracts_known_local_fun
  434         = not (liftLamsKnown dflags) && any known_fun (dVarSetElems abs_ids)
  435 
  436       -- Number of arguments of a RHS in the current binding group if we decide
  437       -- to lift it
  438       n_args
  439         = length
  440         . StgToCmm.Closure.nonVoidIds -- void parameters don't appear in Cmm
  441         . (dVarSetElems abs_ids ++)
  442         . rhsLambdaBndrs
  443       max_n_args
  444         | isRec rec_flag = liftLamsRecArgs dflags
  445         | otherwise      = liftLamsNonRecArgs dflags
  446       -- We have 5 hardware registers on x86_64 to pass arguments in. Any excess
  447       -- args are passed on the stack, which means slow memory accesses
  448       args_spill_on_stack
  449         | Just n <- max_n_args = maximum (map n_args rhss) > n
  450         | otherwise = False
  451 
  452       -- We only perform the lift if allocations didn't increase.
  453       -- Note that @clo_growth@ will be 'infinity' if there was positive growth
  454       -- under a multi-shot lambda.
  455       -- Also, abstracting over LNEs is unacceptable. LNEs might return
  456       -- unlifted tuples, which idClosureFootprint can't cope with.
  457       inc_allocs = abstracts_join_ids || allocs > 0
  458       allocs = clo_growth + mkIntWithInf (negate closuresSize)
  459       -- We calculate and then add up the size of each binding's closure.
  460       -- GHC does not currently share closure environments, and we either lift
  461       -- the entire recursive binding group or none of it.
  462       closuresSize = sum $ flip map rhss $ \rhs ->
  463         closureSize profile
  464         . dVarSetElems
  465         . expander
  466         . flip dVarSetMinusVarSet bndrs_set
  467         $ freeVarsOfRhs rhs
  468       clo_growth = closureGrowth expander (idClosureFootprint platform) bndrs_set abs_ids scope
  469 
  470 rhsLambdaBndrs :: LlStgRhs -> [Id]
  471 rhsLambdaBndrs StgRhsCon{} = []
  472 rhsLambdaBndrs (StgRhsClosure _ _ _ bndrs _) = map binderInfoBndr bndrs
  473 
  474 -- | The size in words of a function closure closing over the given 'Id's,
  475 -- including the header.
  476 closureSize :: Profile -> [Id] -> WordOff
  477 closureSize profile ids = words + pc_STD_HDR_SIZE (platformConstants (profilePlatform profile))
  478   -- We go through sTD_HDR_SIZE rather than fixedHdrSizeW so that we don't
  479   -- optimise differently when profiling is enabled.
  480   where
  481     (words, _, _)
  482       -- Functions have a StdHeader (as opposed to ThunkHeader).
  483       = StgToCmm.Layout.mkVirtHeapOffsets profile StgToCmm.Layout.StdHeader
  484       . StgToCmm.Closure.addIdReps
  485       . StgToCmm.Closure.nonVoidIds
  486       $ ids
  487 
  488 -- | The number of words a single 'Id' adds to a closure's size.
  489 -- Note that this can't handle unboxed tuples (which may still be present in
  490 -- let-no-escapes, even after Unarise), in which case
  491 -- @'GHC.StgToCmm.Closure.idPrimRep'@ will crash.
  492 idClosureFootprint:: Platform -> Id -> WordOff
  493 idClosureFootprint platform
  494   = StgToCmm.ArgRep.argRepSizeW platform
  495   . StgToCmm.ArgRep.idArgRep platform
  496 
  497 -- | @closureGrowth expander sizer f fvs@ computes the closure growth in words
  498 -- as a result of lifting @f@ to top-level. If there was any growing closure
  499 -- under a multi-shot lambda, the result will be 'infinity'.
  500 -- Also see "GHC.Stg.Lift.Analysis#clogro".
  501 closureGrowth
  502   :: (DIdSet -> DIdSet)
  503   -- ^ Expands outer free ids that were lifted to their free vars
  504   -> (Id -> Int)
  505   -- ^ Computes the closure footprint of an identifier
  506   -> IdSet
  507   -- ^ Binding group for which lifting is to be decided
  508   -> DIdSet
  509   -- ^ Free vars of the whole binding group prior to lifting it. These must be
  510   --   available at call sites if we decide to lift the binding group.
  511   -> Skeleton
  512   -- ^ Abstraction of the scope of the function
  513   -> IntWithInf
  514   -- ^ Closure growth. 'infinity' indicates there was growth under a
  515   --   (multi-shot) lambda.
  516 closureGrowth expander sizer group abs_ids = go
  517   where
  518     go NilSk = 0
  519     go (BothSk a b) = go a + go b
  520     go (AltSk a b) = max (go a) (go b)
  521     go (ClosureSk _ clo_fvs rhs)
  522       -- If no binder of the @group@ occurs free in the closure, the lifting
  523       -- won't have any effect on it and we can omit the recursive call.
  524       | n_occs == 0 = 0
  525       -- Otherwise, we account the cost of allocating the closure and add it to
  526       -- the closure growth of its RHS.
  527       | otherwise   = mkIntWithInf cost + go rhs
  528       where
  529         n_occs = sizeDVarSet (clo_fvs' `dVarSetIntersectVarSet` group)
  530         -- What we close over considering prior lifting decisions
  531         clo_fvs' = expander clo_fvs
  532         -- Variables that would additionally occur free in the closure body if
  533         -- we lift @f@
  534         newbies = abs_ids `minusDVarSet` clo_fvs'
  535         -- Lifting @f@ removes @f@ from the closure but adds all @newbies@
  536         cost = nonDetStrictFoldDVarSet (\id size -> sizer id + size) 0 newbies - n_occs
  537         -- Using a non-deterministic fold is OK here because addition is commutative.
  538     go (RhsSk n body)
  539       -- The conservative assumption would be that
  540       --   1. Every RHS with positive growth would be called multiple times,
  541       --      modulo thunks.
  542       --   2. Every RHS with negative growth wouldn't be called at all.
  543       --
  544       -- In the first case, we'd have to return 'infinity', while in the
  545       -- second case, we'd have to return 0. But we can do far better
  546       -- considering information from the demand analyser, which provides us
  547       -- with conservative estimates on minimum and maximum evaluation
  548       -- cardinality. The @body_dmd@ part of 'RhsSk' is the result of
  549       -- 'rhsCard' and accurately captures the cardinality of the RHSs body
  550       -- relative to its defining context.
  551       | isAbs n      = 0
  552       | cg <= 0      = if isStrict n then cg else 0
  553       | isUsedOnce n = cg
  554       | otherwise    = infinity
  555       where
  556         cg = go body