never executed always true always false
    1 {-
    2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
    3 
    4 
    5                         -----------------
    6                         A demand analysis
    7                         -----------------
    8 -}
    9 
   10 
   11 module GHC.Core.Opt.DmdAnal
   12    ( DmdAnalOpts(..)
   13    , dmdAnalProgram
   14    )
   15 where
   16 
   17 import GHC.Prelude
   18 
   19 import GHC.Core.Opt.WorkWrap.Utils
   20 import GHC.Types.Demand   -- All of it
   21 import GHC.Core
   22 import GHC.Core.Multiplicity ( scaledThing )
   23 import GHC.Utils.Outputable
   24 import GHC.Types.Var.Env
   25 import GHC.Types.Var.Set
   26 import GHC.Types.Basic
   27 import Data.List        ( mapAccumL )
   28 import GHC.Core.DataCon
   29 import GHC.Types.ForeignCall ( isSafeForeignCall )
   30 import GHC.Types.Id
   31 import GHC.Core.Utils
   32 import GHC.Core.TyCon
   33 import GHC.Core.Type
   34 import GHC.Core.FVs      ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
   35 import GHC.Core.Coercion ( Coercion )
   36 import GHC.Core.TyCo.FVs ( coVarsOfCos )
   37 import GHC.Core.FamInstEnv
   38 import GHC.Core.Opt.Arity ( typeArity )
   39 import GHC.Utils.Misc
   40 import GHC.Utils.Panic
   41 import GHC.Utils.Panic.Plain
   42 import GHC.Data.Maybe         ( isJust )
   43 import GHC.Builtin.PrimOps
   44 import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
   45 import GHC.Types.Unique.Set
   46 
   47 import GHC.Utils.Trace
   48 _ = pprTrace -- Tired of commenting out the import all the time
   49 
   50 {-
   51 ************************************************************************
   52 *                                                                      *
   53 \subsection{Top level stuff}
   54 *                                                                      *
   55 ************************************************************************
   56 -}
   57 
   58 -- | Options for the demand analysis
   59 data DmdAnalOpts = DmdAnalOpts
   60    { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
   61    , dmd_unbox_width  :: !Int  -- ^ Use strict dictionaries
   62    }
   63 
   64 -- This is a strict alternative to (,)
   65 -- See Note [Space Leaks in Demand Analysis]
   66 data WithDmdType a = WithDmdType !DmdType !a
   67 
   68 getAnnotated :: WithDmdType a -> a
   69 getAnnotated (WithDmdType _ a) = a
   70 
   71 data DmdResult a b = R !a !b
   72 
   73 -- | Outputs a new copy of the Core program in which binders have been annotated
   74 -- with demand and strictness information.
   75 --
   76 -- Note: use `seqBinds` on the result to avoid leaks due to lazyness (cf Note
   77 -- [Stamp out space leaks in demand analysis])
   78 dmdAnalProgram :: DmdAnalOpts -> FamInstEnvs -> [CoreRule] -> CoreProgram -> CoreProgram
   79 dmdAnalProgram opts fam_envs rules binds
   80   = getAnnotated $ go (emptyAnalEnv opts fam_envs) binds
   81   where
   82     -- See Note [Analysing top-level bindings]
   83     -- and Note [Why care for top-level demand annotations?]
   84     go _   []     = WithDmdType nopDmdType []
   85     go env (b:bs) = cons_up $ dmdAnalBind TopLevel env topSubDmd b anal_body
   86       where
   87         anal_body env'
   88           | WithDmdType body_ty bs' <- go env' bs
   89           = WithDmdType (add_exported_uses env' body_ty (bindersOf b)) bs'
   90 
   91     cons_up :: WithDmdType (DmdResult b [b]) -> WithDmdType [b]
   92     cons_up (WithDmdType dmd_ty (R b' bs')) = WithDmdType dmd_ty (b' : bs')
   93 
   94     add_exported_uses :: AnalEnv -> DmdType -> [Id] -> DmdType
   95     add_exported_uses env = foldl' (add_exported_use env)
   96 
   97     -- | If @e@ is denoted by @dmd_ty@, then @add_exported_use _ dmd_ty id@
   98     -- corresponds to the demand type of @(id, e)@, but is a lot more direct.
   99     -- See Note [Analysing top-level bindings].
  100     add_exported_use :: AnalEnv -> DmdType -> Id -> DmdType
  101     add_exported_use env dmd_ty id
  102       | isExportedId id || elemVarSet id rule_fvs
  103       -- See Note [Absence analysis for stable unfoldings and RULES]
  104       = dmd_ty `plusDmdType` fst (dmdAnalStar env topDmd (Var id))
  105       | otherwise
  106       = dmd_ty
  107 
  108     rule_fvs :: IdSet
  109     rule_fvs = rulesRhsFreeIds rules
  110 
  111 -- | We attach useful (e.g. not 'topDmd') 'idDemandInfo' to top-level bindings
  112 -- that satisfy this function.
  113 --
  114 -- Basically, we want to know how top-level *functions* are *used*
  115 -- (e.g. called). The information will always be lazy.
  116 -- Any other top-level bindings are boring.
  117 --
  118 -- See also Note [Why care for top-level demand annotations?].
  119 isInterestingTopLevelFn :: Id -> Bool
  120 -- SG tried to set this to True and got a +2% ghc/alloc regression in T5642
  121 -- (which is dominated by the Simplifier) at no gain in analysis precision.
  122 -- If there was a gain, that regression might be acceptable.
  123 -- Plus, we could use LetUp for thunks and share some code with local let
  124 -- bindings.
  125 isInterestingTopLevelFn id =
  126   typeArity (idType id) `lengthExceeds` 0
  127 
  128 {- Note [Stamp out space leaks in demand analysis]
  129 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  130 The demand analysis pass outputs a new copy of the Core program in
  131 which binders have been annotated with demand and strictness
  132 information. It's tiresome to ensure that this information is fully
  133 evaluated everywhere that we produce it, so we just run a single
  134 seqBinds over the output before returning it, to ensure that there are
  135 no references holding on to the input Core program.
  136 
  137 This makes a ~30% reduction in peak memory usage when compiling
  138 DynFlags (cf #9675 and #13426).
  139 
  140 This is particularly important when we are doing late demand analysis,
  141 since we don't do a seqBinds at any point thereafter. Hence code
  142 generation would hold on to an extra copy of the Core program, via
  143 unforced thunks in demand or strictness information; and it is the
  144 most memory-intensive part of the compilation process, so this added
  145 seqBinds makes a big difference in peak memory usage.
  146 
  147 Note [Analysing top-level bindings]
  148 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  149 Consider a CoreProgram like
  150   e1 = ...
  151   n1 = ...
  152   e2 = \a b -> ... fst (n1 a b) ...
  153   n2 = \c d -> ... snd (e2 c d) ...
  154   ...
  155 where e* are exported, but n* are not.
  156 Intuitively, we can see that @n1@ is only ever called with two arguments
  157 and in every call site, the first component of the result of the call
  158 is evaluated. Thus, we'd like it to have idDemandInfo @LCL(CM(P(1L,A))@.
  159 NB: We may *not* give e2 a similar annotation, because it is exported and
  160 external callers might use it in arbitrary ways, expressed by 'topDmd'.
  161 This can then be exploited by Nested CPR and eta-expansion,
  162 see Note [Why care for top-level demand annotations?].
  163 
  164 How do we get this result? Answer: By analysing the program as if it was a let
  165 expression of this form:
  166   let e1 = ... in
  167   let n1 = ... in
  168   let e2 = ... in
  169   let n2 = ... in
  170   (e1,e2, ...)
  171 E.g. putting all bindings in nested lets and returning all exported binders in a tuple.
  172 Of course, we will not actually build that CoreExpr! Instead we faithfully
  173 simulate analysis of said expression by adding the free variable 'DmdEnv'
  174 of @e*@'s strictness signatures to the 'DmdType' we get from analysing the
  175 nested bindings.
  176 
  177 And even then the above form blows up analysis performance in T10370:
  178 If @e1@ uses many free variables, we'll unnecessarily carry their demands around
  179 with us from the moment we analyse the pair to the moment we bubble back up to
  180 the binding for @e1@. So instead we analyse as if we had
  181   let e1 = ... in
  182   (e1, let n1 = ... in
  183   (    let e2 = ... in
  184   (e2, let n2 = ... in
  185   (    ...))))
  186 That is, a series of right-nested pairs, where the @fst@ are the exported
  187 binders of the last enclosing let binding and @snd@ continues the nested
  188 lets.
  189 
  190 Variables occurring free in RULE RHSs are to be handled the same as exported Ids.
  191 See also Note [Absence analysis for stable unfoldings and RULES].
  192 
  193 Note [Why care for top-level demand annotations?]
  194 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  195 Reading Note [Analysing top-level bindings], you might think that we go through
  196 quite some trouble to get useful demands for top-level bindings. They can never
  197 be strict, for example, so why bother?
  198 
  199 First, we get to eta-expand top-level bindings that we weren't able to
  200 eta-expand before without Call Arity. From T18894b:
  201   module T18894b (f) where
  202   eta :: Int -> Int -> Int
  203   eta x = if fst (expensive x) == 13 then \y -> ... else \y -> ...
  204   f m = ... eta m 2 ... eta 2 m ...
  205 Since only @f@ is exported, we see all call sites of @eta@ and can eta-expand to
  206 arity 2.
  207 
  208 The call demands we get for some top-level bindings will also allow Nested CPR
  209 to unbox deeper. From T18894:
  210   module T18894 (h) where
  211   g m n = (2 * m, 2 `div` n)
  212   {-# NOINLINE g #-}
  213   h :: Int -> Int
  214   h m = ... snd (g m 2) ... uncurry (+) (g 2 m) ...
  215 Only @h@ is exported, hence we see that @g@ is always called in contexts were we
  216 also force the division in the second component of the pair returned by @g@.
  217 This allows Nested CPR to evaluate the division eagerly and return an I# in its
  218 position.
  219 -}
  220 
  221 {-
  222 ************************************************************************
  223 *                                                                      *
  224 \subsection{The analyser itself}
  225 *                                                                      *
  226 ************************************************************************
  227 -}
  228 
  229 -- | Analyse a binding group and its \"body\", e.g. where it is in scope.
  230 --
  231 -- It calls a function that knows how to analyse this \"body\" given
  232 -- an 'AnalEnv' with updated demand signatures for the binding group
  233 -- (reflecting their 'idDmdSigInfo') and expects to receive a
  234 -- 'DmdType' in return, which it uses to annotate the binding group with their
  235 -- 'idDemandInfo'.
  236 dmdAnalBind
  237   :: TopLevelFlag
  238   -> AnalEnv
  239   -> SubDemand                 -- ^ Demand put on the "body"
  240                                --   (important for join points)
  241   -> CoreBind
  242   -> (AnalEnv -> WithDmdType a) -- ^ How to analyse the "body", e.g.
  243                                --   where the binding is in scope
  244   -> WithDmdType (DmdResult CoreBind a)
  245 dmdAnalBind top_lvl env dmd bind anal_body = case bind of
  246   NonRec id rhs
  247     | useLetUp top_lvl id
  248     -> dmdAnalBindLetUp   top_lvl env     id rhs anal_body
  249   _ -> dmdAnalBindLetDown top_lvl env dmd bind   anal_body
  250 
  251 -- | Annotates uninteresting top level functions ('isInterestingTopLevelFn')
  252 -- with 'topDmd', the rest with the given demand.
  253 setBindIdDemandInfo :: TopLevelFlag -> Id -> Demand -> Id
  254 setBindIdDemandInfo top_lvl id dmd = setIdDemandInfo id $ case top_lvl of
  255   TopLevel | not (isInterestingTopLevelFn id) -> topDmd
  256   _                                           -> dmd
  257 
  258 -- | Let bindings can be processed in two ways:
  259 -- Down (RHS before body) or Up (body before RHS).
  260 -- This function handles the up variant.
  261 --
  262 -- It is very simple. For  let x = rhs in body
  263 --   * Demand-analyse 'body' in the current environment
  264 --   * Find the demand, 'rhs_dmd' placed on 'x' by 'body'
  265 --   * Demand-analyse 'rhs' in 'rhs_dmd'
  266 --
  267 -- This is used for a non-recursive local let without manifest lambdas (see
  268 -- 'useLetUp').
  269 --
  270 -- This is the LetUp rule in the paper “Higher-Order Cardinality Analysis”.
  271 dmdAnalBindLetUp :: TopLevelFlag
  272                  -> AnalEnv
  273                  -> Id
  274                  -> CoreExpr
  275                  -> (AnalEnv -> WithDmdType a)
  276                  -> WithDmdType (DmdResult CoreBind a)
  277 dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec id' rhs') (body'))
  278   where
  279     WithDmdType body_ty body'   = anal_body env
  280     WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
  281     -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
  282     id_dmd'            = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd
  283     !id'               = setBindIdDemandInfo top_lvl id id_dmd'
  284     (rhs_ty, rhs')     = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs
  285 
  286     -- See Note [Absence analysis for stable unfoldings and RULES]
  287     rule_fvs           = bndrRuleAndUnfoldingIds id
  288     final_ty           = body_ty' `plusDmdType` rhs_ty `keepAliveDmdType` rule_fvs
  289 
  290 -- | Let bindings can be processed in two ways:
  291 -- Down (RHS before body) or Up (body before RHS).
  292 -- This function handles the down variant.
  293 --
  294 -- It computes a demand signature (by means of 'dmdAnalRhsSig') and uses
  295 -- that at call sites in the body.
  296 --
  297 -- It is used for toplevel definitions, recursive definitions and local
  298 -- non-recursive definitions that have manifest lambdas (cf. 'useLetUp').
  299 -- Local non-recursive definitions without a lambda are handled with LetUp.
  300 --
  301 -- This is the LetDown rule in the paper “Higher-Order Cardinality Analysis”.
  302 dmdAnalBindLetDown :: TopLevelFlag -> AnalEnv -> SubDemand -> CoreBind -> (AnalEnv -> WithDmdType a) -> WithDmdType (DmdResult CoreBind a)
  303 dmdAnalBindLetDown top_lvl env dmd bind anal_body = case bind of
  304   NonRec id rhs
  305     | (env', lazy_fv, id1, rhs1) <-
  306         dmdAnalRhsSig top_lvl NonRecursive env dmd id rhs
  307     -> do_rest env' lazy_fv [(id1, rhs1)] (uncurry NonRec . only)
  308   Rec pairs
  309     | (env', lazy_fv, pairs') <- dmdFix top_lvl env dmd pairs
  310     -> do_rest env' lazy_fv pairs' Rec
  311   where
  312     do_rest env' lazy_fv pairs1 build_bind = WithDmdType final_ty (R (build_bind pairs2) body')
  313       where
  314         WithDmdType body_ty body'        = anal_body env'
  315         -- see Note [Lazy and unleashable free variables]
  316         dmd_ty                          = addLazyFVs body_ty lazy_fv
  317         WithDmdType final_ty id_dmds    = findBndrsDmds env' dmd_ty (strictMap fst pairs1)
  318         -- Important to force this as build_bind might not force it.
  319         !pairs2                         = strictZipWith do_one pairs1 id_dmds
  320         do_one (id', rhs') dmd          = ((,) $! setBindIdDemandInfo top_lvl id' dmd) $! rhs'
  321         -- If the actual demand is better than the vanilla call
  322         -- demand, you might think that we might do better to re-analyse
  323         -- the RHS with the stronger demand.
  324         -- But (a) That seldom happens, because it means that *every* path in
  325         --         the body of the let has to use that stronger demand
  326         -- (b) It often happens temporarily in when fixpointing, because
  327         --     the recursive function at first seems to place a massive demand.
  328         --     But we don't want to go to extra work when the function will
  329         --     probably iterate to something less demanding.
  330         -- In practice, all the times the actual demand on id2 is more than
  331         -- the vanilla call demand seem to be due to (b).  So we don't
  332         -- bother to re-analyse the RHS.
  333 
  334 -- If e is complicated enough to become a thunk, its contents will be evaluated
  335 -- at most once, so oneify it.
  336 dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand
  337 dmdTransformThunkDmd e
  338   | exprIsTrivial e = id
  339   | otherwise       = oneifyDmd
  340 
  341 -- Do not process absent demands
  342 -- Otherwise act like in a normal demand analysis
  343 -- See ↦* relation in the Cardinality Analysis paper
  344 dmdAnalStar :: AnalEnv
  345             -> Demand   -- This one takes a *Demand*
  346             -> CoreExpr -- Should obey the let/app invariant
  347             -> (PlusDmdArg, CoreExpr)
  348 dmdAnalStar env (n :* sd) e
  349   -- NB: (:*) expands AbsDmd and BotDmd as needed
  350   -- See Note [Analysing with absent demand]
  351   | WithDmdType dmd_ty e' <- dmdAnal env sd e
  352   = assertPpr (not (isUnliftedType (exprType e)) || exprOkForSpeculation e) (ppr e)
  353     -- The argument 'e' should satisfy the let/app invariant
  354     (toPlusDmdArg $ multDmdType n dmd_ty, e')
  355 
  356 -- Main Demand Analsysis machinery
  357 dmdAnal, dmdAnal' :: AnalEnv
  358         -> SubDemand         -- The main one takes a *SubDemand*
  359         -> CoreExpr -> WithDmdType CoreExpr
  360 
  361 dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $
  362                   dmdAnal' env d e
  363 
  364 dmdAnal' _ _ (Lit lit)     = WithDmdType nopDmdType (Lit lit)
  365 dmdAnal' _ _ (Type ty)     = WithDmdType nopDmdType (Type ty) -- Doesn't happen, in fact
  366 dmdAnal' _ _ (Coercion co)
  367   = WithDmdType (unitDmdType (coercionDmdEnv co)) (Coercion co)
  368 
  369 dmdAnal' env dmd (Var var)
  370   = WithDmdType (dmdTransform env var dmd) (Var var)
  371 
  372 dmdAnal' env dmd (Cast e co)
  373   = WithDmdType (dmd_ty `plusDmdType` mkPlusDmdArg (coercionDmdEnv co)) (Cast e' co)
  374   where
  375     WithDmdType dmd_ty e' = dmdAnal env dmd e
  376 
  377 dmdAnal' env dmd (Tick t e)
  378   = WithDmdType dmd_ty (Tick t e')
  379   where
  380     WithDmdType dmd_ty e' = dmdAnal env dmd e
  381 
  382 dmdAnal' env dmd (App fun (Type ty))
  383   = WithDmdType fun_ty (App fun' (Type ty))
  384   where
  385     WithDmdType fun_ty fun' = dmdAnal env dmd fun
  386 
  387 -- Lots of the other code is there to make this
  388 -- beautiful, compositional, application rule :-)
  389 dmdAnal' env dmd (App fun arg)
  390   = -- This case handles value arguments (type args handled above)
  391     -- Crucially, coercions /are/ handled here, because they are
  392     -- value arguments (#10288)
  393     let
  394         call_dmd          = mkCalledOnceDmd dmd
  395         WithDmdType fun_ty fun' = dmdAnal env call_dmd fun
  396         (arg_dmd, res_ty) = splitDmdTy fun_ty
  397         (arg_ty, arg')    = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg
  398     in
  399 --    pprTrace "dmdAnal:app" (vcat
  400 --         [ text "dmd =" <+> ppr dmd
  401 --         , text "expr =" <+> ppr (App fun arg)
  402 --         , text "fun dmd_ty =" <+> ppr fun_ty
  403 --         , text "arg dmd =" <+> ppr arg_dmd
  404 --         , text "arg dmd_ty =" <+> ppr arg_ty
  405 --         , text "res dmd_ty =" <+> ppr res_ty
  406 --         , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ])
  407     WithDmdType (res_ty `plusDmdType` arg_ty) (App fun' arg')
  408 
  409 dmdAnal' env dmd (Lam var body)
  410   | isTyVar var
  411   = let
  412         WithDmdType body_ty body' = dmdAnal env dmd body
  413     in
  414     WithDmdType body_ty (Lam var body')
  415 
  416   | otherwise
  417   = let (n, body_dmd)    = peelCallDmd dmd
  418           -- body_dmd: a demand to analyze the body
  419 
  420         WithDmdType body_ty body' = dmdAnal env body_dmd body
  421         WithDmdType lam_ty var'   = annotateLamIdBndr env body_ty var
  422         new_dmd_type = multDmdType n lam_ty
  423     in
  424     WithDmdType new_dmd_type (Lam var' body')
  425 
  426 dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
  427   -- Only one alternative.
  428   -- If it's a DataAlt, it should be the only constructor of the type.
  429   | is_single_data_alt alt
  430   = let
  431         WithDmdType rhs_ty rhs'           = dmdAnal env dmd rhs
  432         WithDmdType alt_ty1 fld_dmds      = findBndrsDmds env rhs_ty bndrs
  433         WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr
  434         !case_bndr'                       = setIdDemandInfo case_bndr case_bndr_dmd
  435         -- Evaluation cardinality on the case binder is irrelevant and a no-op.
  436         -- What matters is its nested sub-demand!
  437         -- NB: If case_bndr_dmd is absDmd, boxity will say Unboxed, which is
  438         -- what we want, because then `seq` will put a `seqDmd` on its scrut.
  439         (_ :* case_bndr_sd) = case_bndr_dmd
  440         -- Compute demand on the scrutinee
  441         -- FORCE the result, otherwise thunks will end up retaining the
  442         -- whole DmdEnv
  443         !(!bndrs', !scrut_sd)
  444           | DataAlt _ <- alt
  445           -- See Note [Demand on the scrutinee of a product case]
  446           -- See Note [Demand on case-alternative binders]
  447           , (!scrut_sd, fld_dmds') <- addCaseBndrDmd case_bndr_sd fld_dmds
  448           , let !bndrs' = setBndrsDemandInfo bndrs fld_dmds'
  449           = (bndrs', scrut_sd)
  450           | otherwise
  451           -- __DEFAULT and literal alts. Simply add demands and discard the
  452           -- evaluation cardinality, as we evaluate the scrutinee exactly once.
  453           = assert (null bndrs) (bndrs, case_bndr_sd)
  454         fam_envs                 = ae_fam_envs env
  455         alt_ty3
  456           -- See Note [Precise exceptions and strictness analysis] in "GHC.Types.Demand"
  457           | exprMayThrowPreciseException fam_envs scrut
  458           = deferAfterPreciseException alt_ty2
  459           | otherwise
  460           = alt_ty2
  461 
  462         WithDmdType scrut_ty scrut' = dmdAnal env scrut_sd scrut
  463         res_ty             = alt_ty3 `plusDmdType` toPlusDmdArg scrut_ty
  464     in
  465 --    pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
  466 --                                   , text "dmd" <+> ppr dmd
  467 --                                   , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr')
  468 --                                   , text "scrut_sd" <+> ppr scrut_sd
  469 --                                   , text "scrut_ty" <+> ppr scrut_ty
  470 --                                   , text "alt_ty" <+> ppr alt_ty2
  471 --                                   , text "res_ty" <+> ppr res_ty ]) $
  472     WithDmdType res_ty (Case scrut' case_bndr' ty [Alt alt bndrs' rhs'])
  473     where
  474       is_single_data_alt (DataAlt dc) = isJust $ tyConSingleAlgDataCon_maybe $ dataConTyCon dc
  475       is_single_data_alt _            = True
  476 
  477 
  478 
  479 
  480 dmdAnal' env dmd (Case scrut case_bndr ty alts)
  481   = let      -- Case expression with multiple alternatives
  482         WithDmdType alt_ty alts'     = combineAltDmds alts
  483 
  484         combineAltDmds [] = WithDmdType botDmdType []
  485         combineAltDmds (a:as) =
  486           let
  487             WithDmdType cur_ty a' = dmdAnalSumAlt env dmd case_bndr a
  488             WithDmdType rest_ty as' = combineAltDmds as
  489           in WithDmdType (lubDmdType cur_ty rest_ty) (a':as')
  490 
  491         WithDmdType alt_ty1 case_bndr_dmd = findBndrDmd env alt_ty case_bndr
  492         !case_bndr'                       = setIdDemandInfo case_bndr case_bndr_dmd
  493         WithDmdType scrut_ty scrut'       = dmdAnal env topSubDmd scrut
  494                                -- NB: Base case is botDmdType, for empty case alternatives
  495                                --     This is a unit for lubDmdType, and the right result
  496                                --     when there really are no alternatives
  497         fam_envs             = ae_fam_envs env
  498         alt_ty2
  499           -- See Note [Precise exceptions and strictness analysis] in "GHC.Types.Demand"
  500           | exprMayThrowPreciseException fam_envs scrut
  501           = deferAfterPreciseException alt_ty1
  502           | otherwise
  503           = alt_ty1
  504         res_ty               = alt_ty2 `plusDmdType` toPlusDmdArg scrut_ty
  505 
  506     in
  507 --    pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut
  508 --                                   , text "scrut_ty" <+> ppr scrut_ty
  509 --                                   , text "alt_tys" <+> ppr alt_tys
  510 --                                   , text "alt_ty2" <+> ppr alt_ty2
  511 --                                   , text "res_ty" <+> ppr res_ty ]) $
  512     WithDmdType res_ty (Case scrut' case_bndr' ty alts')
  513 
  514 dmdAnal' env dmd (Let bind body)
  515   = WithDmdType final_ty (Let bind' body')
  516   where
  517     !(WithDmdType final_ty (R bind' body')) = dmdAnalBind NotTopLevel env dmd bind go'
  518     go' !env'                 = dmdAnal env' dmd body
  519 
  520 -- | A simple, syntactic analysis of whether an expression MAY throw a precise
  521 -- exception when evaluated. It's always sound to return 'True'.
  522 -- See Note [Which scrutinees may throw precise exceptions].
  523 exprMayThrowPreciseException :: FamInstEnvs -> CoreExpr -> Bool
  524 exprMayThrowPreciseException envs e
  525   | not (forcesRealWorld envs (exprType e))
  526   = False -- 1. in the Note
  527   | (Var f, _) <- collectArgs e
  528   , Just op    <- isPrimOpId_maybe f
  529   , op /= RaiseIOOp
  530   = False -- 2. in the Note
  531   | (Var f, _) <- collectArgs e
  532   , Just fcall <- isFCallId_maybe f
  533   , not (isSafeForeignCall fcall)
  534   = False -- 3. in the Note
  535   | otherwise
  536   = True  -- _. in the Note
  537 
  538 -- | Recognises types that are
  539 --    * @State# RealWorld@
  540 --    * Unboxed tuples with a @State# RealWorld@ field
  541 -- modulo coercions. This will detect 'IO' actions (even post Nested CPR! See
  542 -- T13380e) and user-written variants thereof by their type.
  543 forcesRealWorld :: FamInstEnvs -> Type -> Bool
  544 forcesRealWorld fam_envs ty
  545   | ty `eqType` realWorldStatePrimTy
  546   = True
  547   | Just (tc, tc_args, _co)  <- normSplitTyConApp_maybe fam_envs ty
  548   , isUnboxedTupleTyCon tc
  549   , let field_tys = dataConInstArgTys (tyConSingleDataCon tc) tc_args
  550   = any (eqType realWorldStatePrimTy . scaledThing) field_tys
  551   | otherwise
  552   = False
  553 
  554 dmdAnalSumAlt :: AnalEnv -> SubDemand -> Id -> Alt Var -> WithDmdType (Alt Var)
  555 dmdAnalSumAlt env dmd case_bndr (Alt con bndrs rhs)
  556   | WithDmdType rhs_ty rhs' <- dmdAnal env dmd rhs
  557   , WithDmdType alt_ty dmds <- findBndrsDmds env rhs_ty bndrs
  558   , let (_ :* case_bndr_sd) = findIdDemand alt_ty case_bndr
  559         -- See Note [Demand on case-alternative binders]
  560         -- we can't use the scrut_sd, because it says 'Prod' and we'll use
  561         -- topSubDmd anyway for scrutinees of sum types.
  562         (!_scrut_sd, dmds') = addCaseBndrDmd case_bndr_sd dmds
  563         -- Do not put a thunk into the Alt
  564         !new_ids            = setBndrsDemandInfo bndrs dmds'
  565   = WithDmdType alt_ty (Alt con new_ids rhs')
  566 
  567 -- Precondition: The SubDemand is not a Call
  568 -- See Note [Demand on the scrutinee of a product case]
  569 -- and Note [Demand on case-alternative binders]
  570 addCaseBndrDmd :: SubDemand -- On the case binder
  571                -> [Demand]  -- On the fields of the constructor
  572                -> (SubDemand, [Demand])
  573                             -- SubDemand on the case binder incl. field demands
  574                             -- and final demands for the components of the constructor
  575 addCaseBndrDmd case_sd fld_dmds
  576   | Just (_, ds) <- viewProd (length fld_dmds) scrut_sd
  577   = (scrut_sd, ds)
  578   | otherwise
  579   = pprPanic "was a call demand" (ppr case_sd $$ ppr fld_dmds) -- See the Precondition
  580   where
  581     scrut_sd = case_sd `plusSubDmd` mkProd Unboxed fld_dmds
  582 
  583 {-
  584 Note [Analysing with absent demand]
  585 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  586 Suppose we analyse an expression with demand A.  The "A" means
  587 "absent", so this expression will never be needed. What should happen?
  588 There are several wrinkles:
  589 
  590 * We *do* want to analyse the expression regardless.
  591   Reason: Note [Always analyse in virgin pass]
  592 
  593   But we can post-process the results to ignore all the usage
  594   demands coming back. This is done by multDmdType.
  595 
  596 * Nevertheless, which sub-demand should we pick for analysis?
  597   Since the demand was absent, any would do. Worker/wrapper will replace
  598   absent bindings with an absent filler anyway, so annotations in the RHS
  599   of an absent binding don't matter much.
  600   Picking 'botSubDmd' would be the most useful, but would also look a bit
  601   misleading in the Core output of DmdAnal, because all nested annotations would
  602   be bottoming. Better pick 'seqSubDmd', so that we annotate many of those
  603   nested bindings with A themselves.
  604 
  605 * In a previous incarnation of GHC we needed to be extra careful in the
  606   case of an *unlifted type*, because unlifted values are evaluated
  607   even if they are not used.  Example (see #9254):
  608      f :: (() -> (# Int#, () #)) -> ()
  609           -- Strictness signature is
  610           --    <CS(S(A,SU))>
  611           -- I.e. calls k, but discards first component of result
  612      f k = case k () of (# _, r #) -> r
  613 
  614      g :: Int -> ()
  615      g y = f (\n -> (# case y of I# y2 -> y2, n #))
  616 
  617   Here f's strictness signature says (correctly) that it calls its
  618   argument function and ignores the first component of its result.
  619   This is correct in the sense that it'd be fine to (say) modify the
  620   function so that always returned 0# in the first component.
  621 
  622   But in function g, we *will* evaluate the 'case y of ...', because
  623   it has type Int#.  So 'y' will be evaluated.  So we must record this
  624   usage of 'y', else 'g' will say 'y' is absent, and will w/w so that
  625   'y' is bound to an aBSENT_ERROR thunk.
  626 
  627   However, the argument of toSubDmd always satisfies the let/app
  628   invariant; so if it is unlifted it is also okForSpeculation, and so
  629   can be evaluated in a short finite time -- and that rules out nasty
  630   cases like the one above.  (I'm not quite sure why this was a
  631   problem in an earlier version of GHC, but it isn't now.)
  632 
  633 Note [Always analyse in virgin pass]
  634 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  635 Tricky point: make sure that we analyse in the 'virgin' pass. Consider
  636    rec { f acc x True  = f (...rec { g y = ...g... }...)
  637          f acc x False = acc }
  638 In the virgin pass for 'f' we'll give 'f' a very strict (bottom) type.
  639 That might mean that we analyse the sub-expression containing the
  640 E = "...rec g..." stuff in a bottom demand.  Suppose we *didn't analyse*
  641 E, but just returned botType.
  642 
  643 Then in the *next* (non-virgin) iteration for 'f', we might analyse E
  644 in a weaker demand, and that will trigger doing a fixpoint iteration
  645 for g.  But *because it's not the virgin pass* we won't start g's
  646 iteration at bottom.  Disaster.  (This happened in $sfibToList' of
  647 nofib/spectral/fibheaps.)
  648 
  649 So in the virgin pass we make sure that we do analyse the expression
  650 at least once, to initialise its signatures.
  651 
  652 Note [Which scrutinees may throw precise exceptions]
  653 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  654 This is the specification of 'exprMayThrowPreciseExceptions',
  655 which is important for Scenario 2 of
  656 Note [Precise exceptions and strictness analysis] in GHC.Types.Demand.
  657 
  658 For an expression @f a1 ... an :: ty@ we determine that
  659   1. False  If ty is *not* @State# RealWorld@ or an unboxed tuple thereof.
  660             This check is done by 'forcesRealWorld'.
  661             (Why not simply unboxed pairs as above? This is motivated by
  662             T13380{d,e}.)
  663   2. False  If f is a PrimOp, and it is *not* raiseIO#
  664   3. False  If f is an unsafe FFI call ('PlayRisky')
  665   _. True   Otherwise "give up".
  666 
  667 It is sound to return False in those cases, because
  668   1. We don't give any guarantees for unsafePerformIO, so no precise exceptions
  669      from pure code.
  670   2. raiseIO# is the only primop that may throw a precise exception.
  671   3. Unsafe FFI calls may not interact with the RTS (to throw, for example).
  672      See haddock on GHC.Types.ForeignCall.PlayRisky.
  673 
  674 We *need* to return False in those cases, because
  675   1. We would lose too much strictness in pure code, all over the place.
  676   2. We would lose strictness for primops like getMaskingState#, which
  677      introduces a substantial regression in
  678      GHC.IO.Handle.Internals.wantReadableHandle.
  679   3. We would lose strictness for code like GHC.Fingerprint.fingerprintData,
  680      where an intermittent FFI call to c_MD5Init would otherwise lose
  681      strictness on the arguments len and buf, leading to regressions in T9203
  682      (2%) and i386's haddock.base (5%). Tested by T13380f.
  683 
  684 In !3014 we tried a more sophisticated analysis by introducing ConOrDiv (nic)
  685 to the Divergence lattice, but in practice it turned out to be hard to untaint
  686 from 'topDiv' to 'conDiv', leading to bugs, performance regressions and
  687 complexity that didn't justify the single fixed testcase T13380c.
  688 
  689 Note [Demand on the scrutinee of a product case]
  690 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  691 When figuring out the demand on the scrutinee of a product case,
  692 we use the demands of the case alternative, i.e. id_dmds.
  693 But note that these include the demand on the case binder;
  694 see Note [Demand on case-alternative binders] in GHC.Types.Demand.
  695 This is crucial. Example:
  696    f x = case x of y { (a,b) -> k y a }
  697 If we just take scrut_demand = 1P(L,A), then we won't pass x to the
  698 worker, so the worker will rebuild
  699      x = (a, absent-error)
  700 and that'll crash.
  701 
  702 Note [Demand on case-alternative binders]
  703 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  704 The demand on a binder in a case alternative comes
  705   (a) From the demand on the binder itself
  706   (b) From the demand on the case binder
  707 Forgetting (b) led directly to #10148.
  708 
  709 Example. Source code:
  710   f x@(p,_) = if p then foo x else True
  711 
  712   foo (p,True) = True
  713   foo (p,q)    = foo (q,p)
  714 
  715 After strictness analysis, forgetting (b):
  716   f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
  717       case x_an1
  718       of wild_X7 [Dmd=MP(ML,ML)]
  719       { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
  720       case p_an2 of _ {
  721         False -> GHC.Types.True;
  722         True -> foo wild_X7 }
  723 
  724 Note that ds_dnz is syntactically dead, but the expression bound to it is
  725 reachable through the case binder wild_X7. Now watch what happens if we inline
  726 foo's wrapper:
  727   f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
  728       case x_an1
  729       of _ [Dmd=MP(ML,ML)]
  730       { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
  731       case p_an2 of _ {
  732         False -> GHC.Types.True;
  733         True -> $wfoo_soq GHC.Types.True ds_dnz }
  734 
  735 Look at that! ds_dnz has come back to life in the call to $wfoo_soq! A second
  736 run of demand analysis would no longer infer ds_dnz to be absent.
  737 But unlike occurrence analysis, which infers properties of the *syntactic*
  738 shape of the program, the results of demand analysis describe expressions
  739 *semantically* and are supposed to be mostly stable across Simplification.
  740 That's why we should better account for (b).
  741 In #10148, we ended up emitting a single-entry thunk instead of an updateable
  742 thunk for a let binder that was an an absent case-alt binder during DmdAnal.
  743 
  744 This is needed even for non-product types, in case the case-binder
  745 is used but the components of the case alternative are not.
  746 
  747 Note [Aggregated demand for cardinality]
  748 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  749 FIXME: This Note should be named [LetUp vs. LetDown] and probably predates
  750 said separation. SG
  751 
  752 We use different strategies for strictness and usage/cardinality to
  753 "unleash" demands captured on free variables by bindings. Let us
  754 consider the example:
  755 
  756 f1 y = let {-# NOINLINE h #-}
  757            h = y
  758        in  (h, h)
  759 
  760 We are interested in obtaining cardinality demand U1 on |y|, as it is
  761 used only in a thunk, and, therefore, is not going to be updated any
  762 more. Therefore, the demand on |y|, captured and unleashed by usage of
  763 |h| is U1. However, if we unleash this demand every time |h| is used,
  764 and then sum up the effects, the ultimate demand on |y| will be U1 +
  765 U1 = U. In order to avoid it, we *first* collect the aggregate demand
  766 on |h| in the body of let-expression, and only then apply the demand
  767 transformer:
  768 
  769 transf[x](U) = {y |-> U1}
  770 
  771 so the resulting demand on |y| is U1.
  772 
  773 The situation is, however, different for strictness, where this
  774 aggregating approach exhibits worse results because of the nature of
  775 |both| operation for strictness. Consider the example:
  776 
  777 f y c =
  778   let h x = y |seq| x
  779    in case of
  780         True  -> h True
  781         False -> y
  782 
  783 It is clear that |f| is strict in |y|, however, the suggested analysis
  784 will infer from the body of |let| that |h| is used lazily (as it is
  785 used in one branch only), therefore lazy demand will be put on its
  786 free variable |y|. Conversely, if the demand on |h| is unleashed right
  787 on the spot, we will get the desired result, namely, that |f| is
  788 strict in |y|.
  789 
  790 
  791 ************************************************************************
  792 *                                                                      *
  793                     Demand transformer
  794 *                                                                      *
  795 ************************************************************************
  796 -}
  797 
  798 dmdTransform :: AnalEnv   -- ^ The analysis environment
  799              -> Id        -- ^ The variable
  800              -> SubDemand -- ^ The evaluation context of the var
  801              -> DmdType   -- ^ The demand type unleashed by the variable in this
  802                           -- context. The returned DmdEnv includes the demand on
  803                           -- this function plus demand on its free variables
  804 -- See Note [What are demand signatures?] in "GHC.Types.Demand"
  805 dmdTransform env var sd
  806   -- Data constructors
  807   | isDataConWorkId var
  808   = dmdTransformDataConSig (idArity var) sd
  809   -- Dictionary component selectors
  810   -- Used to be controlled by a flag.
  811   -- See #18429 for some perf measurements.
  812   | Just _ <- isClassOpId_maybe var
  813   = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr (idDmdSig var) $$ ppr sd) $
  814     dmdTransformDictSelSig (idDmdSig var) sd
  815   -- Imported functions
  816   | isGlobalId var
  817   , let res = dmdTransformSig (idDmdSig var) sd
  818   = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr sd, ppr res])
  819     res
  820   -- Top-level or local let-bound thing for which we use LetDown ('useLetUp').
  821   -- In that case, we have a strictness signature to unleash in our AnalEnv.
  822   | Just (sig, top_lvl) <- lookupSigEnv env var
  823   , let fn_ty = dmdTransformSig sig sd
  824   = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr sd, ppr fn_ty]) $
  825     case top_lvl of
  826       NotTopLevel -> addVarDmd fn_ty var (C_11 :* sd)
  827       TopLevel
  828         | isInterestingTopLevelFn var
  829         -- Top-level things will be used multiple times or not at
  830         -- all anyway, hence the multDmd below: It means we don't
  831         -- have to track whether @var@ is used strictly or at most
  832         -- once, because ultimately it never will.
  833         -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* sd)) -- discard strictness
  834         | otherwise
  835         -> fn_ty -- don't bother tracking; just annotate with 'topDmd' later
  836   -- Everything else:
  837   --   * Local let binders for which we use LetUp (cf. 'useLetUp')
  838   --   * Lambda binders
  839   --   * Case and constructor field binders
  840   | otherwise
  841   = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr boxity, ppr sd]) $
  842     unitDmdType (unitVarEnv var (C_11 :* sd))
  843 
  844 {- *********************************************************************
  845 *                                                                      *
  846                       Binding right-hand sides
  847 *                                                                      *
  848 ********************************************************************* -}
  849 
  850 -- | @dmdAnalRhsSig@ analyses the given RHS to compute a demand signature
  851 -- for the LetDown rule. It works as follows:
  852 --
  853 --  * assuming the weakest possible body sub-demand, L
  854 --  * looking at the definition
  855 --  * determining a strictness signature
  856 --
  857 -- Since it assumed a body sub-demand of L, the resulting signature is
  858 -- applicable at any call site.
  859 dmdAnalRhsSig
  860   :: TopLevelFlag
  861   -> RecFlag
  862   -> AnalEnv -> SubDemand
  863   -> Id -> CoreExpr
  864   -> (AnalEnv, DmdEnv, Id, CoreExpr)
  865 -- Process the RHS of the binding, add the strictness signature
  866 -- to the Id, and augment the environment with the signature as well.
  867 -- See Note [NOINLINE and strictness]
  868 dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
  869   = -- pprTrace "dmdAnalRhsSig" (ppr id $$ ppr let_dmd $$ ppr sig $$ ppr lazy_fv) $
  870     (env', lazy_fv, id', rhs')
  871   where
  872     rhs_arity = idArity id
  873     -- See Note [Demand signatures are computed for a threshold demand based on idArity]
  874 
  875     rhs_dmd = mkCalledOnceDmds rhs_arity body_dmd
  876 
  877     body_dmd
  878       | isJoinId id
  879       -- See Note [Demand analysis for join points]
  880       -- See Note [Invariants on join points] invariant 2b, in GHC.Core
  881       --     rhs_arity matches the join arity of the join point
  882       = let_dmd
  883       | otherwise
  884       -- See Note [Unboxed demand on function bodies returning small products]
  885       = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd
  886 
  887     -- See Note [Do not unbox class dictionaries]
  888     WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs
  889     DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty
  890 
  891     sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div)
  892 
  893     id' = id `setIdDmdSig` sig
  894     !env' = extendAnalEnv top_lvl env id' sig
  895 
  896     -- See Note [Aggregated demand for cardinality]
  897     -- FIXME: That Note doesn't explain the following lines at all. The reason
  898     --        is really much different: When we have a recursive function, we'd
  899     --        have to also consider the free vars of the strictness signature
  900     --        when checking whether we found a fixed-point. That is expensive;
  901     --        we only want to check whether argument demands of the sig changed.
  902     --        reuseEnv makes it so that the FV results are stable as long as the
  903     --        last argument demands were. Strictness won't change. But used-once
  904     --        might turn into used-many even if the signature was stable and
  905     --        we'd have to do an additional iteration. reuseEnv makes sure that
  906     --        we never get used-once info for FVs of recursive functions.
  907     --        See #14816 where we try to get rid of reuseEnv.
  908     rhs_fv1 = case rec_flag of
  909                 Recursive    -> reuseEnv rhs_fv
  910                 NonRecursive -> rhs_fv
  911 
  912     -- See Note [Absence analysis for stable unfoldings and RULES]
  913     rhs_fv2 = rhs_fv1 `keepAliveDmdEnv` bndrRuleAndUnfoldingIds id
  914 
  915     -- See Note [Lazy and unleashable free variables]
  916     !(!lazy_fv, !sig_fv) = partitionVarEnv isWeakDmd rhs_fv2
  917 
  918 unboxableResultWidth :: AnalEnv -> Id -> Maybe Arity
  919 unboxableResultWidth env id
  920   | (pis,ret_ty) <- splitPiTys (idType id)
  921   , count (not . isNamedBinder) pis == idArity id
  922   , Just (tc, _tc_args, _co) <- normSplitTyConApp_maybe (ae_fam_envs env) ret_ty
  923   , Just dc <- tyConSingleAlgDataCon_maybe tc
  924   , null (dataConExTyCoVars dc) -- Can't unbox results with existentials
  925   = Just (dataConRepArity dc)
  926   | otherwise
  927   = Nothing
  928 
  929 unboxedWhenSmall :: DmdAnalOpts -> Maybe Arity -> SubDemand -> SubDemand
  930 -- See Note [Unboxed demand on function bodies returning small products]
  931 unboxedWhenSmall opts mb_n sd
  932   | Just n <- mb_n
  933   , n <= dmd_unbox_width opts
  934   = unboxSubDemand sd
  935   | otherwise
  936   = sd
  937 
  938 -- | If given the (local, non-recursive) let-bound 'Id', 'useLetUp' determines
  939 -- whether we should process the binding up (body before rhs) or down (rhs
  940 -- before body).
  941 --
  942 -- We use LetDown if there is a chance to get a useful strictness signature to
  943 -- unleash at call sites. LetDown is generally more precise than LetUp if we can
  944 -- correctly guess how it will be used in the body, that is, for which incoming
  945 -- demand the strictness signature should be computed, which allows us to
  946 -- unleash higher-order demands on arguments at call sites. This is mostly the
  947 -- case when
  948 --
  949 --   * The binding takes any arguments before performing meaningful work (cf.
  950 --     'idArity'), in which case we are interested to see how it uses them.
  951 --   * The binding is a join point, hence acting like a function, not a value.
  952 --     As a big plus, we know *precisely* how it will be used in the body; since
  953 --     it's always tail-called, we can directly unleash the incoming demand of
  954 --     the let binding on its RHS when computing a strictness signature. See
  955 --     [Demand analysis for join points].
  956 --
  957 -- Thus, if the binding is not a join point and its arity is 0, we have a thunk
  958 -- and use LetUp, implying that we have no usable demand signature available
  959 -- when we analyse the let body.
  960 --
  961 -- Since thunk evaluation is memoised, we want to unleash its 'DmdEnv' of free
  962 -- vars at most once, regardless of how many times it was forced in the body.
  963 -- This makes a real difference wrt. usage demands. The other reason is being
  964 -- able to unleash a more precise product demand on its RHS once we know how the
  965 -- thunk was used in the let body.
  966 --
  967 -- Characteristic examples, always assuming a single evaluation:
  968 --
  969 --   * @let x = 2*y in x + x@ => LetUp. Compared to LetDown, we find out that
  970 --     the expression uses @y@ at most once.
  971 --   * @let x = (a,b) in fst x@ => LetUp. Compared to LetDown, we find out that
  972 --     @b@ is absent.
  973 --   * @let f x = x*2 in f y@ => LetDown. Compared to LetUp, we find out that
  974 --     the expression uses @y@ strictly, because we have @f@'s demand signature
  975 --     available at the call site.
  976 --   * @join exit = 2*y in if a then exit else if b then exit else 3*y@ =>
  977 --     LetDown. Compared to LetUp, we find out that the expression uses @y@
  978 --     strictly, because we can unleash @exit@'s signature at each call site.
  979 --   * For a more convincing example with join points, see Note [Demand analysis
  980 --     for join points].
  981 --
  982 useLetUp :: TopLevelFlag -> Var -> Bool
  983 useLetUp top_lvl f = isNotTopLevel top_lvl && idArity f == 0 && not (isJoinId f)
  984 
  985 {- Note [Demand analysis for join points]
  986 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  987 Consider
  988    g :: (Int,Int) -> Int
  989    g (p,q) = p+q
  990 
  991    f :: T -> Int -> Int
  992    f x p = g (join j y = (p,y)
  993               in case x of
  994                    A -> j 3
  995                    B -> j 4
  996                    C -> (p,7))
  997 
  998 If j was a vanilla function definition, we'd analyse its body with
  999 evalDmd, and think that it was lazy in p.  But for join points we can
 1000 do better!  We know that j's body will (if called at all) be evaluated
 1001 with the demand that consumes the entire join-binding, in this case
 1002 the argument demand from g.  Whizzo!  g evaluates both components of
 1003 its argument pair, so p will certainly be evaluated if j is called.
 1004 
 1005 For f to be strict in p, we need /all/ paths to evaluate p; in this
 1006 case the C branch does so too, so we are fine.  So, as usual, we need
 1007 to transport demands on free variables to the call site(s).  Compare
 1008 Note [Lazy and unleashable free variables].
 1009 
 1010 The implementation is easy.  When analysing a join point, we can
 1011 analyse its body with the demand from the entire join-binding (written
 1012 let_dmd here).
 1013 
 1014 Another win for join points!  #13543.
 1015 
 1016 However, note that the strictness signature for a join point can
 1017 look a little puzzling.  E.g.
 1018 
 1019     (join j x = \y. error "urk")
 1020     (in case v of              )
 1021     (     A -> j 3             )  x
 1022     (     B -> j 4             )
 1023     (     C -> \y. blah        )
 1024 
 1025 The entire thing is in a C1(L) context, so j's strictness signature
 1026 will be    [A]b
 1027 meaning one absent argument, returns bottom.  That seems odd because
 1028 there's a \y inside.  But it's right because when consumed in a C1(L)
 1029 context the RHS of the join point is indeed bottom.
 1030 
 1031 Note [Demand signatures are computed for a threshold demand based on idArity]
 1032 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1033 We compute demand signatures assuming idArity incoming arguments to approximate
 1034 behavior for when we have a call site with at least that many arguments. idArity
 1035 is /at least/ the number of manifest lambdas, but might be higher for PAPs and
 1036 trivial RHS (see Note [Demand analysis for trivial right-hand sides]).
 1037 
 1038 Because idArity of a function varies independently of its cardinality
 1039 properties (cf. Note [idArity varies independently of dmdTypeDepth]), we
 1040 implicitly encode the arity for when a demand signature is sound to unleash
 1041 in its 'dmdTypeDepth' (cf. Note [Understanding DmdType and DmdSig] in
 1042 GHC.Types.Demand). It is unsound to unleash a demand signature when the
 1043 incoming number of arguments is less than that.
 1044 See Note [What are demand signatures?] in GHC.Types.Demand for more details
 1045 on soundness.
 1046 
 1047 Why idArity arguments? Because that's a conservative estimate of how many
 1048 arguments we must feed a function before it does anything interesting with them.
 1049 Also it elegantly subsumes the trivial RHS and PAP case.
 1050 
 1051 There might be functions for which we might want to analyse for more incoming
 1052 arguments than idArity. Example:
 1053 
 1054   f x =
 1055     if expensive
 1056       then \y -> ... y ...
 1057       else \y -> ... y ...
 1058 
 1059 We'd analyse `f` under a unary call demand C1(L), corresponding to idArity
 1060 being 1. That's enough to look under the manifest lambda and find out how a
 1061 unary call would use `x`, but not enough to look into the lambdas in the if
 1062 branches.
 1063 
 1064 On the other hand, if we analysed for call demand C1(C1(L)), we'd get useful
 1065 strictness info for `y` (and more precise info on `x`) and possibly CPR
 1066 information, but
 1067 
 1068   * We would no longer be able to unleash the signature at unary call sites
 1069   * Performing the worker/wrapper split based on this information would be
 1070     implicitly eta-expanding `f`, playing fast and loose with divergence and
 1071     even being unsound in the presence of newtypes, so we refrain from doing so.
 1072     Also see Note [Don't eta expand in w/w] in GHC.Core.Opt.WorkWrap.
 1073 
 1074 Since we only compute one signature, we do so for arity 1. Computing multiple
 1075 signatures for different arities (i.e., polyvariance) would be entirely
 1076 possible, if it weren't for the additional runtime and implementation
 1077 complexity.
 1078 
 1079 Note [idArity varies independently of dmdTypeDepth]
 1080 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1081 We used to check in GHC.Core.Lint that dmdTypeDepth <= idArity for a let-bound
 1082 identifier. But that means we would have to zap demand signatures every time we
 1083 reset or decrease arity. That's an unnecessary dependency, because
 1084 
 1085   * The demand signature captures a semantic property that is independent of
 1086     what the binding's current arity is
 1087   * idArity is analysis information itself, thus volatile
 1088   * We already *have* dmdTypeDepth, wo why not just use it to encode the
 1089     threshold for when to unleash the signature
 1090     (cf. Note [Understanding DmdType and DmdSig] in GHC.Types.Demand)
 1091 
 1092 Consider the following expression, for example:
 1093 
 1094     (let go x y = `x` seq ... in go) |> co
 1095 
 1096 `go` might have a strictness signature of `<1L><L>`. The simplifier will identify
 1097 `go` as a nullary join point through `joinPointBinding_maybe` and float the
 1098 coercion into the binding, leading to an arity decrease:
 1099 
 1100     join go = (\x y -> `x` seq ...) |> co in go
 1101 
 1102 With the CoreLint check, we would have to zap `go`'s perfectly viable strictness
 1103 signature.
 1104 
 1105 Note [Demand analysis for trivial right-hand sides]
 1106 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1107 Consider
 1108     foo = plusInt |> co
 1109 where plusInt is an arity-2 function with known strictness.  Clearly
 1110 we want plusInt's strictness to propagate to foo!  But because it has
 1111 no manifest lambdas, it won't do so automatically, and indeed 'co' might
 1112 have type (Int->Int->Int) ~ T.
 1113 
 1114 Fortunately, GHC.Core.Opt.Arity gives 'foo' arity 2, which is enough for LetDown to
 1115 forward plusInt's demand signature, and all is well (see Note [Newtype arity] in
 1116 GHC.Core.Opt.Arity)! A small example is the test case NewtypeArity.
 1117 
 1118 Note [Absence analysis for stable unfoldings and RULES]
 1119 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1120 Ticket #18638 shows that it's really important to do absence analysis
 1121 for stable unfoldings. Consider
 1122 
 1123    g = blah
 1124 
 1125    f = \x.  ...no use of g....
 1126    {- f's stable unfolding is f = \x. ...g... -}
 1127 
 1128 If f is ever inlined we use 'g'. But f's current RHS makes no use
 1129 of 'g', so if we don't look at the unfolding we'll mark g as Absent,
 1130 and transform to
 1131 
 1132    g = error "Entered absent value"
 1133    f = \x. ...
 1134    {- f's stable unfolding is f = \x. ...g... -}
 1135 
 1136 Now if f is subsequently inlined, we'll use 'g' and ... disaster.
 1137 
 1138 SOLUTION: if f has a stable unfolding, adjust its DmdEnv (the demands
 1139 on its free variables) so that no variable mentioned in its unfolding
 1140 is Absent.  This is done by the function Demand.keepAliveDmdEnv.
 1141 
 1142 ALSO: do the same for Ids free in the RHS of any RULES for f.
 1143 
 1144 PS: You may wonder how it can be that f's optimised RHS has somehow
 1145 discarded 'g', but when f is inlined we /don't/ discard g in the same
 1146 way. I think a simple example is
 1147    g = (a,b)
 1148    f = \x.  fst g
 1149    {-# INLINE f #-}
 1150 
 1151 Now f's optimised RHS will be \x.a, but if we change g to (error "..")
 1152 (since it is apparently Absent) and then inline (\x. fst g) we get
 1153 disaster.  But regardless, #18638 was a more complicated version of
 1154 this, that actually happened in practice.
 1155 -}
 1156 
 1157 {- *********************************************************************
 1158 *                                                                      *
 1159                       Fixpoints
 1160 *                                                                      *
 1161 ********************************************************************* -}
 1162 
 1163 -- Recursive bindings
 1164 dmdFix :: TopLevelFlag
 1165        -> AnalEnv                            -- Does not include bindings for this binding
 1166        -> SubDemand
 1167        -> [(Id,CoreExpr)]
 1168        -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) -- Binders annotated with strictness info
 1169 
 1170 dmdFix top_lvl env let_dmd orig_pairs
 1171   = loop 1 initial_pairs
 1172   where
 1173     -- See Note [Initialising strictness]
 1174     initial_pairs | ae_virgin env = [(setIdDmdSig id botSig, rhs) | (id, rhs) <- orig_pairs ]
 1175                   | otherwise     = orig_pairs
 1176 
 1177     -- If fixed-point iteration does not yield a result we use this instead
 1178     -- See Note [Safe abortion in the fixed-point iteration]
 1179     abort :: (AnalEnv, DmdEnv, [(Id,CoreExpr)])
 1180     abort = (env, lazy_fv', zapped_pairs)
 1181       where (lazy_fv, pairs') = step True (zapIdDmdSig orig_pairs)
 1182             -- Note [Lazy and unleashable free variables]
 1183             non_lazy_fvs = plusVarEnvList $ map (dmdSigDmdEnv . idDmdSig . fst) pairs'
 1184             lazy_fv'     = lazy_fv `plusVarEnv` mapVarEnv (const topDmd) non_lazy_fvs
 1185             zapped_pairs = zapIdDmdSig pairs'
 1186 
 1187     -- The fixed-point varies the idDmdSig field of the binders, and terminates if that
 1188     -- annotation does not change any more.
 1189     loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, DmdEnv, [(Id,CoreExpr)])
 1190     loop n pairs = -- pprTrace "dmdFix" (ppr n <+> vcat [ ppr id <+> ppr (idDmdSig id)
 1191                    --                                     | (id,_)<- pairs]) $
 1192                    loop' n pairs
 1193 
 1194     loop' n pairs
 1195       | found_fixpoint = (final_anal_env, lazy_fv, pairs')
 1196       | n == 10        = abort
 1197       | otherwise      = loop (n+1) pairs'
 1198       where
 1199         found_fixpoint    = map (idDmdSig . fst) pairs' == map (idDmdSig . fst) pairs
 1200         first_round       = n == 1
 1201         (lazy_fv, pairs') = step first_round pairs
 1202         final_anal_env    = extendAnalEnvs top_lvl env (map fst pairs')
 1203 
 1204     step :: Bool -> [(Id, CoreExpr)] -> (DmdEnv, [(Id, CoreExpr)])
 1205     step first_round pairs = (lazy_fv, pairs')
 1206       where
 1207         -- In all but the first iteration, delete the virgin flag
 1208         start_env | first_round = env
 1209                   | otherwise   = nonVirgin env
 1210 
 1211         start = (extendAnalEnvs top_lvl start_env (map fst pairs), emptyVarEnv)
 1212 
 1213         !((_,!lazy_fv), !pairs') = mapAccumL my_downRhs start pairs
 1214                 -- mapAccumL: Use the new signature to do the next pair
 1215                 -- The occurrence analyser has arranged them in a good order
 1216                 -- so this can significantly reduce the number of iterations needed
 1217 
 1218         my_downRhs (env, lazy_fv) (id,rhs)
 1219           = -- pprTrace "my_downRhs" (ppr id $$ ppr (idDmdSig id) $$ ppr sig) $
 1220             ((env', lazy_fv'), (id', rhs'))
 1221           where
 1222             !(!env', !lazy_fv1, !id', !rhs') = dmdAnalRhsSig top_lvl Recursive env let_dmd id rhs
 1223             !lazy_fv'                    = plusVarEnv_C plusDmd lazy_fv lazy_fv1
 1224 
 1225     zapIdDmdSig :: [(Id, CoreExpr)] -> [(Id, CoreExpr)]
 1226     zapIdDmdSig pairs = [(setIdDmdSig id nopSig, rhs) | (id, rhs) <- pairs ]
 1227 
 1228 {- Note [Safe abortion in the fixed-point iteration]
 1229 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1230 Fixed-point iteration may fail to terminate. But we cannot simply give up and
 1231 return the environment and code unchanged! We still need to do one additional
 1232 round, for two reasons:
 1233 
 1234  * To get information on used free variables (both lazy and strict!)
 1235    (see Note [Lazy and unleashable free variables])
 1236  * To ensure that all expressions have been traversed at least once, and any left-over
 1237    strictness annotations have been updated.
 1238 
 1239 This final iteration does not add the variables to the strictness signature
 1240 environment, which effectively assigns them 'nopSig' (see "getStrictness")
 1241 
 1242 Note [Trimming a demand to a type]
 1243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1244 There are two reasons we sometimes trim a demand to match a type.
 1245   1. GADTs
 1246   2. Recursive products and widening
 1247 
 1248 More on both below.  But the botttom line is: we really don't want to
 1249 have a binder whose demand is more deeply-nested than its type
 1250 "allows". So in findBndrDmd we call trimToType and findTypeShape to
 1251 trim the demand on the binder to a form that matches the type
 1252 
 1253 Now to the reasons. For (1) consider
 1254   f :: a -> Bool
 1255   f x = case ... of
 1256           A g1 -> case (x |> g1) of (p,q) -> ...
 1257           B    -> error "urk"
 1258 
 1259 where A,B are the constructors of a GADT.  We'll get a 1P(L,L) demand
 1260 on x from the A branch, but that's a stupid demand for x itself, which
 1261 has type 'a'. Indeed we get ASSERTs going off (notably in
 1262 splitUseProdDmd, #8569).
 1263 
 1264 For (2) consider
 1265   data T = MkT Int T    -- A recursive product
 1266   f :: Int -> T -> Int
 1267   f 0 _         = 0
 1268   f _ (MkT n t) = f n t
 1269 
 1270 Here f is lazy in T, but its *usage* is infinite: P(L,P(L,P(L, ...))).
 1271 Notice that this happens because T is a product type, and is recrusive.
 1272 If we are not careful, we'll fail to iterate to a fixpoint in dmdFix,
 1273 and bale out entirely, which is inefficient and over-conservative.
 1274 
 1275 Worse, as we discovered in #18304, the size of the usages we compute
 1276 can grow /exponentially/, so even 10 iterations costs far too much.
 1277 Especially since we then discard the result.
 1278 
 1279 To avoid this we use the same findTypeShape function as for (1), but
 1280 arrange that it trims the demand if it encounters the same type constructor
 1281 twice (or three times, etc).  We use our standard RecTcChecker mechanism
 1282 for this -- see GHC.Core.Opt.WorkWrap.Utils.findTypeShape.
 1283 
 1284 This is usually call "widening".  We could do it just in dmdFix, but
 1285 since are doing this findTypeShape business /anyway/ because of (1),
 1286 and it has all the right information to hand, it's extremely
 1287 convenient to do it there.
 1288 
 1289 -}
 1290 
 1291 {- *********************************************************************
 1292 *                                                                      *
 1293                  Strictness signatures and types
 1294 *                                                                      *
 1295 ********************************************************************* -}
 1296 
 1297 unitDmdType :: DmdEnv -> DmdType
 1298 unitDmdType dmd_env = DmdType dmd_env [] topDiv
 1299 
 1300 coercionDmdEnv :: Coercion -> DmdEnv
 1301 coercionDmdEnv co = coercionsDmdEnv [co]
 1302 
 1303 coercionsDmdEnv :: [Coercion] -> DmdEnv
 1304 coercionsDmdEnv cos = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCos cos)
 1305                       -- The VarSet from coVarsOfCos is really a VarEnv Var
 1306 
 1307 addVarDmd :: DmdType -> Var -> Demand -> DmdType
 1308 addVarDmd (DmdType fv ds res) var dmd
 1309   = DmdType (extendVarEnv_C plusDmd fv var dmd) ds res
 1310 
 1311 addLazyFVs :: DmdType -> DmdEnv -> DmdType
 1312 addLazyFVs dmd_ty lazy_fvs
 1313   = dmd_ty `plusDmdType` mkPlusDmdArg lazy_fvs
 1314         -- Using plusDmdType (rather than just plus'ing the envs)
 1315         -- is vital.  Consider
 1316         --      let f = \x -> (x,y)
 1317         --      in  error (f 3)
 1318         -- Here, y is treated as a lazy-fv of f, but we must `plusDmd` that L
 1319         -- demand with the bottom coming up from 'error'
 1320         --
 1321         -- I got a loop in the fixpointer without this, due to an interaction
 1322         -- with the lazy_fv filtering in dmdAnalRhsSig.  Roughly, it was
 1323         --      letrec f n x
 1324         --          = letrec g y = x `fatbar`
 1325         --                         letrec h z = z + ...g...
 1326         --                         in h (f (n-1) x)
 1327         --      in ...
 1328         -- In the initial iteration for f, f=Bot
 1329         -- Suppose h is found to be strict in z, but the occurrence of g in its RHS
 1330         -- is lazy.  Now consider the fixpoint iteration for g, esp the demands it
 1331         -- places on its free variables.  Suppose it places none.  Then the
 1332         --      x `fatbar` ...call to h...
 1333         -- will give a x->V demand for x.  That turns into a L demand for x,
 1334         -- which floats out of the defn for h.  Without the modifyEnv, that
 1335         -- L demand doesn't get both'd with the Bot coming up from the inner
 1336         -- call to f.  So we just get an L demand for x for g.
 1337 
 1338 {-
 1339 Note [Do not strictify the argument dictionaries of a dfun]
 1340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1341 The typechecker can tie recursive knots involving dfuns, so we do the
 1342 conservative thing and refrain from strictifying a dfun's argument
 1343 dictionaries.
 1344 -}
 1345 
 1346 setBndrsDemandInfo :: HasCallStack => [Var] -> [Demand] -> [Var]
 1347 setBndrsDemandInfo (b:bs) ds
 1348   | isTyVar b = b : setBndrsDemandInfo bs ds
 1349 setBndrsDemandInfo (b:bs) (d:ds) =
 1350     let !new_info = setIdDemandInfo b d
 1351         !vars = setBndrsDemandInfo bs ds
 1352     in new_info : vars
 1353 setBndrsDemandInfo [] ds = assert (null ds) []
 1354 setBndrsDemandInfo bs _  = pprPanic "setBndrsDemandInfo" (ppr bs)
 1355 
 1356 annotateLamIdBndr :: AnalEnv
 1357                   -> DmdType    -- Demand type of body
 1358                   -> Id         -- Lambda binder
 1359                   -> WithDmdType Id  -- Demand type of lambda
 1360                                      -- and binder annotated with demand
 1361 
 1362 annotateLamIdBndr env dmd_ty id
 1363 -- For lambdas we add the demand to the argument demands
 1364 -- Only called for Ids
 1365   = assert (isId id) $
 1366     -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $
 1367     WithDmdType main_ty new_id
 1368   where
 1369     -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
 1370     -- and Note [Do not unbox class dictionaries]
 1371     dmd'    = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd
 1372     new_id  = setIdDemandInfo id dmd'
 1373     main_ty = addDemand dmd' dmd_ty'
 1374     WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
 1375 
 1376 {- Note [NOINLINE and strictness]
 1377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1378 At one point we disabled strictness for NOINLINE functions, on the
 1379 grounds that they should be entirely opaque.  But that lost lots of
 1380 useful semantic strictness information, so now we analyse them like
 1381 any other function, and pin strictness information on them.
 1382 
 1383 That in turn forces us to worker/wrapper them; see
 1384 Note [Worker/wrapper for NOINLINE functions] in GHC.Core.Opt.WorkWrap.
 1385 
 1386 
 1387 Note [Lazy and unleashable free variables]
 1388 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1389 We put the strict and once-used FVs in the DmdType of the Id, so
 1390 that at its call sites we unleash demands on its strict fvs.
 1391 An example is 'roll' in imaginary/wheel-sieve2
 1392 Something like this:
 1393         roll x = letrec
 1394                      go y = if ... then roll (x-1) else x+1
 1395                  in
 1396                  go ms
 1397 We want to see that roll is strict in x, which is because
 1398 go is called.   So we put the DmdEnv for x in go's DmdType.
 1399 
 1400 Another example:
 1401 
 1402         f :: Int -> Int -> Int
 1403         f x y = let t = x+1
 1404             h z = if z==0 then t else
 1405                   if z==1 then x+1 else
 1406                   x + h (z-1)
 1407         in h y
 1408 
 1409 Calling h does indeed evaluate x, but we can only see
 1410 that if we unleash a demand on x at the call site for t.
 1411 
 1412 Incidentally, here's a place where lambda-lifting h would
 1413 lose the cigar --- we couldn't see the joint strictness in t/x
 1414 
 1415         ON THE OTHER HAND
 1416 
 1417 We don't want to put *all* the fv's from the RHS into the
 1418 DmdType. Because
 1419 
 1420  * it makes the strictness signatures larger, and hence slows down fixpointing
 1421 
 1422 and
 1423 
 1424  * it is useless information at the call site anyways:
 1425    For lazy, used-many times fv's we will never get any better result than
 1426    that, no matter how good the actual demand on the function at the call site
 1427    is (unless it is always absent, but then the whole binder is useless).
 1428 
 1429 Therefore we exclude lazy multiple-used fv's from the environment in the
 1430 DmdType.
 1431 
 1432 But now the signature lies! (Missing variables are assumed to be absent.) To
 1433 make up for this, the code that analyses the binding keeps the demand on those
 1434 variable separate (usually called "lazy_fv") and adds it to the demand of the
 1435 whole binding later.
 1436 
 1437 What if we decide _not_ to store a strictness signature for a binding at all, as
 1438 we do when aborting a fixed-point iteration? The we risk losing the information
 1439 that the strict variables are being used. In that case, we take all free variables
 1440 mentioned in the (unsound) strictness signature, conservatively approximate the
 1441 demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix".
 1442 
 1443 
 1444 ************************************************************************
 1445 *                                                                      *
 1446 \subsection{Strictness signatures}
 1447 *                                                                      *
 1448 ************************************************************************
 1449 -}
 1450 
 1451 
 1452 data AnalEnv = AE
 1453    { ae_opts      :: !DmdAnalOpts -- ^ Analysis options
 1454    , ae_sigs      :: !SigEnv
 1455    , ae_virgin    :: !Bool -- ^ True on first iteration only
 1456                            -- See Note [Initialising strictness]
 1457    , ae_fam_envs  :: !FamInstEnvs
 1458    , ae_inl_fun   :: !InsideInlineableFun
 1459                            -- ^ Whether we analyse the body of an inlineable fun.
 1460                            -- See Note [Do not unbox class dictionaries].
 1461    }
 1462 
 1463         -- We use the se_env to tell us whether to
 1464         -- record info about a variable in the DmdEnv
 1465         -- We do so if it's a LocalId, but not top-level
 1466         --
 1467         -- The DmdEnv gives the demand on the free vars of the function
 1468         -- when it is given enough args to satisfy the strictness signature
 1469 
 1470 type SigEnv = VarEnv (DmdSig, TopLevelFlag)
 1471 
 1472 instance Outputable AnalEnv where
 1473   ppr env = text "AE" <+> braces (vcat
 1474          [ text "ae_virgin =" <+> ppr (ae_virgin env)
 1475          , text "ae_sigs =" <+> ppr (ae_sigs env)
 1476          ])
 1477 
 1478 emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv
 1479 emptyAnalEnv opts fam_envs
 1480     = AE { ae_opts         = opts
 1481          , ae_sigs         = emptySigEnv
 1482          , ae_virgin       = True
 1483          , ae_fam_envs     = fam_envs
 1484          , ae_inl_fun      = NotInsideInlineableFun
 1485          }
 1486 
 1487 emptySigEnv :: SigEnv
 1488 emptySigEnv = emptyVarEnv
 1489 
 1490 -- | Extend an environment with the strictness IDs attached to the id
 1491 extendAnalEnvs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv
 1492 extendAnalEnvs top_lvl env vars
 1493   = env { ae_sigs = extendSigEnvs top_lvl (ae_sigs env) vars }
 1494 
 1495 extendSigEnvs :: TopLevelFlag -> SigEnv -> [Id] -> SigEnv
 1496 extendSigEnvs top_lvl sigs vars
 1497   = extendVarEnvList sigs [ (var, (idDmdSig var, top_lvl)) | var <- vars]
 1498 
 1499 extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> DmdSig -> AnalEnv
 1500 extendAnalEnv top_lvl env var sig
 1501   = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig }
 1502 
 1503 extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> DmdSig -> SigEnv
 1504 extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl)
 1505 
 1506 lookupSigEnv :: AnalEnv -> Id -> Maybe (DmdSig, TopLevelFlag)
 1507 lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
 1508 
 1509 nonVirgin :: AnalEnv -> AnalEnv
 1510 nonVirgin env = env { ae_virgin = False }
 1511 
 1512 -- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable
 1513 -- unfolding. See Note [Do not unbox class dictionaries].
 1514 adjustInlFun :: Id -> AnalEnv -> AnalEnv
 1515 adjustInlFun id env
 1516   | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun }
 1517   | otherwise                              = env { ae_inl_fun = NotInsideInlineableFun }
 1518 
 1519 findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand]
 1520 -- Return the demands on the Ids in the [Var]
 1521 findBndrsDmds env dmd_ty bndrs
 1522   = go dmd_ty bndrs
 1523   where
 1524     go dmd_ty []  = WithDmdType dmd_ty []
 1525     go dmd_ty (b:bs)
 1526       | isId b    = let WithDmdType dmd_ty1 dmds = go dmd_ty bs
 1527                         WithDmdType dmd_ty2 dmd  = findBndrDmd env dmd_ty1 b
 1528                     in WithDmdType dmd_ty2  (dmd : dmds)
 1529       | otherwise = go dmd_ty bs
 1530 
 1531 findBndrDmd :: AnalEnv -> DmdType -> Id -> WithDmdType Demand
 1532 -- See Note [Trimming a demand to a type]
 1533 findBndrDmd env dmd_ty id
 1534   = -- pprTrace "findBndrDmd" (ppr id $$ ppr dmd_ty $$ ppr starting_dmd $$ ppr dmd') $
 1535     WithDmdType dmd_ty' dmd'
 1536   where
 1537     dmd' = strictify $
 1538            trimToType starting_dmd (findTypeShape fam_envs id_ty)
 1539 
 1540     (dmd_ty', starting_dmd) = peelFV dmd_ty id
 1541 
 1542     id_ty = idType id
 1543 
 1544     strictify dmd
 1545       -- See Note [Making dictionaries strict]
 1546       | dmd_strict_dicts (ae_opts env)
 1547              -- We never want to strictify a recursive let. At the moment
 1548              -- findBndrDmd is never called for recursive lets; if that
 1549              -- changes, we need a RecFlag parameter and another guard here.
 1550       = strictifyDictDmd id_ty dmd
 1551       | otherwise
 1552       = dmd
 1553 
 1554     fam_envs = ae_fam_envs env
 1555 
 1556 {- Note [Making dictionaries strict]
 1557 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1558 The Opt_DictsStrict flag makes GHC use call-by-value for dictionaries.  Why?
 1559 
 1560 * Generally CBV is more efficient.
 1561 
 1562 * Dictionaries are always non-bottom; and never take much work to
 1563   compute.  E.g. a dfun from an instance decl always returns a dicionary
 1564   record immediately.  See DFunUnfolding in CoreSyn.
 1565   See also Note [Recursive superclasses] in TcInstDcls.
 1566 
 1567 * The strictness analyser will then unbox dictionaries and pass the
 1568   methods individually, rather than in a bundle.  If there are a lot of
 1569   methods that might be bad; but worker/wrapper already does throttling.
 1570 
 1571 * A newtype dictionary is *not* always non-bottom.  E.g.
 1572       class C a where op :: a -> a
 1573       instance C Int where op = error "urk"
 1574   Now a value of type (C Int) is just a newtype wrapper (a cast) around
 1575   the error thunk.  Don't strictify these!
 1576 
 1577 See #17758 for more background and perf numbers.
 1578 
 1579 The implementation is extremly simple: just make the strictness
 1580 analyser strictify the demand on a dictionary binder in
 1581 'findBndrDmd'.
 1582 
 1583 However there is one case where this can make performance worse.
 1584 For the principle consider some function at the core level:
 1585     myEq :: Eq a => a -> a -> Bool
 1586     myEq eqDict x y = ((==) eqDict) x y
 1587 If we make the dictionary strict then WW can fire turning this into:
 1588     $wmyEq :: (a -> a -> Bool) -> a -> a -> Bool
 1589     $wmyEq eq x y = eq x y
 1590 Which *usually* performs better. However if the dictionary is known we
 1591 are far more likely to inline a function applied to the dictionary than
 1592 to inline one applied to a function. Sometimes this makes just enough
 1593 of a difference to stop a function from inlining. This is documented in
 1594 #18421.
 1595 
 1596 It's somewhat similar to Note [Do not unbox class dictionaries] although
 1597 here our problem is with the inliner, not the specializer.
 1598 
 1599 Note [Initialising strictness]
 1600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1601 See section 9.2 (Finding fixpoints) of the paper.
 1602 
 1603 Our basic plan is to initialise the strictness of each Id in a
 1604 recursive group to "bottom", and find a fixpoint from there.  However,
 1605 this group B might be inside an *enclosing* recursive group A, in
 1606 which case we'll do the entire fixpoint shebang on for each iteration
 1607 of A. This can be illustrated by the following example:
 1608 
 1609 Example:
 1610 
 1611   f [] = []
 1612   f (x:xs) = let g []     = f xs
 1613                  g (y:ys) = y+1 : g ys
 1614               in g (h x)
 1615 
 1616 At each iteration of the fixpoint for f, the analyser has to find a
 1617 fixpoint for the enclosed function g. In the meantime, the demand
 1618 values for g at each iteration for f are *greater* than those we
 1619 encountered in the previous iteration for f. Therefore, we can begin
 1620 the fixpoint for g not with the bottom value but rather with the
 1621 result of the previous analysis. I.e., when beginning the fixpoint
 1622 process for g, we can start from the demand signature computed for g
 1623 previously and attached to the binding occurrence of g.
 1624 
 1625 To speed things up, we initialise each iteration of A (the enclosing
 1626 one) from the result of the last one, which is neatly recorded in each
 1627 binder.  That way we make use of earlier iterations of the fixpoint
 1628 algorithm. (Cunning plan.)
 1629 
 1630 But on the *first* iteration we want to *ignore* the current strictness
 1631 of the Id, and start from "bottom".  Nowadays the Id can have a current
 1632 strictness, because interface files record strictness for nested bindings.
 1633 To know when we are in the first iteration, we look at the ae_virgin
 1634 field of the AnalEnv.
 1635 
 1636 
 1637 Note [Final Demand Analyser run]
 1638 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1639 Some of the information that the demand analyser determines is not always
 1640 preserved by the simplifier.  For example, the simplifier will happily rewrite
 1641   \y [Demand=MU] let x = y in x + x
 1642 to
 1643   \y [Demand=MU] y + y
 1644 which is quite a lie: Now y occurs more than just once.
 1645 
 1646 The once-used information is (currently) only used by the code
 1647 generator, though.  So:
 1648 
 1649  * We zap the used-once info in the worker-wrapper;
 1650    see Note [Zapping Used Once info in WorkWrap] in
 1651    GHC.Core.Opt.WorkWrap.
 1652    If it's not reliable, it's better not to have it at all.
 1653 
 1654  * Just before TidyCore, we add a pass of the demand analyser,
 1655       but WITHOUT subsequent worker/wrapper and simplifier,
 1656    right before TidyCore.  See SimplCore.getCoreToDo.
 1657 
 1658    This way, correct information finds its way into the module interface
 1659    (strictness signatures!) and the code generator (single-entry thunks!)
 1660 
 1661 Note that, in contrast, the single-call information (CM(..)) /can/ be
 1662 relied upon, as the simplifier tends to be very careful about not
 1663 duplicating actual function calls.
 1664 
 1665 Also see #11731.
 1666 
 1667 Note [Space Leaks in Demand Analysis]
 1668 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1669 Ticket: #15455
 1670 MR: !5399
 1671 
 1672 In the past the result of demand analysis was not forced until the whole module
 1673 had finished being analysed. In big programs, this led to a big build up of thunks
 1674 which were all ultimately forced at the end of the analysis.
 1675 
 1676 This was because the return type of the analysis was a lazy pair:
 1677   dmdAnal :: AnalEnv -> SubDemand -> CoreExpr -> (DmdType, CoreExpr)
 1678 To avoid space leaks we added extra bangs to evaluate the DmdType component eagerly; but
 1679 we were never sure we had added enough.
 1680 The easiest way to systematically fix this was to use a strict pair type for the
 1681 return value of the analysis so that we can be more confident that the result
 1682 is incrementally computed rather than all at the end.
 1683 
 1684 A second, only loosely related point is that
 1685 the updating of Ids was not forced because the result of updating
 1686 an Id was placed into a lazy field in CoreExpr. This meant that until the end of
 1687 demand analysis, the unforced Ids would retain the DmdEnv which the demand information
 1688 was fetch from. Now we are quite careful to force Ids before putting them
 1689 back into core expressions so that we can garbage-collect the environments more eagerly.
 1690 For example see the `Case` branch of `dmdAnal'` where `case_bndr'` is forced
 1691 or `dmdAnalSumAlt`.
 1692 
 1693 The net result of all these improvements is the peak live memory usage of compiling
 1694 jsaddle-dom decreases about 4GB (from 6.5G to 2.5G). A bunch of bytes allocated benchmarks also
 1695 decrease because we allocate a lot fewer thunks which we immediately overwrite and
 1696 also runtime for the pass is faster! Overall, good wins.
 1697 
 1698 -}