never executed always true always false
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4
5 -----------------
6 A demand analysis
7 -----------------
8 -}
9
10
11 module GHC.Core.Opt.DmdAnal
12 ( DmdAnalOpts(..)
13 , dmdAnalProgram
14 )
15 where
16
17 import GHC.Prelude
18
19 import GHC.Core.Opt.WorkWrap.Utils
20 import GHC.Types.Demand -- All of it
21 import GHC.Core
22 import GHC.Core.Multiplicity ( scaledThing )
23 import GHC.Utils.Outputable
24 import GHC.Types.Var.Env
25 import GHC.Types.Var.Set
26 import GHC.Types.Basic
27 import Data.List ( mapAccumL )
28 import GHC.Core.DataCon
29 import GHC.Types.ForeignCall ( isSafeForeignCall )
30 import GHC.Types.Id
31 import GHC.Core.Utils
32 import GHC.Core.TyCon
33 import GHC.Core.Type
34 import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
35 import GHC.Core.Coercion ( Coercion )
36 import GHC.Core.TyCo.FVs ( coVarsOfCos )
37 import GHC.Core.FamInstEnv
38 import GHC.Core.Opt.Arity ( typeArity )
39 import GHC.Utils.Misc
40 import GHC.Utils.Panic
41 import GHC.Utils.Panic.Plain
42 import GHC.Data.Maybe ( isJust )
43 import GHC.Builtin.PrimOps
44 import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
45 import GHC.Types.Unique.Set
46
47 import GHC.Utils.Trace
48 _ = pprTrace -- Tired of commenting out the import all the time
49
50 {-
51 ************************************************************************
52 * *
53 \subsection{Top level stuff}
54 * *
55 ************************************************************************
56 -}
57
58 -- | Options for the demand analysis
59 data DmdAnalOpts = DmdAnalOpts
60 { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
61 , dmd_unbox_width :: !Int -- ^ Use strict dictionaries
62 }
63
64 -- This is a strict alternative to (,)
65 -- See Note [Space Leaks in Demand Analysis]
66 data WithDmdType a = WithDmdType !DmdType !a
67
68 getAnnotated :: WithDmdType a -> a
69 getAnnotated (WithDmdType _ a) = a
70
71 data DmdResult a b = R !a !b
72
73 -- | Outputs a new copy of the Core program in which binders have been annotated
74 -- with demand and strictness information.
75 --
76 -- Note: use `seqBinds` on the result to avoid leaks due to lazyness (cf Note
77 -- [Stamp out space leaks in demand analysis])
78 dmdAnalProgram :: DmdAnalOpts -> FamInstEnvs -> [CoreRule] -> CoreProgram -> CoreProgram
79 dmdAnalProgram opts fam_envs rules binds
80 = getAnnotated $ go (emptyAnalEnv opts fam_envs) binds
81 where
82 -- See Note [Analysing top-level bindings]
83 -- and Note [Why care for top-level demand annotations?]
84 go _ [] = WithDmdType nopDmdType []
85 go env (b:bs) = cons_up $ dmdAnalBind TopLevel env topSubDmd b anal_body
86 where
87 anal_body env'
88 | WithDmdType body_ty bs' <- go env' bs
89 = WithDmdType (add_exported_uses env' body_ty (bindersOf b)) bs'
90
91 cons_up :: WithDmdType (DmdResult b [b]) -> WithDmdType [b]
92 cons_up (WithDmdType dmd_ty (R b' bs')) = WithDmdType dmd_ty (b' : bs')
93
94 add_exported_uses :: AnalEnv -> DmdType -> [Id] -> DmdType
95 add_exported_uses env = foldl' (add_exported_use env)
96
97 -- | If @e@ is denoted by @dmd_ty@, then @add_exported_use _ dmd_ty id@
98 -- corresponds to the demand type of @(id, e)@, but is a lot more direct.
99 -- See Note [Analysing top-level bindings].
100 add_exported_use :: AnalEnv -> DmdType -> Id -> DmdType
101 add_exported_use env dmd_ty id
102 | isExportedId id || elemVarSet id rule_fvs
103 -- See Note [Absence analysis for stable unfoldings and RULES]
104 = dmd_ty `plusDmdType` fst (dmdAnalStar env topDmd (Var id))
105 | otherwise
106 = dmd_ty
107
108 rule_fvs :: IdSet
109 rule_fvs = rulesRhsFreeIds rules
110
111 -- | We attach useful (e.g. not 'topDmd') 'idDemandInfo' to top-level bindings
112 -- that satisfy this function.
113 --
114 -- Basically, we want to know how top-level *functions* are *used*
115 -- (e.g. called). The information will always be lazy.
116 -- Any other top-level bindings are boring.
117 --
118 -- See also Note [Why care for top-level demand annotations?].
119 isInterestingTopLevelFn :: Id -> Bool
120 -- SG tried to set this to True and got a +2% ghc/alloc regression in T5642
121 -- (which is dominated by the Simplifier) at no gain in analysis precision.
122 -- If there was a gain, that regression might be acceptable.
123 -- Plus, we could use LetUp for thunks and share some code with local let
124 -- bindings.
125 isInterestingTopLevelFn id =
126 typeArity (idType id) `lengthExceeds` 0
127
128 {- Note [Stamp out space leaks in demand analysis]
129 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
130 The demand analysis pass outputs a new copy of the Core program in
131 which binders have been annotated with demand and strictness
132 information. It's tiresome to ensure that this information is fully
133 evaluated everywhere that we produce it, so we just run a single
134 seqBinds over the output before returning it, to ensure that there are
135 no references holding on to the input Core program.
136
137 This makes a ~30% reduction in peak memory usage when compiling
138 DynFlags (cf #9675 and #13426).
139
140 This is particularly important when we are doing late demand analysis,
141 since we don't do a seqBinds at any point thereafter. Hence code
142 generation would hold on to an extra copy of the Core program, via
143 unforced thunks in demand or strictness information; and it is the
144 most memory-intensive part of the compilation process, so this added
145 seqBinds makes a big difference in peak memory usage.
146
147 Note [Analysing top-level bindings]
148 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
149 Consider a CoreProgram like
150 e1 = ...
151 n1 = ...
152 e2 = \a b -> ... fst (n1 a b) ...
153 n2 = \c d -> ... snd (e2 c d) ...
154 ...
155 where e* are exported, but n* are not.
156 Intuitively, we can see that @n1@ is only ever called with two arguments
157 and in every call site, the first component of the result of the call
158 is evaluated. Thus, we'd like it to have idDemandInfo @LCL(CM(P(1L,A))@.
159 NB: We may *not* give e2 a similar annotation, because it is exported and
160 external callers might use it in arbitrary ways, expressed by 'topDmd'.
161 This can then be exploited by Nested CPR and eta-expansion,
162 see Note [Why care for top-level demand annotations?].
163
164 How do we get this result? Answer: By analysing the program as if it was a let
165 expression of this form:
166 let e1 = ... in
167 let n1 = ... in
168 let e2 = ... in
169 let n2 = ... in
170 (e1,e2, ...)
171 E.g. putting all bindings in nested lets and returning all exported binders in a tuple.
172 Of course, we will not actually build that CoreExpr! Instead we faithfully
173 simulate analysis of said expression by adding the free variable 'DmdEnv'
174 of @e*@'s strictness signatures to the 'DmdType' we get from analysing the
175 nested bindings.
176
177 And even then the above form blows up analysis performance in T10370:
178 If @e1@ uses many free variables, we'll unnecessarily carry their demands around
179 with us from the moment we analyse the pair to the moment we bubble back up to
180 the binding for @e1@. So instead we analyse as if we had
181 let e1 = ... in
182 (e1, let n1 = ... in
183 ( let e2 = ... in
184 (e2, let n2 = ... in
185 ( ...))))
186 That is, a series of right-nested pairs, where the @fst@ are the exported
187 binders of the last enclosing let binding and @snd@ continues the nested
188 lets.
189
190 Variables occurring free in RULE RHSs are to be handled the same as exported Ids.
191 See also Note [Absence analysis for stable unfoldings and RULES].
192
193 Note [Why care for top-level demand annotations?]
194 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 Reading Note [Analysing top-level bindings], you might think that we go through
196 quite some trouble to get useful demands for top-level bindings. They can never
197 be strict, for example, so why bother?
198
199 First, we get to eta-expand top-level bindings that we weren't able to
200 eta-expand before without Call Arity. From T18894b:
201 module T18894b (f) where
202 eta :: Int -> Int -> Int
203 eta x = if fst (expensive x) == 13 then \y -> ... else \y -> ...
204 f m = ... eta m 2 ... eta 2 m ...
205 Since only @f@ is exported, we see all call sites of @eta@ and can eta-expand to
206 arity 2.
207
208 The call demands we get for some top-level bindings will also allow Nested CPR
209 to unbox deeper. From T18894:
210 module T18894 (h) where
211 g m n = (2 * m, 2 `div` n)
212 {-# NOINLINE g #-}
213 h :: Int -> Int
214 h m = ... snd (g m 2) ... uncurry (+) (g 2 m) ...
215 Only @h@ is exported, hence we see that @g@ is always called in contexts were we
216 also force the division in the second component of the pair returned by @g@.
217 This allows Nested CPR to evaluate the division eagerly and return an I# in its
218 position.
219 -}
220
221 {-
222 ************************************************************************
223 * *
224 \subsection{The analyser itself}
225 * *
226 ************************************************************************
227 -}
228
229 -- | Analyse a binding group and its \"body\", e.g. where it is in scope.
230 --
231 -- It calls a function that knows how to analyse this \"body\" given
232 -- an 'AnalEnv' with updated demand signatures for the binding group
233 -- (reflecting their 'idDmdSigInfo') and expects to receive a
234 -- 'DmdType' in return, which it uses to annotate the binding group with their
235 -- 'idDemandInfo'.
236 dmdAnalBind
237 :: TopLevelFlag
238 -> AnalEnv
239 -> SubDemand -- ^ Demand put on the "body"
240 -- (important for join points)
241 -> CoreBind
242 -> (AnalEnv -> WithDmdType a) -- ^ How to analyse the "body", e.g.
243 -- where the binding is in scope
244 -> WithDmdType (DmdResult CoreBind a)
245 dmdAnalBind top_lvl env dmd bind anal_body = case bind of
246 NonRec id rhs
247 | useLetUp top_lvl id
248 -> dmdAnalBindLetUp top_lvl env id rhs anal_body
249 _ -> dmdAnalBindLetDown top_lvl env dmd bind anal_body
250
251 -- | Annotates uninteresting top level functions ('isInterestingTopLevelFn')
252 -- with 'topDmd', the rest with the given demand.
253 setBindIdDemandInfo :: TopLevelFlag -> Id -> Demand -> Id
254 setBindIdDemandInfo top_lvl id dmd = setIdDemandInfo id $ case top_lvl of
255 TopLevel | not (isInterestingTopLevelFn id) -> topDmd
256 _ -> dmd
257
258 -- | Let bindings can be processed in two ways:
259 -- Down (RHS before body) or Up (body before RHS).
260 -- This function handles the up variant.
261 --
262 -- It is very simple. For let x = rhs in body
263 -- * Demand-analyse 'body' in the current environment
264 -- * Find the demand, 'rhs_dmd' placed on 'x' by 'body'
265 -- * Demand-analyse 'rhs' in 'rhs_dmd'
266 --
267 -- This is used for a non-recursive local let without manifest lambdas (see
268 -- 'useLetUp').
269 --
270 -- This is the LetUp rule in the paper “Higher-Order Cardinality Analysis”.
271 dmdAnalBindLetUp :: TopLevelFlag
272 -> AnalEnv
273 -> Id
274 -> CoreExpr
275 -> (AnalEnv -> WithDmdType a)
276 -> WithDmdType (DmdResult CoreBind a)
277 dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec id' rhs') (body'))
278 where
279 WithDmdType body_ty body' = anal_body env
280 WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
281 -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
282 id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd
283 !id' = setBindIdDemandInfo top_lvl id id_dmd'
284 (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs
285
286 -- See Note [Absence analysis for stable unfoldings and RULES]
287 rule_fvs = bndrRuleAndUnfoldingIds id
288 final_ty = body_ty' `plusDmdType` rhs_ty `keepAliveDmdType` rule_fvs
289
290 -- | Let bindings can be processed in two ways:
291 -- Down (RHS before body) or Up (body before RHS).
292 -- This function handles the down variant.
293 --
294 -- It computes a demand signature (by means of 'dmdAnalRhsSig') and uses
295 -- that at call sites in the body.
296 --
297 -- It is used for toplevel definitions, recursive definitions and local
298 -- non-recursive definitions that have manifest lambdas (cf. 'useLetUp').
299 -- Local non-recursive definitions without a lambda are handled with LetUp.
300 --
301 -- This is the LetDown rule in the paper “Higher-Order Cardinality Analysis”.
302 dmdAnalBindLetDown :: TopLevelFlag -> AnalEnv -> SubDemand -> CoreBind -> (AnalEnv -> WithDmdType a) -> WithDmdType (DmdResult CoreBind a)
303 dmdAnalBindLetDown top_lvl env dmd bind anal_body = case bind of
304 NonRec id rhs
305 | (env', lazy_fv, id1, rhs1) <-
306 dmdAnalRhsSig top_lvl NonRecursive env dmd id rhs
307 -> do_rest env' lazy_fv [(id1, rhs1)] (uncurry NonRec . only)
308 Rec pairs
309 | (env', lazy_fv, pairs') <- dmdFix top_lvl env dmd pairs
310 -> do_rest env' lazy_fv pairs' Rec
311 where
312 do_rest env' lazy_fv pairs1 build_bind = WithDmdType final_ty (R (build_bind pairs2) body')
313 where
314 WithDmdType body_ty body' = anal_body env'
315 -- see Note [Lazy and unleashable free variables]
316 dmd_ty = addLazyFVs body_ty lazy_fv
317 WithDmdType final_ty id_dmds = findBndrsDmds env' dmd_ty (strictMap fst pairs1)
318 -- Important to force this as build_bind might not force it.
319 !pairs2 = strictZipWith do_one pairs1 id_dmds
320 do_one (id', rhs') dmd = ((,) $! setBindIdDemandInfo top_lvl id' dmd) $! rhs'
321 -- If the actual demand is better than the vanilla call
322 -- demand, you might think that we might do better to re-analyse
323 -- the RHS with the stronger demand.
324 -- But (a) That seldom happens, because it means that *every* path in
325 -- the body of the let has to use that stronger demand
326 -- (b) It often happens temporarily in when fixpointing, because
327 -- the recursive function at first seems to place a massive demand.
328 -- But we don't want to go to extra work when the function will
329 -- probably iterate to something less demanding.
330 -- In practice, all the times the actual demand on id2 is more than
331 -- the vanilla call demand seem to be due to (b). So we don't
332 -- bother to re-analyse the RHS.
333
334 -- If e is complicated enough to become a thunk, its contents will be evaluated
335 -- at most once, so oneify it.
336 dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand
337 dmdTransformThunkDmd e
338 | exprIsTrivial e = id
339 | otherwise = oneifyDmd
340
341 -- Do not process absent demands
342 -- Otherwise act like in a normal demand analysis
343 -- See ↦* relation in the Cardinality Analysis paper
344 dmdAnalStar :: AnalEnv
345 -> Demand -- This one takes a *Demand*
346 -> CoreExpr -- Should obey the let/app invariant
347 -> (PlusDmdArg, CoreExpr)
348 dmdAnalStar env (n :* sd) e
349 -- NB: (:*) expands AbsDmd and BotDmd as needed
350 -- See Note [Analysing with absent demand]
351 | WithDmdType dmd_ty e' <- dmdAnal env sd e
352 = assertPpr (not (isUnliftedType (exprType e)) || exprOkForSpeculation e) (ppr e)
353 -- The argument 'e' should satisfy the let/app invariant
354 (toPlusDmdArg $ multDmdType n dmd_ty, e')
355
356 -- Main Demand Analsysis machinery
357 dmdAnal, dmdAnal' :: AnalEnv
358 -> SubDemand -- The main one takes a *SubDemand*
359 -> CoreExpr -> WithDmdType CoreExpr
360
361 dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $
362 dmdAnal' env d e
363
364 dmdAnal' _ _ (Lit lit) = WithDmdType nopDmdType (Lit lit)
365 dmdAnal' _ _ (Type ty) = WithDmdType nopDmdType (Type ty) -- Doesn't happen, in fact
366 dmdAnal' _ _ (Coercion co)
367 = WithDmdType (unitDmdType (coercionDmdEnv co)) (Coercion co)
368
369 dmdAnal' env dmd (Var var)
370 = WithDmdType (dmdTransform env var dmd) (Var var)
371
372 dmdAnal' env dmd (Cast e co)
373 = WithDmdType (dmd_ty `plusDmdType` mkPlusDmdArg (coercionDmdEnv co)) (Cast e' co)
374 where
375 WithDmdType dmd_ty e' = dmdAnal env dmd e
376
377 dmdAnal' env dmd (Tick t e)
378 = WithDmdType dmd_ty (Tick t e')
379 where
380 WithDmdType dmd_ty e' = dmdAnal env dmd e
381
382 dmdAnal' env dmd (App fun (Type ty))
383 = WithDmdType fun_ty (App fun' (Type ty))
384 where
385 WithDmdType fun_ty fun' = dmdAnal env dmd fun
386
387 -- Lots of the other code is there to make this
388 -- beautiful, compositional, application rule :-)
389 dmdAnal' env dmd (App fun arg)
390 = -- This case handles value arguments (type args handled above)
391 -- Crucially, coercions /are/ handled here, because they are
392 -- value arguments (#10288)
393 let
394 call_dmd = mkCalledOnceDmd dmd
395 WithDmdType fun_ty fun' = dmdAnal env call_dmd fun
396 (arg_dmd, res_ty) = splitDmdTy fun_ty
397 (arg_ty, arg') = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg
398 in
399 -- pprTrace "dmdAnal:app" (vcat
400 -- [ text "dmd =" <+> ppr dmd
401 -- , text "expr =" <+> ppr (App fun arg)
402 -- , text "fun dmd_ty =" <+> ppr fun_ty
403 -- , text "arg dmd =" <+> ppr arg_dmd
404 -- , text "arg dmd_ty =" <+> ppr arg_ty
405 -- , text "res dmd_ty =" <+> ppr res_ty
406 -- , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ])
407 WithDmdType (res_ty `plusDmdType` arg_ty) (App fun' arg')
408
409 dmdAnal' env dmd (Lam var body)
410 | isTyVar var
411 = let
412 WithDmdType body_ty body' = dmdAnal env dmd body
413 in
414 WithDmdType body_ty (Lam var body')
415
416 | otherwise
417 = let (n, body_dmd) = peelCallDmd dmd
418 -- body_dmd: a demand to analyze the body
419
420 WithDmdType body_ty body' = dmdAnal env body_dmd body
421 WithDmdType lam_ty var' = annotateLamIdBndr env body_ty var
422 new_dmd_type = multDmdType n lam_ty
423 in
424 WithDmdType new_dmd_type (Lam var' body')
425
426 dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
427 -- Only one alternative.
428 -- If it's a DataAlt, it should be the only constructor of the type.
429 | is_single_data_alt alt
430 = let
431 WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs
432 WithDmdType alt_ty1 fld_dmds = findBndrsDmds env rhs_ty bndrs
433 WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr
434 !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
435 -- Evaluation cardinality on the case binder is irrelevant and a no-op.
436 -- What matters is its nested sub-demand!
437 -- NB: If case_bndr_dmd is absDmd, boxity will say Unboxed, which is
438 -- what we want, because then `seq` will put a `seqDmd` on its scrut.
439 (_ :* case_bndr_sd) = case_bndr_dmd
440 -- Compute demand on the scrutinee
441 -- FORCE the result, otherwise thunks will end up retaining the
442 -- whole DmdEnv
443 !(!bndrs', !scrut_sd)
444 | DataAlt _ <- alt
445 -- See Note [Demand on the scrutinee of a product case]
446 -- See Note [Demand on case-alternative binders]
447 , (!scrut_sd, fld_dmds') <- addCaseBndrDmd case_bndr_sd fld_dmds
448 , let !bndrs' = setBndrsDemandInfo bndrs fld_dmds'
449 = (bndrs', scrut_sd)
450 | otherwise
451 -- __DEFAULT and literal alts. Simply add demands and discard the
452 -- evaluation cardinality, as we evaluate the scrutinee exactly once.
453 = assert (null bndrs) (bndrs, case_bndr_sd)
454 fam_envs = ae_fam_envs env
455 alt_ty3
456 -- See Note [Precise exceptions and strictness analysis] in "GHC.Types.Demand"
457 | exprMayThrowPreciseException fam_envs scrut
458 = deferAfterPreciseException alt_ty2
459 | otherwise
460 = alt_ty2
461
462 WithDmdType scrut_ty scrut' = dmdAnal env scrut_sd scrut
463 res_ty = alt_ty3 `plusDmdType` toPlusDmdArg scrut_ty
464 in
465 -- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
466 -- , text "dmd" <+> ppr dmd
467 -- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr')
468 -- , text "scrut_sd" <+> ppr scrut_sd
469 -- , text "scrut_ty" <+> ppr scrut_ty
470 -- , text "alt_ty" <+> ppr alt_ty2
471 -- , text "res_ty" <+> ppr res_ty ]) $
472 WithDmdType res_ty (Case scrut' case_bndr' ty [Alt alt bndrs' rhs'])
473 where
474 is_single_data_alt (DataAlt dc) = isJust $ tyConSingleAlgDataCon_maybe $ dataConTyCon dc
475 is_single_data_alt _ = True
476
477
478
479
480 dmdAnal' env dmd (Case scrut case_bndr ty alts)
481 = let -- Case expression with multiple alternatives
482 WithDmdType alt_ty alts' = combineAltDmds alts
483
484 combineAltDmds [] = WithDmdType botDmdType []
485 combineAltDmds (a:as) =
486 let
487 WithDmdType cur_ty a' = dmdAnalSumAlt env dmd case_bndr a
488 WithDmdType rest_ty as' = combineAltDmds as
489 in WithDmdType (lubDmdType cur_ty rest_ty) (a':as')
490
491 WithDmdType alt_ty1 case_bndr_dmd = findBndrDmd env alt_ty case_bndr
492 !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
493 WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut
494 -- NB: Base case is botDmdType, for empty case alternatives
495 -- This is a unit for lubDmdType, and the right result
496 -- when there really are no alternatives
497 fam_envs = ae_fam_envs env
498 alt_ty2
499 -- See Note [Precise exceptions and strictness analysis] in "GHC.Types.Demand"
500 | exprMayThrowPreciseException fam_envs scrut
501 = deferAfterPreciseException alt_ty1
502 | otherwise
503 = alt_ty1
504 res_ty = alt_ty2 `plusDmdType` toPlusDmdArg scrut_ty
505
506 in
507 -- pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut
508 -- , text "scrut_ty" <+> ppr scrut_ty
509 -- , text "alt_tys" <+> ppr alt_tys
510 -- , text "alt_ty2" <+> ppr alt_ty2
511 -- , text "res_ty" <+> ppr res_ty ]) $
512 WithDmdType res_ty (Case scrut' case_bndr' ty alts')
513
514 dmdAnal' env dmd (Let bind body)
515 = WithDmdType final_ty (Let bind' body')
516 where
517 !(WithDmdType final_ty (R bind' body')) = dmdAnalBind NotTopLevel env dmd bind go'
518 go' !env' = dmdAnal env' dmd body
519
520 -- | A simple, syntactic analysis of whether an expression MAY throw a precise
521 -- exception when evaluated. It's always sound to return 'True'.
522 -- See Note [Which scrutinees may throw precise exceptions].
523 exprMayThrowPreciseException :: FamInstEnvs -> CoreExpr -> Bool
524 exprMayThrowPreciseException envs e
525 | not (forcesRealWorld envs (exprType e))
526 = False -- 1. in the Note
527 | (Var f, _) <- collectArgs e
528 , Just op <- isPrimOpId_maybe f
529 , op /= RaiseIOOp
530 = False -- 2. in the Note
531 | (Var f, _) <- collectArgs e
532 , Just fcall <- isFCallId_maybe f
533 , not (isSafeForeignCall fcall)
534 = False -- 3. in the Note
535 | otherwise
536 = True -- _. in the Note
537
538 -- | Recognises types that are
539 -- * @State# RealWorld@
540 -- * Unboxed tuples with a @State# RealWorld@ field
541 -- modulo coercions. This will detect 'IO' actions (even post Nested CPR! See
542 -- T13380e) and user-written variants thereof by their type.
543 forcesRealWorld :: FamInstEnvs -> Type -> Bool
544 forcesRealWorld fam_envs ty
545 | ty `eqType` realWorldStatePrimTy
546 = True
547 | Just (tc, tc_args, _co) <- normSplitTyConApp_maybe fam_envs ty
548 , isUnboxedTupleTyCon tc
549 , let field_tys = dataConInstArgTys (tyConSingleDataCon tc) tc_args
550 = any (eqType realWorldStatePrimTy . scaledThing) field_tys
551 | otherwise
552 = False
553
554 dmdAnalSumAlt :: AnalEnv -> SubDemand -> Id -> Alt Var -> WithDmdType (Alt Var)
555 dmdAnalSumAlt env dmd case_bndr (Alt con bndrs rhs)
556 | WithDmdType rhs_ty rhs' <- dmdAnal env dmd rhs
557 , WithDmdType alt_ty dmds <- findBndrsDmds env rhs_ty bndrs
558 , let (_ :* case_bndr_sd) = findIdDemand alt_ty case_bndr
559 -- See Note [Demand on case-alternative binders]
560 -- we can't use the scrut_sd, because it says 'Prod' and we'll use
561 -- topSubDmd anyway for scrutinees of sum types.
562 (!_scrut_sd, dmds') = addCaseBndrDmd case_bndr_sd dmds
563 -- Do not put a thunk into the Alt
564 !new_ids = setBndrsDemandInfo bndrs dmds'
565 = WithDmdType alt_ty (Alt con new_ids rhs')
566
567 -- Precondition: The SubDemand is not a Call
568 -- See Note [Demand on the scrutinee of a product case]
569 -- and Note [Demand on case-alternative binders]
570 addCaseBndrDmd :: SubDemand -- On the case binder
571 -> [Demand] -- On the fields of the constructor
572 -> (SubDemand, [Demand])
573 -- SubDemand on the case binder incl. field demands
574 -- and final demands for the components of the constructor
575 addCaseBndrDmd case_sd fld_dmds
576 | Just (_, ds) <- viewProd (length fld_dmds) scrut_sd
577 = (scrut_sd, ds)
578 | otherwise
579 = pprPanic "was a call demand" (ppr case_sd $$ ppr fld_dmds) -- See the Precondition
580 where
581 scrut_sd = case_sd `plusSubDmd` mkProd Unboxed fld_dmds
582
583 {-
584 Note [Analysing with absent demand]
585 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
586 Suppose we analyse an expression with demand A. The "A" means
587 "absent", so this expression will never be needed. What should happen?
588 There are several wrinkles:
589
590 * We *do* want to analyse the expression regardless.
591 Reason: Note [Always analyse in virgin pass]
592
593 But we can post-process the results to ignore all the usage
594 demands coming back. This is done by multDmdType.
595
596 * Nevertheless, which sub-demand should we pick for analysis?
597 Since the demand was absent, any would do. Worker/wrapper will replace
598 absent bindings with an absent filler anyway, so annotations in the RHS
599 of an absent binding don't matter much.
600 Picking 'botSubDmd' would be the most useful, but would also look a bit
601 misleading in the Core output of DmdAnal, because all nested annotations would
602 be bottoming. Better pick 'seqSubDmd', so that we annotate many of those
603 nested bindings with A themselves.
604
605 * In a previous incarnation of GHC we needed to be extra careful in the
606 case of an *unlifted type*, because unlifted values are evaluated
607 even if they are not used. Example (see #9254):
608 f :: (() -> (# Int#, () #)) -> ()
609 -- Strictness signature is
610 -- <CS(S(A,SU))>
611 -- I.e. calls k, but discards first component of result
612 f k = case k () of (# _, r #) -> r
613
614 g :: Int -> ()
615 g y = f (\n -> (# case y of I# y2 -> y2, n #))
616
617 Here f's strictness signature says (correctly) that it calls its
618 argument function and ignores the first component of its result.
619 This is correct in the sense that it'd be fine to (say) modify the
620 function so that always returned 0# in the first component.
621
622 But in function g, we *will* evaluate the 'case y of ...', because
623 it has type Int#. So 'y' will be evaluated. So we must record this
624 usage of 'y', else 'g' will say 'y' is absent, and will w/w so that
625 'y' is bound to an aBSENT_ERROR thunk.
626
627 However, the argument of toSubDmd always satisfies the let/app
628 invariant; so if it is unlifted it is also okForSpeculation, and so
629 can be evaluated in a short finite time -- and that rules out nasty
630 cases like the one above. (I'm not quite sure why this was a
631 problem in an earlier version of GHC, but it isn't now.)
632
633 Note [Always analyse in virgin pass]
634 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
635 Tricky point: make sure that we analyse in the 'virgin' pass. Consider
636 rec { f acc x True = f (...rec { g y = ...g... }...)
637 f acc x False = acc }
638 In the virgin pass for 'f' we'll give 'f' a very strict (bottom) type.
639 That might mean that we analyse the sub-expression containing the
640 E = "...rec g..." stuff in a bottom demand. Suppose we *didn't analyse*
641 E, but just returned botType.
642
643 Then in the *next* (non-virgin) iteration for 'f', we might analyse E
644 in a weaker demand, and that will trigger doing a fixpoint iteration
645 for g. But *because it's not the virgin pass* we won't start g's
646 iteration at bottom. Disaster. (This happened in $sfibToList' of
647 nofib/spectral/fibheaps.)
648
649 So in the virgin pass we make sure that we do analyse the expression
650 at least once, to initialise its signatures.
651
652 Note [Which scrutinees may throw precise exceptions]
653 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
654 This is the specification of 'exprMayThrowPreciseExceptions',
655 which is important for Scenario 2 of
656 Note [Precise exceptions and strictness analysis] in GHC.Types.Demand.
657
658 For an expression @f a1 ... an :: ty@ we determine that
659 1. False If ty is *not* @State# RealWorld@ or an unboxed tuple thereof.
660 This check is done by 'forcesRealWorld'.
661 (Why not simply unboxed pairs as above? This is motivated by
662 T13380{d,e}.)
663 2. False If f is a PrimOp, and it is *not* raiseIO#
664 3. False If f is an unsafe FFI call ('PlayRisky')
665 _. True Otherwise "give up".
666
667 It is sound to return False in those cases, because
668 1. We don't give any guarantees for unsafePerformIO, so no precise exceptions
669 from pure code.
670 2. raiseIO# is the only primop that may throw a precise exception.
671 3. Unsafe FFI calls may not interact with the RTS (to throw, for example).
672 See haddock on GHC.Types.ForeignCall.PlayRisky.
673
674 We *need* to return False in those cases, because
675 1. We would lose too much strictness in pure code, all over the place.
676 2. We would lose strictness for primops like getMaskingState#, which
677 introduces a substantial regression in
678 GHC.IO.Handle.Internals.wantReadableHandle.
679 3. We would lose strictness for code like GHC.Fingerprint.fingerprintData,
680 where an intermittent FFI call to c_MD5Init would otherwise lose
681 strictness on the arguments len and buf, leading to regressions in T9203
682 (2%) and i386's haddock.base (5%). Tested by T13380f.
683
684 In !3014 we tried a more sophisticated analysis by introducing ConOrDiv (nic)
685 to the Divergence lattice, but in practice it turned out to be hard to untaint
686 from 'topDiv' to 'conDiv', leading to bugs, performance regressions and
687 complexity that didn't justify the single fixed testcase T13380c.
688
689 Note [Demand on the scrutinee of a product case]
690 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
691 When figuring out the demand on the scrutinee of a product case,
692 we use the demands of the case alternative, i.e. id_dmds.
693 But note that these include the demand on the case binder;
694 see Note [Demand on case-alternative binders] in GHC.Types.Demand.
695 This is crucial. Example:
696 f x = case x of y { (a,b) -> k y a }
697 If we just take scrut_demand = 1P(L,A), then we won't pass x to the
698 worker, so the worker will rebuild
699 x = (a, absent-error)
700 and that'll crash.
701
702 Note [Demand on case-alternative binders]
703 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
704 The demand on a binder in a case alternative comes
705 (a) From the demand on the binder itself
706 (b) From the demand on the case binder
707 Forgetting (b) led directly to #10148.
708
709 Example. Source code:
710 f x@(p,_) = if p then foo x else True
711
712 foo (p,True) = True
713 foo (p,q) = foo (q,p)
714
715 After strictness analysis, forgetting (b):
716 f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
717 case x_an1
718 of wild_X7 [Dmd=MP(ML,ML)]
719 { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
720 case p_an2 of _ {
721 False -> GHC.Types.True;
722 True -> foo wild_X7 }
723
724 Note that ds_dnz is syntactically dead, but the expression bound to it is
725 reachable through the case binder wild_X7. Now watch what happens if we inline
726 foo's wrapper:
727 f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
728 case x_an1
729 of _ [Dmd=MP(ML,ML)]
730 { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
731 case p_an2 of _ {
732 False -> GHC.Types.True;
733 True -> $wfoo_soq GHC.Types.True ds_dnz }
734
735 Look at that! ds_dnz has come back to life in the call to $wfoo_soq! A second
736 run of demand analysis would no longer infer ds_dnz to be absent.
737 But unlike occurrence analysis, which infers properties of the *syntactic*
738 shape of the program, the results of demand analysis describe expressions
739 *semantically* and are supposed to be mostly stable across Simplification.
740 That's why we should better account for (b).
741 In #10148, we ended up emitting a single-entry thunk instead of an updateable
742 thunk for a let binder that was an an absent case-alt binder during DmdAnal.
743
744 This is needed even for non-product types, in case the case-binder
745 is used but the components of the case alternative are not.
746
747 Note [Aggregated demand for cardinality]
748 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
749 FIXME: This Note should be named [LetUp vs. LetDown] and probably predates
750 said separation. SG
751
752 We use different strategies for strictness and usage/cardinality to
753 "unleash" demands captured on free variables by bindings. Let us
754 consider the example:
755
756 f1 y = let {-# NOINLINE h #-}
757 h = y
758 in (h, h)
759
760 We are interested in obtaining cardinality demand U1 on |y|, as it is
761 used only in a thunk, and, therefore, is not going to be updated any
762 more. Therefore, the demand on |y|, captured and unleashed by usage of
763 |h| is U1. However, if we unleash this demand every time |h| is used,
764 and then sum up the effects, the ultimate demand on |y| will be U1 +
765 U1 = U. In order to avoid it, we *first* collect the aggregate demand
766 on |h| in the body of let-expression, and only then apply the demand
767 transformer:
768
769 transf[x](U) = {y |-> U1}
770
771 so the resulting demand on |y| is U1.
772
773 The situation is, however, different for strictness, where this
774 aggregating approach exhibits worse results because of the nature of
775 |both| operation for strictness. Consider the example:
776
777 f y c =
778 let h x = y |seq| x
779 in case of
780 True -> h True
781 False -> y
782
783 It is clear that |f| is strict in |y|, however, the suggested analysis
784 will infer from the body of |let| that |h| is used lazily (as it is
785 used in one branch only), therefore lazy demand will be put on its
786 free variable |y|. Conversely, if the demand on |h| is unleashed right
787 on the spot, we will get the desired result, namely, that |f| is
788 strict in |y|.
789
790
791 ************************************************************************
792 * *
793 Demand transformer
794 * *
795 ************************************************************************
796 -}
797
798 dmdTransform :: AnalEnv -- ^ The analysis environment
799 -> Id -- ^ The variable
800 -> SubDemand -- ^ The evaluation context of the var
801 -> DmdType -- ^ The demand type unleashed by the variable in this
802 -- context. The returned DmdEnv includes the demand on
803 -- this function plus demand on its free variables
804 -- See Note [What are demand signatures?] in "GHC.Types.Demand"
805 dmdTransform env var sd
806 -- Data constructors
807 | isDataConWorkId var
808 = dmdTransformDataConSig (idArity var) sd
809 -- Dictionary component selectors
810 -- Used to be controlled by a flag.
811 -- See #18429 for some perf measurements.
812 | Just _ <- isClassOpId_maybe var
813 = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr (idDmdSig var) $$ ppr sd) $
814 dmdTransformDictSelSig (idDmdSig var) sd
815 -- Imported functions
816 | isGlobalId var
817 , let res = dmdTransformSig (idDmdSig var) sd
818 = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr sd, ppr res])
819 res
820 -- Top-level or local let-bound thing for which we use LetDown ('useLetUp').
821 -- In that case, we have a strictness signature to unleash in our AnalEnv.
822 | Just (sig, top_lvl) <- lookupSigEnv env var
823 , let fn_ty = dmdTransformSig sig sd
824 = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr sd, ppr fn_ty]) $
825 case top_lvl of
826 NotTopLevel -> addVarDmd fn_ty var (C_11 :* sd)
827 TopLevel
828 | isInterestingTopLevelFn var
829 -- Top-level things will be used multiple times or not at
830 -- all anyway, hence the multDmd below: It means we don't
831 -- have to track whether @var@ is used strictly or at most
832 -- once, because ultimately it never will.
833 -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* sd)) -- discard strictness
834 | otherwise
835 -> fn_ty -- don't bother tracking; just annotate with 'topDmd' later
836 -- Everything else:
837 -- * Local let binders for which we use LetUp (cf. 'useLetUp')
838 -- * Lambda binders
839 -- * Case and constructor field binders
840 | otherwise
841 = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr boxity, ppr sd]) $
842 unitDmdType (unitVarEnv var (C_11 :* sd))
843
844 {- *********************************************************************
845 * *
846 Binding right-hand sides
847 * *
848 ********************************************************************* -}
849
850 -- | @dmdAnalRhsSig@ analyses the given RHS to compute a demand signature
851 -- for the LetDown rule. It works as follows:
852 --
853 -- * assuming the weakest possible body sub-demand, L
854 -- * looking at the definition
855 -- * determining a strictness signature
856 --
857 -- Since it assumed a body sub-demand of L, the resulting signature is
858 -- applicable at any call site.
859 dmdAnalRhsSig
860 :: TopLevelFlag
861 -> RecFlag
862 -> AnalEnv -> SubDemand
863 -> Id -> CoreExpr
864 -> (AnalEnv, DmdEnv, Id, CoreExpr)
865 -- Process the RHS of the binding, add the strictness signature
866 -- to the Id, and augment the environment with the signature as well.
867 -- See Note [NOINLINE and strictness]
868 dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
869 = -- pprTrace "dmdAnalRhsSig" (ppr id $$ ppr let_dmd $$ ppr sig $$ ppr lazy_fv) $
870 (env', lazy_fv, id', rhs')
871 where
872 rhs_arity = idArity id
873 -- See Note [Demand signatures are computed for a threshold demand based on idArity]
874
875 rhs_dmd = mkCalledOnceDmds rhs_arity body_dmd
876
877 body_dmd
878 | isJoinId id
879 -- See Note [Demand analysis for join points]
880 -- See Note [Invariants on join points] invariant 2b, in GHC.Core
881 -- rhs_arity matches the join arity of the join point
882 = let_dmd
883 | otherwise
884 -- See Note [Unboxed demand on function bodies returning small products]
885 = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd
886
887 -- See Note [Do not unbox class dictionaries]
888 WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs
889 DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty
890
891 sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div)
892
893 id' = id `setIdDmdSig` sig
894 !env' = extendAnalEnv top_lvl env id' sig
895
896 -- See Note [Aggregated demand for cardinality]
897 -- FIXME: That Note doesn't explain the following lines at all. The reason
898 -- is really much different: When we have a recursive function, we'd
899 -- have to also consider the free vars of the strictness signature
900 -- when checking whether we found a fixed-point. That is expensive;
901 -- we only want to check whether argument demands of the sig changed.
902 -- reuseEnv makes it so that the FV results are stable as long as the
903 -- last argument demands were. Strictness won't change. But used-once
904 -- might turn into used-many even if the signature was stable and
905 -- we'd have to do an additional iteration. reuseEnv makes sure that
906 -- we never get used-once info for FVs of recursive functions.
907 -- See #14816 where we try to get rid of reuseEnv.
908 rhs_fv1 = case rec_flag of
909 Recursive -> reuseEnv rhs_fv
910 NonRecursive -> rhs_fv
911
912 -- See Note [Absence analysis for stable unfoldings and RULES]
913 rhs_fv2 = rhs_fv1 `keepAliveDmdEnv` bndrRuleAndUnfoldingIds id
914
915 -- See Note [Lazy and unleashable free variables]
916 !(!lazy_fv, !sig_fv) = partitionVarEnv isWeakDmd rhs_fv2
917
918 unboxableResultWidth :: AnalEnv -> Id -> Maybe Arity
919 unboxableResultWidth env id
920 | (pis,ret_ty) <- splitPiTys (idType id)
921 , count (not . isNamedBinder) pis == idArity id
922 , Just (tc, _tc_args, _co) <- normSplitTyConApp_maybe (ae_fam_envs env) ret_ty
923 , Just dc <- tyConSingleAlgDataCon_maybe tc
924 , null (dataConExTyCoVars dc) -- Can't unbox results with existentials
925 = Just (dataConRepArity dc)
926 | otherwise
927 = Nothing
928
929 unboxedWhenSmall :: DmdAnalOpts -> Maybe Arity -> SubDemand -> SubDemand
930 -- See Note [Unboxed demand on function bodies returning small products]
931 unboxedWhenSmall opts mb_n sd
932 | Just n <- mb_n
933 , n <= dmd_unbox_width opts
934 = unboxSubDemand sd
935 | otherwise
936 = sd
937
938 -- | If given the (local, non-recursive) let-bound 'Id', 'useLetUp' determines
939 -- whether we should process the binding up (body before rhs) or down (rhs
940 -- before body).
941 --
942 -- We use LetDown if there is a chance to get a useful strictness signature to
943 -- unleash at call sites. LetDown is generally more precise than LetUp if we can
944 -- correctly guess how it will be used in the body, that is, for which incoming
945 -- demand the strictness signature should be computed, which allows us to
946 -- unleash higher-order demands on arguments at call sites. This is mostly the
947 -- case when
948 --
949 -- * The binding takes any arguments before performing meaningful work (cf.
950 -- 'idArity'), in which case we are interested to see how it uses them.
951 -- * The binding is a join point, hence acting like a function, not a value.
952 -- As a big plus, we know *precisely* how it will be used in the body; since
953 -- it's always tail-called, we can directly unleash the incoming demand of
954 -- the let binding on its RHS when computing a strictness signature. See
955 -- [Demand analysis for join points].
956 --
957 -- Thus, if the binding is not a join point and its arity is 0, we have a thunk
958 -- and use LetUp, implying that we have no usable demand signature available
959 -- when we analyse the let body.
960 --
961 -- Since thunk evaluation is memoised, we want to unleash its 'DmdEnv' of free
962 -- vars at most once, regardless of how many times it was forced in the body.
963 -- This makes a real difference wrt. usage demands. The other reason is being
964 -- able to unleash a more precise product demand on its RHS once we know how the
965 -- thunk was used in the let body.
966 --
967 -- Characteristic examples, always assuming a single evaluation:
968 --
969 -- * @let x = 2*y in x + x@ => LetUp. Compared to LetDown, we find out that
970 -- the expression uses @y@ at most once.
971 -- * @let x = (a,b) in fst x@ => LetUp. Compared to LetDown, we find out that
972 -- @b@ is absent.
973 -- * @let f x = x*2 in f y@ => LetDown. Compared to LetUp, we find out that
974 -- the expression uses @y@ strictly, because we have @f@'s demand signature
975 -- available at the call site.
976 -- * @join exit = 2*y in if a then exit else if b then exit else 3*y@ =>
977 -- LetDown. Compared to LetUp, we find out that the expression uses @y@
978 -- strictly, because we can unleash @exit@'s signature at each call site.
979 -- * For a more convincing example with join points, see Note [Demand analysis
980 -- for join points].
981 --
982 useLetUp :: TopLevelFlag -> Var -> Bool
983 useLetUp top_lvl f = isNotTopLevel top_lvl && idArity f == 0 && not (isJoinId f)
984
985 {- Note [Demand analysis for join points]
986 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
987 Consider
988 g :: (Int,Int) -> Int
989 g (p,q) = p+q
990
991 f :: T -> Int -> Int
992 f x p = g (join j y = (p,y)
993 in case x of
994 A -> j 3
995 B -> j 4
996 C -> (p,7))
997
998 If j was a vanilla function definition, we'd analyse its body with
999 evalDmd, and think that it was lazy in p. But for join points we can
1000 do better! We know that j's body will (if called at all) be evaluated
1001 with the demand that consumes the entire join-binding, in this case
1002 the argument demand from g. Whizzo! g evaluates both components of
1003 its argument pair, so p will certainly be evaluated if j is called.
1004
1005 For f to be strict in p, we need /all/ paths to evaluate p; in this
1006 case the C branch does so too, so we are fine. So, as usual, we need
1007 to transport demands on free variables to the call site(s). Compare
1008 Note [Lazy and unleashable free variables].
1009
1010 The implementation is easy. When analysing a join point, we can
1011 analyse its body with the demand from the entire join-binding (written
1012 let_dmd here).
1013
1014 Another win for join points! #13543.
1015
1016 However, note that the strictness signature for a join point can
1017 look a little puzzling. E.g.
1018
1019 (join j x = \y. error "urk")
1020 (in case v of )
1021 ( A -> j 3 ) x
1022 ( B -> j 4 )
1023 ( C -> \y. blah )
1024
1025 The entire thing is in a C1(L) context, so j's strictness signature
1026 will be [A]b
1027 meaning one absent argument, returns bottom. That seems odd because
1028 there's a \y inside. But it's right because when consumed in a C1(L)
1029 context the RHS of the join point is indeed bottom.
1030
1031 Note [Demand signatures are computed for a threshold demand based on idArity]
1032 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1033 We compute demand signatures assuming idArity incoming arguments to approximate
1034 behavior for when we have a call site with at least that many arguments. idArity
1035 is /at least/ the number of manifest lambdas, but might be higher for PAPs and
1036 trivial RHS (see Note [Demand analysis for trivial right-hand sides]).
1037
1038 Because idArity of a function varies independently of its cardinality
1039 properties (cf. Note [idArity varies independently of dmdTypeDepth]), we
1040 implicitly encode the arity for when a demand signature is sound to unleash
1041 in its 'dmdTypeDepth' (cf. Note [Understanding DmdType and DmdSig] in
1042 GHC.Types.Demand). It is unsound to unleash a demand signature when the
1043 incoming number of arguments is less than that.
1044 See Note [What are demand signatures?] in GHC.Types.Demand for more details
1045 on soundness.
1046
1047 Why idArity arguments? Because that's a conservative estimate of how many
1048 arguments we must feed a function before it does anything interesting with them.
1049 Also it elegantly subsumes the trivial RHS and PAP case.
1050
1051 There might be functions for which we might want to analyse for more incoming
1052 arguments than idArity. Example:
1053
1054 f x =
1055 if expensive
1056 then \y -> ... y ...
1057 else \y -> ... y ...
1058
1059 We'd analyse `f` under a unary call demand C1(L), corresponding to idArity
1060 being 1. That's enough to look under the manifest lambda and find out how a
1061 unary call would use `x`, but not enough to look into the lambdas in the if
1062 branches.
1063
1064 On the other hand, if we analysed for call demand C1(C1(L)), we'd get useful
1065 strictness info for `y` (and more precise info on `x`) and possibly CPR
1066 information, but
1067
1068 * We would no longer be able to unleash the signature at unary call sites
1069 * Performing the worker/wrapper split based on this information would be
1070 implicitly eta-expanding `f`, playing fast and loose with divergence and
1071 even being unsound in the presence of newtypes, so we refrain from doing so.
1072 Also see Note [Don't eta expand in w/w] in GHC.Core.Opt.WorkWrap.
1073
1074 Since we only compute one signature, we do so for arity 1. Computing multiple
1075 signatures for different arities (i.e., polyvariance) would be entirely
1076 possible, if it weren't for the additional runtime and implementation
1077 complexity.
1078
1079 Note [idArity varies independently of dmdTypeDepth]
1080 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1081 We used to check in GHC.Core.Lint that dmdTypeDepth <= idArity for a let-bound
1082 identifier. But that means we would have to zap demand signatures every time we
1083 reset or decrease arity. That's an unnecessary dependency, because
1084
1085 * The demand signature captures a semantic property that is independent of
1086 what the binding's current arity is
1087 * idArity is analysis information itself, thus volatile
1088 * We already *have* dmdTypeDepth, wo why not just use it to encode the
1089 threshold for when to unleash the signature
1090 (cf. Note [Understanding DmdType and DmdSig] in GHC.Types.Demand)
1091
1092 Consider the following expression, for example:
1093
1094 (let go x y = `x` seq ... in go) |> co
1095
1096 `go` might have a strictness signature of `<1L><L>`. The simplifier will identify
1097 `go` as a nullary join point through `joinPointBinding_maybe` and float the
1098 coercion into the binding, leading to an arity decrease:
1099
1100 join go = (\x y -> `x` seq ...) |> co in go
1101
1102 With the CoreLint check, we would have to zap `go`'s perfectly viable strictness
1103 signature.
1104
1105 Note [Demand analysis for trivial right-hand sides]
1106 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1107 Consider
1108 foo = plusInt |> co
1109 where plusInt is an arity-2 function with known strictness. Clearly
1110 we want plusInt's strictness to propagate to foo! But because it has
1111 no manifest lambdas, it won't do so automatically, and indeed 'co' might
1112 have type (Int->Int->Int) ~ T.
1113
1114 Fortunately, GHC.Core.Opt.Arity gives 'foo' arity 2, which is enough for LetDown to
1115 forward plusInt's demand signature, and all is well (see Note [Newtype arity] in
1116 GHC.Core.Opt.Arity)! A small example is the test case NewtypeArity.
1117
1118 Note [Absence analysis for stable unfoldings and RULES]
1119 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1120 Ticket #18638 shows that it's really important to do absence analysis
1121 for stable unfoldings. Consider
1122
1123 g = blah
1124
1125 f = \x. ...no use of g....
1126 {- f's stable unfolding is f = \x. ...g... -}
1127
1128 If f is ever inlined we use 'g'. But f's current RHS makes no use
1129 of 'g', so if we don't look at the unfolding we'll mark g as Absent,
1130 and transform to
1131
1132 g = error "Entered absent value"
1133 f = \x. ...
1134 {- f's stable unfolding is f = \x. ...g... -}
1135
1136 Now if f is subsequently inlined, we'll use 'g' and ... disaster.
1137
1138 SOLUTION: if f has a stable unfolding, adjust its DmdEnv (the demands
1139 on its free variables) so that no variable mentioned in its unfolding
1140 is Absent. This is done by the function Demand.keepAliveDmdEnv.
1141
1142 ALSO: do the same for Ids free in the RHS of any RULES for f.
1143
1144 PS: You may wonder how it can be that f's optimised RHS has somehow
1145 discarded 'g', but when f is inlined we /don't/ discard g in the same
1146 way. I think a simple example is
1147 g = (a,b)
1148 f = \x. fst g
1149 {-# INLINE f #-}
1150
1151 Now f's optimised RHS will be \x.a, but if we change g to (error "..")
1152 (since it is apparently Absent) and then inline (\x. fst g) we get
1153 disaster. But regardless, #18638 was a more complicated version of
1154 this, that actually happened in practice.
1155 -}
1156
1157 {- *********************************************************************
1158 * *
1159 Fixpoints
1160 * *
1161 ********************************************************************* -}
1162
1163 -- Recursive bindings
1164 dmdFix :: TopLevelFlag
1165 -> AnalEnv -- Does not include bindings for this binding
1166 -> SubDemand
1167 -> [(Id,CoreExpr)]
1168 -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) -- Binders annotated with strictness info
1169
1170 dmdFix top_lvl env let_dmd orig_pairs
1171 = loop 1 initial_pairs
1172 where
1173 -- See Note [Initialising strictness]
1174 initial_pairs | ae_virgin env = [(setIdDmdSig id botSig, rhs) | (id, rhs) <- orig_pairs ]
1175 | otherwise = orig_pairs
1176
1177 -- If fixed-point iteration does not yield a result we use this instead
1178 -- See Note [Safe abortion in the fixed-point iteration]
1179 abort :: (AnalEnv, DmdEnv, [(Id,CoreExpr)])
1180 abort = (env, lazy_fv', zapped_pairs)
1181 where (lazy_fv, pairs') = step True (zapIdDmdSig orig_pairs)
1182 -- Note [Lazy and unleashable free variables]
1183 non_lazy_fvs = plusVarEnvList $ map (dmdSigDmdEnv . idDmdSig . fst) pairs'
1184 lazy_fv' = lazy_fv `plusVarEnv` mapVarEnv (const topDmd) non_lazy_fvs
1185 zapped_pairs = zapIdDmdSig pairs'
1186
1187 -- The fixed-point varies the idDmdSig field of the binders, and terminates if that
1188 -- annotation does not change any more.
1189 loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, DmdEnv, [(Id,CoreExpr)])
1190 loop n pairs = -- pprTrace "dmdFix" (ppr n <+> vcat [ ppr id <+> ppr (idDmdSig id)
1191 -- | (id,_)<- pairs]) $
1192 loop' n pairs
1193
1194 loop' n pairs
1195 | found_fixpoint = (final_anal_env, lazy_fv, pairs')
1196 | n == 10 = abort
1197 | otherwise = loop (n+1) pairs'
1198 where
1199 found_fixpoint = map (idDmdSig . fst) pairs' == map (idDmdSig . fst) pairs
1200 first_round = n == 1
1201 (lazy_fv, pairs') = step first_round pairs
1202 final_anal_env = extendAnalEnvs top_lvl env (map fst pairs')
1203
1204 step :: Bool -> [(Id, CoreExpr)] -> (DmdEnv, [(Id, CoreExpr)])
1205 step first_round pairs = (lazy_fv, pairs')
1206 where
1207 -- In all but the first iteration, delete the virgin flag
1208 start_env | first_round = env
1209 | otherwise = nonVirgin env
1210
1211 start = (extendAnalEnvs top_lvl start_env (map fst pairs), emptyVarEnv)
1212
1213 !((_,!lazy_fv), !pairs') = mapAccumL my_downRhs start pairs
1214 -- mapAccumL: Use the new signature to do the next pair
1215 -- The occurrence analyser has arranged them in a good order
1216 -- so this can significantly reduce the number of iterations needed
1217
1218 my_downRhs (env, lazy_fv) (id,rhs)
1219 = -- pprTrace "my_downRhs" (ppr id $$ ppr (idDmdSig id) $$ ppr sig) $
1220 ((env', lazy_fv'), (id', rhs'))
1221 where
1222 !(!env', !lazy_fv1, !id', !rhs') = dmdAnalRhsSig top_lvl Recursive env let_dmd id rhs
1223 !lazy_fv' = plusVarEnv_C plusDmd lazy_fv lazy_fv1
1224
1225 zapIdDmdSig :: [(Id, CoreExpr)] -> [(Id, CoreExpr)]
1226 zapIdDmdSig pairs = [(setIdDmdSig id nopSig, rhs) | (id, rhs) <- pairs ]
1227
1228 {- Note [Safe abortion in the fixed-point iteration]
1229 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1230 Fixed-point iteration may fail to terminate. But we cannot simply give up and
1231 return the environment and code unchanged! We still need to do one additional
1232 round, for two reasons:
1233
1234 * To get information on used free variables (both lazy and strict!)
1235 (see Note [Lazy and unleashable free variables])
1236 * To ensure that all expressions have been traversed at least once, and any left-over
1237 strictness annotations have been updated.
1238
1239 This final iteration does not add the variables to the strictness signature
1240 environment, which effectively assigns them 'nopSig' (see "getStrictness")
1241
1242 Note [Trimming a demand to a type]
1243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1244 There are two reasons we sometimes trim a demand to match a type.
1245 1. GADTs
1246 2. Recursive products and widening
1247
1248 More on both below. But the botttom line is: we really don't want to
1249 have a binder whose demand is more deeply-nested than its type
1250 "allows". So in findBndrDmd we call trimToType and findTypeShape to
1251 trim the demand on the binder to a form that matches the type
1252
1253 Now to the reasons. For (1) consider
1254 f :: a -> Bool
1255 f x = case ... of
1256 A g1 -> case (x |> g1) of (p,q) -> ...
1257 B -> error "urk"
1258
1259 where A,B are the constructors of a GADT. We'll get a 1P(L,L) demand
1260 on x from the A branch, but that's a stupid demand for x itself, which
1261 has type 'a'. Indeed we get ASSERTs going off (notably in
1262 splitUseProdDmd, #8569).
1263
1264 For (2) consider
1265 data T = MkT Int T -- A recursive product
1266 f :: Int -> T -> Int
1267 f 0 _ = 0
1268 f _ (MkT n t) = f n t
1269
1270 Here f is lazy in T, but its *usage* is infinite: P(L,P(L,P(L, ...))).
1271 Notice that this happens because T is a product type, and is recrusive.
1272 If we are not careful, we'll fail to iterate to a fixpoint in dmdFix,
1273 and bale out entirely, which is inefficient and over-conservative.
1274
1275 Worse, as we discovered in #18304, the size of the usages we compute
1276 can grow /exponentially/, so even 10 iterations costs far too much.
1277 Especially since we then discard the result.
1278
1279 To avoid this we use the same findTypeShape function as for (1), but
1280 arrange that it trims the demand if it encounters the same type constructor
1281 twice (or three times, etc). We use our standard RecTcChecker mechanism
1282 for this -- see GHC.Core.Opt.WorkWrap.Utils.findTypeShape.
1283
1284 This is usually call "widening". We could do it just in dmdFix, but
1285 since are doing this findTypeShape business /anyway/ because of (1),
1286 and it has all the right information to hand, it's extremely
1287 convenient to do it there.
1288
1289 -}
1290
1291 {- *********************************************************************
1292 * *
1293 Strictness signatures and types
1294 * *
1295 ********************************************************************* -}
1296
1297 unitDmdType :: DmdEnv -> DmdType
1298 unitDmdType dmd_env = DmdType dmd_env [] topDiv
1299
1300 coercionDmdEnv :: Coercion -> DmdEnv
1301 coercionDmdEnv co = coercionsDmdEnv [co]
1302
1303 coercionsDmdEnv :: [Coercion] -> DmdEnv
1304 coercionsDmdEnv cos = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCos cos)
1305 -- The VarSet from coVarsOfCos is really a VarEnv Var
1306
1307 addVarDmd :: DmdType -> Var -> Demand -> DmdType
1308 addVarDmd (DmdType fv ds res) var dmd
1309 = DmdType (extendVarEnv_C plusDmd fv var dmd) ds res
1310
1311 addLazyFVs :: DmdType -> DmdEnv -> DmdType
1312 addLazyFVs dmd_ty lazy_fvs
1313 = dmd_ty `plusDmdType` mkPlusDmdArg lazy_fvs
1314 -- Using plusDmdType (rather than just plus'ing the envs)
1315 -- is vital. Consider
1316 -- let f = \x -> (x,y)
1317 -- in error (f 3)
1318 -- Here, y is treated as a lazy-fv of f, but we must `plusDmd` that L
1319 -- demand with the bottom coming up from 'error'
1320 --
1321 -- I got a loop in the fixpointer without this, due to an interaction
1322 -- with the lazy_fv filtering in dmdAnalRhsSig. Roughly, it was
1323 -- letrec f n x
1324 -- = letrec g y = x `fatbar`
1325 -- letrec h z = z + ...g...
1326 -- in h (f (n-1) x)
1327 -- in ...
1328 -- In the initial iteration for f, f=Bot
1329 -- Suppose h is found to be strict in z, but the occurrence of g in its RHS
1330 -- is lazy. Now consider the fixpoint iteration for g, esp the demands it
1331 -- places on its free variables. Suppose it places none. Then the
1332 -- x `fatbar` ...call to h...
1333 -- will give a x->V demand for x. That turns into a L demand for x,
1334 -- which floats out of the defn for h. Without the modifyEnv, that
1335 -- L demand doesn't get both'd with the Bot coming up from the inner
1336 -- call to f. So we just get an L demand for x for g.
1337
1338 {-
1339 Note [Do not strictify the argument dictionaries of a dfun]
1340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1341 The typechecker can tie recursive knots involving dfuns, so we do the
1342 conservative thing and refrain from strictifying a dfun's argument
1343 dictionaries.
1344 -}
1345
1346 setBndrsDemandInfo :: HasCallStack => [Var] -> [Demand] -> [Var]
1347 setBndrsDemandInfo (b:bs) ds
1348 | isTyVar b = b : setBndrsDemandInfo bs ds
1349 setBndrsDemandInfo (b:bs) (d:ds) =
1350 let !new_info = setIdDemandInfo b d
1351 !vars = setBndrsDemandInfo bs ds
1352 in new_info : vars
1353 setBndrsDemandInfo [] ds = assert (null ds) []
1354 setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs)
1355
1356 annotateLamIdBndr :: AnalEnv
1357 -> DmdType -- Demand type of body
1358 -> Id -- Lambda binder
1359 -> WithDmdType Id -- Demand type of lambda
1360 -- and binder annotated with demand
1361
1362 annotateLamIdBndr env dmd_ty id
1363 -- For lambdas we add the demand to the argument demands
1364 -- Only called for Ids
1365 = assert (isId id) $
1366 -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $
1367 WithDmdType main_ty new_id
1368 where
1369 -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
1370 -- and Note [Do not unbox class dictionaries]
1371 dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd
1372 new_id = setIdDemandInfo id dmd'
1373 main_ty = addDemand dmd' dmd_ty'
1374 WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
1375
1376 {- Note [NOINLINE and strictness]
1377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1378 At one point we disabled strictness for NOINLINE functions, on the
1379 grounds that they should be entirely opaque. But that lost lots of
1380 useful semantic strictness information, so now we analyse them like
1381 any other function, and pin strictness information on them.
1382
1383 That in turn forces us to worker/wrapper them; see
1384 Note [Worker/wrapper for NOINLINE functions] in GHC.Core.Opt.WorkWrap.
1385
1386
1387 Note [Lazy and unleashable free variables]
1388 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1389 We put the strict and once-used FVs in the DmdType of the Id, so
1390 that at its call sites we unleash demands on its strict fvs.
1391 An example is 'roll' in imaginary/wheel-sieve2
1392 Something like this:
1393 roll x = letrec
1394 go y = if ... then roll (x-1) else x+1
1395 in
1396 go ms
1397 We want to see that roll is strict in x, which is because
1398 go is called. So we put the DmdEnv for x in go's DmdType.
1399
1400 Another example:
1401
1402 f :: Int -> Int -> Int
1403 f x y = let t = x+1
1404 h z = if z==0 then t else
1405 if z==1 then x+1 else
1406 x + h (z-1)
1407 in h y
1408
1409 Calling h does indeed evaluate x, but we can only see
1410 that if we unleash a demand on x at the call site for t.
1411
1412 Incidentally, here's a place where lambda-lifting h would
1413 lose the cigar --- we couldn't see the joint strictness in t/x
1414
1415 ON THE OTHER HAND
1416
1417 We don't want to put *all* the fv's from the RHS into the
1418 DmdType. Because
1419
1420 * it makes the strictness signatures larger, and hence slows down fixpointing
1421
1422 and
1423
1424 * it is useless information at the call site anyways:
1425 For lazy, used-many times fv's we will never get any better result than
1426 that, no matter how good the actual demand on the function at the call site
1427 is (unless it is always absent, but then the whole binder is useless).
1428
1429 Therefore we exclude lazy multiple-used fv's from the environment in the
1430 DmdType.
1431
1432 But now the signature lies! (Missing variables are assumed to be absent.) To
1433 make up for this, the code that analyses the binding keeps the demand on those
1434 variable separate (usually called "lazy_fv") and adds it to the demand of the
1435 whole binding later.
1436
1437 What if we decide _not_ to store a strictness signature for a binding at all, as
1438 we do when aborting a fixed-point iteration? The we risk losing the information
1439 that the strict variables are being used. In that case, we take all free variables
1440 mentioned in the (unsound) strictness signature, conservatively approximate the
1441 demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix".
1442
1443
1444 ************************************************************************
1445 * *
1446 \subsection{Strictness signatures}
1447 * *
1448 ************************************************************************
1449 -}
1450
1451
1452 data AnalEnv = AE
1453 { ae_opts :: !DmdAnalOpts -- ^ Analysis options
1454 , ae_sigs :: !SigEnv
1455 , ae_virgin :: !Bool -- ^ True on first iteration only
1456 -- See Note [Initialising strictness]
1457 , ae_fam_envs :: !FamInstEnvs
1458 , ae_inl_fun :: !InsideInlineableFun
1459 -- ^ Whether we analyse the body of an inlineable fun.
1460 -- See Note [Do not unbox class dictionaries].
1461 }
1462
1463 -- We use the se_env to tell us whether to
1464 -- record info about a variable in the DmdEnv
1465 -- We do so if it's a LocalId, but not top-level
1466 --
1467 -- The DmdEnv gives the demand on the free vars of the function
1468 -- when it is given enough args to satisfy the strictness signature
1469
1470 type SigEnv = VarEnv (DmdSig, TopLevelFlag)
1471
1472 instance Outputable AnalEnv where
1473 ppr env = text "AE" <+> braces (vcat
1474 [ text "ae_virgin =" <+> ppr (ae_virgin env)
1475 , text "ae_sigs =" <+> ppr (ae_sigs env)
1476 ])
1477
1478 emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv
1479 emptyAnalEnv opts fam_envs
1480 = AE { ae_opts = opts
1481 , ae_sigs = emptySigEnv
1482 , ae_virgin = True
1483 , ae_fam_envs = fam_envs
1484 , ae_inl_fun = NotInsideInlineableFun
1485 }
1486
1487 emptySigEnv :: SigEnv
1488 emptySigEnv = emptyVarEnv
1489
1490 -- | Extend an environment with the strictness IDs attached to the id
1491 extendAnalEnvs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv
1492 extendAnalEnvs top_lvl env vars
1493 = env { ae_sigs = extendSigEnvs top_lvl (ae_sigs env) vars }
1494
1495 extendSigEnvs :: TopLevelFlag -> SigEnv -> [Id] -> SigEnv
1496 extendSigEnvs top_lvl sigs vars
1497 = extendVarEnvList sigs [ (var, (idDmdSig var, top_lvl)) | var <- vars]
1498
1499 extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> DmdSig -> AnalEnv
1500 extendAnalEnv top_lvl env var sig
1501 = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig }
1502
1503 extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> DmdSig -> SigEnv
1504 extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl)
1505
1506 lookupSigEnv :: AnalEnv -> Id -> Maybe (DmdSig, TopLevelFlag)
1507 lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
1508
1509 nonVirgin :: AnalEnv -> AnalEnv
1510 nonVirgin env = env { ae_virgin = False }
1511
1512 -- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable
1513 -- unfolding. See Note [Do not unbox class dictionaries].
1514 adjustInlFun :: Id -> AnalEnv -> AnalEnv
1515 adjustInlFun id env
1516 | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun }
1517 | otherwise = env { ae_inl_fun = NotInsideInlineableFun }
1518
1519 findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand]
1520 -- Return the demands on the Ids in the [Var]
1521 findBndrsDmds env dmd_ty bndrs
1522 = go dmd_ty bndrs
1523 where
1524 go dmd_ty [] = WithDmdType dmd_ty []
1525 go dmd_ty (b:bs)
1526 | isId b = let WithDmdType dmd_ty1 dmds = go dmd_ty bs
1527 WithDmdType dmd_ty2 dmd = findBndrDmd env dmd_ty1 b
1528 in WithDmdType dmd_ty2 (dmd : dmds)
1529 | otherwise = go dmd_ty bs
1530
1531 findBndrDmd :: AnalEnv -> DmdType -> Id -> WithDmdType Demand
1532 -- See Note [Trimming a demand to a type]
1533 findBndrDmd env dmd_ty id
1534 = -- pprTrace "findBndrDmd" (ppr id $$ ppr dmd_ty $$ ppr starting_dmd $$ ppr dmd') $
1535 WithDmdType dmd_ty' dmd'
1536 where
1537 dmd' = strictify $
1538 trimToType starting_dmd (findTypeShape fam_envs id_ty)
1539
1540 (dmd_ty', starting_dmd) = peelFV dmd_ty id
1541
1542 id_ty = idType id
1543
1544 strictify dmd
1545 -- See Note [Making dictionaries strict]
1546 | dmd_strict_dicts (ae_opts env)
1547 -- We never want to strictify a recursive let. At the moment
1548 -- findBndrDmd is never called for recursive lets; if that
1549 -- changes, we need a RecFlag parameter and another guard here.
1550 = strictifyDictDmd id_ty dmd
1551 | otherwise
1552 = dmd
1553
1554 fam_envs = ae_fam_envs env
1555
1556 {- Note [Making dictionaries strict]
1557 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1558 The Opt_DictsStrict flag makes GHC use call-by-value for dictionaries. Why?
1559
1560 * Generally CBV is more efficient.
1561
1562 * Dictionaries are always non-bottom; and never take much work to
1563 compute. E.g. a dfun from an instance decl always returns a dicionary
1564 record immediately. See DFunUnfolding in CoreSyn.
1565 See also Note [Recursive superclasses] in TcInstDcls.
1566
1567 * The strictness analyser will then unbox dictionaries and pass the
1568 methods individually, rather than in a bundle. If there are a lot of
1569 methods that might be bad; but worker/wrapper already does throttling.
1570
1571 * A newtype dictionary is *not* always non-bottom. E.g.
1572 class C a where op :: a -> a
1573 instance C Int where op = error "urk"
1574 Now a value of type (C Int) is just a newtype wrapper (a cast) around
1575 the error thunk. Don't strictify these!
1576
1577 See #17758 for more background and perf numbers.
1578
1579 The implementation is extremly simple: just make the strictness
1580 analyser strictify the demand on a dictionary binder in
1581 'findBndrDmd'.
1582
1583 However there is one case where this can make performance worse.
1584 For the principle consider some function at the core level:
1585 myEq :: Eq a => a -> a -> Bool
1586 myEq eqDict x y = ((==) eqDict) x y
1587 If we make the dictionary strict then WW can fire turning this into:
1588 $wmyEq :: (a -> a -> Bool) -> a -> a -> Bool
1589 $wmyEq eq x y = eq x y
1590 Which *usually* performs better. However if the dictionary is known we
1591 are far more likely to inline a function applied to the dictionary than
1592 to inline one applied to a function. Sometimes this makes just enough
1593 of a difference to stop a function from inlining. This is documented in
1594 #18421.
1595
1596 It's somewhat similar to Note [Do not unbox class dictionaries] although
1597 here our problem is with the inliner, not the specializer.
1598
1599 Note [Initialising strictness]
1600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1601 See section 9.2 (Finding fixpoints) of the paper.
1602
1603 Our basic plan is to initialise the strictness of each Id in a
1604 recursive group to "bottom", and find a fixpoint from there. However,
1605 this group B might be inside an *enclosing* recursive group A, in
1606 which case we'll do the entire fixpoint shebang on for each iteration
1607 of A. This can be illustrated by the following example:
1608
1609 Example:
1610
1611 f [] = []
1612 f (x:xs) = let g [] = f xs
1613 g (y:ys) = y+1 : g ys
1614 in g (h x)
1615
1616 At each iteration of the fixpoint for f, the analyser has to find a
1617 fixpoint for the enclosed function g. In the meantime, the demand
1618 values for g at each iteration for f are *greater* than those we
1619 encountered in the previous iteration for f. Therefore, we can begin
1620 the fixpoint for g not with the bottom value but rather with the
1621 result of the previous analysis. I.e., when beginning the fixpoint
1622 process for g, we can start from the demand signature computed for g
1623 previously and attached to the binding occurrence of g.
1624
1625 To speed things up, we initialise each iteration of A (the enclosing
1626 one) from the result of the last one, which is neatly recorded in each
1627 binder. That way we make use of earlier iterations of the fixpoint
1628 algorithm. (Cunning plan.)
1629
1630 But on the *first* iteration we want to *ignore* the current strictness
1631 of the Id, and start from "bottom". Nowadays the Id can have a current
1632 strictness, because interface files record strictness for nested bindings.
1633 To know when we are in the first iteration, we look at the ae_virgin
1634 field of the AnalEnv.
1635
1636
1637 Note [Final Demand Analyser run]
1638 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1639 Some of the information that the demand analyser determines is not always
1640 preserved by the simplifier. For example, the simplifier will happily rewrite
1641 \y [Demand=MU] let x = y in x + x
1642 to
1643 \y [Demand=MU] y + y
1644 which is quite a lie: Now y occurs more than just once.
1645
1646 The once-used information is (currently) only used by the code
1647 generator, though. So:
1648
1649 * We zap the used-once info in the worker-wrapper;
1650 see Note [Zapping Used Once info in WorkWrap] in
1651 GHC.Core.Opt.WorkWrap.
1652 If it's not reliable, it's better not to have it at all.
1653
1654 * Just before TidyCore, we add a pass of the demand analyser,
1655 but WITHOUT subsequent worker/wrapper and simplifier,
1656 right before TidyCore. See SimplCore.getCoreToDo.
1657
1658 This way, correct information finds its way into the module interface
1659 (strictness signatures!) and the code generator (single-entry thunks!)
1660
1661 Note that, in contrast, the single-call information (CM(..)) /can/ be
1662 relied upon, as the simplifier tends to be very careful about not
1663 duplicating actual function calls.
1664
1665 Also see #11731.
1666
1667 Note [Space Leaks in Demand Analysis]
1668 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1669 Ticket: #15455
1670 MR: !5399
1671
1672 In the past the result of demand analysis was not forced until the whole module
1673 had finished being analysed. In big programs, this led to a big build up of thunks
1674 which were all ultimately forced at the end of the analysis.
1675
1676 This was because the return type of the analysis was a lazy pair:
1677 dmdAnal :: AnalEnv -> SubDemand -> CoreExpr -> (DmdType, CoreExpr)
1678 To avoid space leaks we added extra bangs to evaluate the DmdType component eagerly; but
1679 we were never sure we had added enough.
1680 The easiest way to systematically fix this was to use a strict pair type for the
1681 return value of the analysis so that we can be more confident that the result
1682 is incrementally computed rather than all at the end.
1683
1684 A second, only loosely related point is that
1685 the updating of Ids was not forced because the result of updating
1686 an Id was placed into a lazy field in CoreExpr. This meant that until the end of
1687 demand analysis, the unforced Ids would retain the DmdEnv which the demand information
1688 was fetch from. Now we are quite careful to force Ids before putting them
1689 back into core expressions so that we can garbage-collect the environments more eagerly.
1690 For example see the `Case` branch of `dmdAnal'` where `case_bndr'` is forced
1691 or `dmdAnalSumAlt`.
1692
1693 The net result of all these improvements is the peak live memory usage of compiling
1694 jsaddle-dom decreases about 4GB (from 6.5G to 2.5G). A bunch of bytes allocated benchmarks also
1695 decrease because we allocate a lot fewer thunks which we immediately overwrite and
1696 also runtime for the pass is faster! Overall, good wins.
1697
1698 -}