never executed always true always false
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE ViewPatterns #-}
3
4 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
5
6 {-
7 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
8
9 ************************************************************************
10 * *
11 \section[OccurAnal]{Occurrence analysis pass}
12 * *
13 ************************************************************************
14
15 The occurrence analyser re-typechecks a core expression, returning a new
16 core expression with (hopefully) improved usage information.
17 -}
18
19 module GHC.Core.Opt.OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) where
20
21 import GHC.Prelude
22
23 import GHC.Core
24 import GHC.Core.FVs
25 import GHC.Core.Utils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
26 stripTicksTopE, mkTicks )
27 import GHC.Core.Opt.Arity ( joinRhsArity )
28 import GHC.Core.Coercion
29 import GHC.Core.Type
30 import GHC.Core.TyCo.FVs( tyCoVarsOfMCo )
31
32 import GHC.Data.Maybe( isJust )
33 import GHC.Data.Graph.Directed ( SCC(..), Node(..)
34 , stronglyConnCompFromEdgedVerticesUniq
35 , stronglyConnCompFromEdgedVerticesUniqR )
36 import GHC.Types.Unique
37 import GHC.Types.Unique.FM
38 import GHC.Types.Unique.Set
39 import GHC.Types.Id
40 import GHC.Types.Id.Info
41 import GHC.Types.Basic
42 import GHC.Types.Tickish
43 import GHC.Types.Var.Set
44 import GHC.Types.Var.Env
45 import GHC.Types.Var
46 import GHC.Types.Demand ( argOneShots, argsOneShots )
47
48 import GHC.Utils.Outputable
49 import GHC.Utils.Panic
50 import GHC.Utils.Panic.Plain
51 import GHC.Utils.Misc
52 import GHC.Utils.Trace
53
54 import GHC.Builtin.Names( runRWKey )
55 import GHC.Unit.Module( Module )
56
57 import Data.List (mapAccumL, mapAccumR)
58
59 {-
60 ************************************************************************
61 * *
62 occurAnalysePgm, occurAnalyseExpr
63 * *
64 ************************************************************************
65
66 Here's the externally-callable interface:
67 -}
68
69 -- | Do occurrence analysis, and discard occurrence info returned
70 occurAnalyseExpr :: CoreExpr -> CoreExpr
71 occurAnalyseExpr expr = expr'
72 where
73 (WithUsageDetails _ expr') = occAnal initOccEnv expr
74
75 occurAnalysePgm :: Module -- Used only in debug output
76 -> (Id -> Bool) -- Active unfoldings
77 -> (Activation -> Bool) -- Active rules
78 -> [CoreRule] -- Local rules for imported Ids
79 -> CoreProgram -> CoreProgram
80 occurAnalysePgm this_mod active_unf active_rule imp_rules binds
81 | isEmptyDetails final_usage
82 = occ_anald_binds
83
84 | otherwise -- See Note [Glomming]
85 = warnPprTrace True (hang (text "Glomming in" <+> ppr this_mod <> colon)
86 2 (ppr final_usage))
87 occ_anald_glommed_binds
88 where
89 init_env = initOccEnv { occ_rule_act = active_rule
90 , occ_unf_act = active_unf }
91
92 (WithUsageDetails final_usage occ_anald_binds) = go init_env binds
93 (WithUsageDetails _ occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
94 imp_rule_edges
95 (flattenBinds binds)
96 initial_uds
97 -- It's crucial to re-analyse the glommed-together bindings
98 -- so that we establish the right loop breakers. Otherwise
99 -- we can easily create an infinite loop (#9583 is an example)
100 --
101 -- Also crucial to re-analyse the /original/ bindings
102 -- in case the first pass accidentally discarded as dead code
103 -- a binding that was actually needed (albeit before its
104 -- definition site). #17724 threw this up.
105
106 initial_uds = addManyOccs emptyDetails (rulesFreeVars imp_rules)
107 -- The RULES declarations keep things alive!
108
109 -- imp_rule_edges maps a top-level local binder 'f' to the
110 -- RHS free vars of any IMP-RULE, a local RULE for an imported function,
111 -- where 'f' appears on the LHS
112 -- e.g. RULE foldr f = blah
113 -- imp_rule_edges contains f :-> fvs(blah)
114 -- We treat such RULES as extra rules for 'f'
115 -- See Note [Preventing loops due to imported functions rules]
116 imp_rule_edges :: ImpRuleEdges
117 imp_rule_edges = foldr (plusVarEnv_C (++)) emptyVarEnv
118 [ mapVarEnv (const [(act,rhs_fvs)]) $ getUniqSet $
119 exprsFreeIds args `delVarSetList` bndrs
120 | Rule { ru_act = act, ru_bndrs = bndrs
121 , ru_args = args, ru_rhs = rhs } <- imp_rules
122 -- Not BuiltinRules; see Note [Plugin rules]
123 , let rhs_fvs = exprFreeIds rhs `delVarSetList` bndrs ]
124
125 go :: OccEnv -> [CoreBind] -> WithUsageDetails [CoreBind]
126 go !_ []
127 = WithUsageDetails initial_uds []
128 go env (bind:binds)
129 = WithUsageDetails final_usage (bind' ++ binds')
130 where
131 (WithUsageDetails bs_usage binds') = go env binds
132 (WithUsageDetails final_usage bind') = occAnalBind env TopLevel imp_rule_edges bind bs_usage
133
134 {- *********************************************************************
135 * *
136 IMP-RULES
137 Local rules for imported functions
138 * *
139 ********************************************************************* -}
140
141 type ImpRuleEdges = IdEnv [(Activation, VarSet)]
142 -- Mapping from a local Id 'f' to info about its IMP-RULES,
143 -- i.e. /local/ rules for an imported Id that mention 'f' on the LHS
144 -- We record (a) its Activation and (b) the RHS free vars
145 -- See Note [IMP-RULES: local rules for imported functions]
146
147 noImpRuleEdges :: ImpRuleEdges
148 noImpRuleEdges = emptyVarEnv
149
150 lookupImpRules :: ImpRuleEdges -> Id -> [(Activation,VarSet)]
151 lookupImpRules imp_rule_edges bndr
152 = case lookupVarEnv imp_rule_edges bndr of
153 Nothing -> []
154 Just vs -> vs
155
156 impRulesScopeUsage :: [(Activation,VarSet)] -> UsageDetails
157 -- Variable mentioned in RHS of an IMP-RULE for the bndr,
158 -- whether active or not
159 impRulesScopeUsage imp_rules_info
160 = foldr add emptyDetails imp_rules_info
161 where
162 add (_,vs) usage = addManyOccs usage vs
163
164 impRulesActiveFvs :: (Activation -> Bool) -> VarSet
165 -> [(Activation,VarSet)] -> VarSet
166 impRulesActiveFvs is_active bndr_set vs
167 = foldr add emptyVarSet vs `intersectVarSet` bndr_set
168 where
169 add (act,vs) acc | is_active act = vs `unionVarSet` acc
170 | otherwise = acc
171
172 {- Note [IMP-RULES: local rules for imported functions]
173 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 We quite often have
175 * A /local/ rule
176 * for an /imported/ function
177 like this:
178 foo x = blah
179 {-# RULE "map/foo" forall xs. map foo xs = xs #-}
180 We call them IMP-RULES. They are important in practice, and occur a
181 lot in the libraries.
182
183 IMP-RULES are held in mg_rules of ModGuts, and passed in to
184 occurAnalysePgm.
185
186 Main Invariant:
187
188 * Throughout, we treat an IMP-RULE that mentions 'f' on its LHS
189 just like a RULE for f.
190
191 Note [IMP-RULES: unavoidable loops]
192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193 Consider this
194 f = /\a. B.g a
195 RULE B.g Int = 1 + f Int
196 Note that
197 * The RULE is for an imported function.
198 * f is non-recursive
199 Now we
200 can get
201 f Int --> B.g Int Inlining f
202 --> 1 + f Int Firing RULE
203 and so the simplifier goes into an infinite loop. This
204 would not happen if the RULE was for a local function,
205 because we keep track of dependencies through rules. But
206 that is pretty much impossible to do for imported Ids. Suppose
207 f's definition had been
208 f = /\a. C.h a
209 where (by some long and devious process), C.h eventually inlines to
210 B.g. We could only spot such loops by exhaustively following
211 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
212 f.
213
214 We regard this potential infinite loop as a *programmer* error.
215 It's up the programmer not to write silly rules like
216 RULE f x = f x
217 and the example above is just a more complicated version.
218
219 Note [Specialising imported functions] (referred to from Specialise)
220 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
221 For *automatically-generated* rules, the programmer can't be
222 responsible for the "programmer error" in Note [IMP-RULES: unavoidable
223 loops]. In particular, consider specialising a recursive function
224 defined in another module. If we specialise a recursive function B.g,
225 we get
226 g_spec = .....(B.g Int).....
227 RULE B.g Int = g_spec
228 Here, g_spec doesn't look recursive, but when the rule fires, it
229 becomes so. And if B.g was mutually recursive, the loop might not be
230 as obvious as it is here.
231
232 To avoid this,
233 * When specialising a function that is a loop breaker,
234 give a NOINLINE pragma to the specialised function
235
236 Note [Preventing loops due to imported functions rules]
237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
238 Consider:
239 import GHC.Base (foldr)
240
241 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
242 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
243 filterFB c p = ...
244
245 f = filter p xs
246
247 Note that filter is not a loop-breaker, so what happens is:
248 f = filter p xs
249 = {inline} build (\c n -> foldr (filterFB c p) n xs)
250 = {inline} foldr (filterFB (:) p) [] xs
251 = {RULE} filter p xs
252
253 We are in an infinite loop.
254
255 A more elaborate example (that I actually saw in practice when I went to
256 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
257 {-# LANGUAGE RankNTypes #-}
258 module GHCList where
259
260 import Prelude hiding (filter)
261 import GHC.Base (build)
262
263 {-# INLINABLE filter #-}
264 filter :: (a -> Bool) -> [a] -> [a]
265 filter p [] = []
266 filter p (x:xs) = if p x then x : filter p xs else filter p xs
267
268 {-# NOINLINE [0] filterFB #-}
269 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
270 filterFB c p x r | p x = x `c` r
271 | otherwise = r
272
273 {-# RULES
274 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
275 (filterFB c p) n xs)
276 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
277 #-}
278
279 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
280 are not), the unfolding given to "filter" in the interface file will be:
281 filter p [] = []
282 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
283 else build (\c n -> foldr (filterFB c p) n xs
284
285 Note that because this unfolding does not mention "filter", filter is not
286 marked as a strong loop breaker. Therefore at a use site in another module:
287 filter p xs
288 = {inline}
289 case xs of [] -> []
290 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
291 else build (\c n -> foldr (filterFB c p) n xs)
292
293 build (\c n -> foldr (filterFB c p) n xs)
294 = {inline} foldr (filterFB (:) p) [] xs
295 = {RULE} filter p xs
296
297 And we are in an infinite loop again, except that this time the loop is producing an
298 infinitely large *term* (an unrolling of filter) and so the simplifier finally
299 dies with "ticks exhausted"
300
301 SOLUTION: we treat the rule "filterList" as an extra rule for 'filterFB'
302 because it mentions 'filterFB' on the LHS. This is the Main Invariant
303 in Note [IMP-RULES: local rules for imported functions].
304
305 So, during loop-breaker analysis:
306
307 - for each active RULE for a local function 'f' we add an edge between
308 'f' and the local FVs of the rule RHS
309
310 - for each active RULE for an *imported* function we add dependency
311 edges between the *local* FVS of the rule LHS and the *local* FVS of
312 the rule RHS.
313
314 Even with this extra hack we aren't always going to get things
315 right. For example, it might be that the rule LHS mentions an imported
316 Id, and another module has a RULE that can rewrite that imported Id to
317 one of our local Ids.
318
319 Note [Plugin rules]
320 ~~~~~~~~~~~~~~~~~~~
321 Conal Elliott (#11651) built a GHC plugin that added some
322 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
323 do some domain-specific transformations that could not be expressed
324 with an ordinary pattern-matching CoreRule. But then we can't extract
325 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
326 BuiltinRule doesn't have any of that stuff.
327
328 So we simply assume that BuiltinRules have no dependencies, and filter
329 them out from the imp_rule_edges comprehension.
330
331 Note [Glomming]
332 ~~~~~~~~~~~~~~~
333 RULES for imported Ids can make something at the top refer to
334 something at the bottom:
335
336 foo = ...(B.f @Int)...
337 $sf = blah
338 RULE: B.f @Int = $sf
339
340 Applying this rule makes foo refer to $sf, although foo doesn't appear to
341 depend on $sf. (And, as in Note [Rules for imported functions], the
342 dependency might be more indirect. For example, foo might mention C.t
343 rather than B.f, where C.t eventually inlines to B.f.)
344
345 NOTICE that this cannot happen for rules whose head is a
346 locally-defined function, because we accurately track dependencies
347 through RULES. It only happens for rules whose head is an imported
348 function (B.f in the example above).
349
350 Solution:
351 - When simplifying, bring all top level identifiers into
352 scope at the start, ignoring the Rec/NonRec structure, so
353 that when 'h' pops up in f's rhs, we find it in the in-scope set
354 (as the simplifier generally expects). This happens in simplTopBinds.
355
356 - In the occurrence analyser, if there are any out-of-scope
357 occurrences that pop out of the top, which will happen after
358 firing the rule: f = \x -> h x
359 h = \y -> 3
360 then just glom all the bindings into a single Rec, so that
361 the *next* iteration of the occurrence analyser will sort
362 them all out. This part happens in occurAnalysePgm.
363 -}
364
365 {-
366 ************************************************************************
367 * *
368 Bindings
369 * *
370 ************************************************************************
371
372 Note [Recursive bindings: the grand plan]
373 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
374 Loop breaking is surprisingly subtle. First read the section 4 of
375 "Secrets of the GHC inliner". This describes our basic plan. We
376 avoid infinite inlinings by choosing loop breakers, and ensuring that
377 a loop breaker cuts each loop.
378
379 See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which
380 deals with a closely related source of infinite loops.
381
382 When we come across a binding group
383 Rec { x1 = r1; ...; xn = rn }
384 we treat it like this (occAnalRecBind):
385
386 1. Note [Forming Rec groups]
387 Occurrence-analyse each right hand side, and build a
388 "Details" for each binding to capture the results.
389 Wrap the details in a LetrecNode, ready for SCC analysis.
390 All this is done by makeNode.
391
392 The edges of this graph are the "scope edges".
393
394 2. Do SCC-analysis on these Nodes:
395 - Each CyclicSCC will become a new Rec
396 - Each AcyclicSCC will become a new NonRec
397
398 The key property is that every free variable of a binding is
399 accounted for by the scope edges, so that when we are done
400 everything is still in scope.
401
402 3. For each AcyclicSCC, just make a NonRec binding.
403
404 4. For each CyclicSCC of the scope-edge SCC-analysis in (2), we
405 identify suitable loop-breakers to ensure that inlining terminates.
406 This is done by occAnalRec.
407
408 To do so, form the loop-breaker graph, do SCC analysis. For each
409 CyclicSCC we choose a loop breaker, delete all edges to that node,
410 re-analyse the SCC, and iterate. See Note [Choosing loop breakers]
411 for the details
412
413
414 Note [Dead code]
415 ~~~~~~~~~~~~~~~~
416 Dropping dead code for a cyclic Strongly Connected Component is done
417 in a very simple way:
418
419 the entire SCC is dropped if none of its binders are mentioned
420 in the body; otherwise the whole thing is kept.
421
422 The key observation is that dead code elimination happens after
423 dependency analysis: so 'occAnalBind' processes SCCs instead of the
424 original term's binding groups.
425
426 Thus 'occAnalBind' does indeed drop 'f' in an example like
427
428 letrec f = ...g...
429 g = ...(...g...)...
430 in
431 ...g...
432
433 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
434 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
435 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
436
437 Note [Forming Rec groups]
438 ~~~~~~~~~~~~~~~~~~~~~~~~~
439 The key point about the "Forming Rec groups" step is that it /preserves
440 scoping/. If 'x' is mentioned, it had better be bound somewhere. So if
441 we start with
442 Rec { f = ...h...
443 ; g = ...f...
444 ; h = ...f... }
445 we can split into SCCs
446 Rec { f = ...h...
447 ; h = ..f... }
448 NonRec { g = ...f... }
449
450 We put bindings {f = ef; g = eg } in a Rec group if "f uses g" and "g
451 uses f", no matter how indirectly. We do a SCC analysis with an edge
452 f -> g if "f mentions g". That is, g is free in:
453 a) the rhs 'ef'
454 b) or the RHS of a rule for f, whether active or inactive
455 Note [Rules are extra RHSs]
456 c) or the LHS or a rule for f, whether active or inactive
457 Note [Rule dependency info]
458 d) the RHS of an /active/ local IMP-RULE
459 Note [IMP-RULES: local rules for imported functions]
460
461 (b) and (c) apply regardless of the activation of the RULE, because even if
462 the rule is inactive its free variables must be bound. But (d) doesn't need
463 to worry about this because IMP-RULES are always notionally at the bottom
464 of the file.
465
466 * Note [Rules are extra RHSs]
467 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
468 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
469 keeps the specialised "children" alive. If the parent dies
470 (because it isn't referenced any more), then the children will die
471 too (unless they are already referenced directly).
472
473 So in Example [eftInt], eftInt and eftIntFB will be put in the
474 same Rec, even though their 'main' RHSs are both non-recursive.
475
476 We must also include inactive rules, so that their free vars
477 remain in scope.
478
479 * Note [Rule dependency info]
480 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
481 The VarSet in a RuleInfo is used for dependency analysis in the
482 occurrence analyser. We must track free vars in *both* lhs and rhs.
483 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
484 Why both? Consider
485 x = y
486 RULE f x = v+4
487 Then if we substitute y for x, we'd better do so in the
488 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
489 as well as 'v'
490
491 * Note [Rules are visible in their own rec group]
492 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
493 We want the rules for 'f' to be visible in f's right-hand side.
494 And we'd like them to be visible in other functions in f's Rec
495 group. E.g. in Note [Specialisation rules] we want f' rule
496 to be visible in both f's RHS, and fs's RHS.
497
498 This means that we must simplify the RULEs first, before looking
499 at any of the definitions. This is done by Simplify.simplRecBind,
500 when it calls addLetIdInfo.
501
502 Note [Stable unfoldings]
503 ~~~~~~~~~~~~~~~~~~~~~~~~
504 None of the above stuff about RULES applies to a stable unfolding
505 stored in a CoreUnfolding. The unfolding, if any, is simplified
506 at the same time as the regular RHS of the function (ie *not* like
507 Note [Rules are visible in their own rec group]), so it should be
508 treated *exactly* like an extra RHS.
509
510 Or, rather, when computing loop-breaker edges,
511 * If f has an INLINE pragma, and it is active, we treat the
512 INLINE rhs as f's rhs
513 * If it's inactive, we treat f as having no rhs
514 * If it has no INLINE pragma, we look at f's actual rhs
515
516
517 There is a danger that we'll be sub-optimal if we see this
518 f = ...f...
519 [INLINE f = ..no f...]
520 where f is recursive, but the INLINE is not. This can just about
521 happen with a sufficiently odd set of rules; eg
522
523 foo :: Int -> Int
524 {-# INLINE [1] foo #-}
525 foo x = x+1
526
527 bar :: Int -> Int
528 {-# INLINE [1] bar #-}
529 bar x = foo x + 1
530
531 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
532
533 Here the RULE makes bar recursive; but it's INLINE pragma remains
534 non-recursive. It's tempting to then say that 'bar' should not be
535 a loop breaker, but an attempt to do so goes wrong in two ways:
536 a) We may get
537 $df = ...$cfoo...
538 $cfoo = ...$df....
539 [INLINE $cfoo = ...no-$df...]
540 But we want $cfoo to depend on $df explicitly so that we
541 put the bindings in the right order to inline $df in $cfoo
542 and perhaps break the loop altogether. (Maybe this
543 b)
544
545
546 Example [eftInt]
547 ~~~~~~~~~~~~~~~
548 Example (from GHC.Enum):
549
550 eftInt :: Int# -> Int# -> [Int]
551 eftInt x y = ...(non-recursive)...
552
553 {-# INLINE [0] eftIntFB #-}
554 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
555 eftIntFB c n x y = ...(non-recursive)...
556
557 {-# RULES
558 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
559 "eftIntList" [1] eftIntFB (:) [] = eftInt
560 #-}
561
562 Note [Specialisation rules]
563 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
564 Consider this group, which is typical of what SpecConstr builds:
565
566 fs a = ....f (C a)....
567 f x = ....f (C a)....
568 {-# RULE f (C a) = fs a #-}
569
570 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
571
572 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
573 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in GHC.Core.Opt.Simplify
574 - fs is inlined (say it's small)
575 - now there's another opportunity to apply the RULE
576
577 This showed up when compiling Control.Concurrent.Chan.getChanContents.
578 Hence the transitive rule_fv_env stuff described in
579 Note [Rules and loop breakers].
580
581 ------------------------------------------------------------
582 Note [Finding join points]
583 ~~~~~~~~~~~~~~~~~~~~~~~~~~
584 It's the occurrence analyser's job to find bindings that we can turn into join
585 points, but it doesn't perform that transformation right away. Rather, it marks
586 the eligible bindings as part of their occurrence data, leaving it to the
587 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
588 The simplifier then eta-expands the RHS if needed and then updates the
589 occurrence sites. Dividing the work this way means that the occurrence analyser
590 still only takes one pass, yet one can always tell the difference between a
591 function call and a jump by looking at the occurrence (because the same pass
592 changes the 'IdDetails' and propagates the binders to their occurrence sites).
593
594 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
595 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
596 tail call with `n` arguments (counting both value and type arguments). Otherwise
597 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
598 rest of 'OccInfo' until it goes on the binder.
599
600 Note [Join points and unfoldings/rules]
601 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
602 Consider
603 let j2 y = blah
604 let j x = j2 (x+x)
605 {-# INLINE [2] j #-}
606 in case e of { A -> j 1; B -> ...; C -> j 2 }
607
608 Before j is inlined, we'll have occurrences of j2 in
609 both j's RHS and in its stable unfolding. We want to discover
610 j2 as a join point. So we must do the adjustRhsUsage thing
611 on j's RHS. That's why we pass mb_join_arity to calcUnfolding.
612
613 Aame with rules. Suppose we have:
614
615 let j :: Int -> Int
616 j y = 2 * y
617 let k :: Int -> Int -> Int
618 {-# RULES "SPEC k 0" k 0 y = j y #-}
619 k x y = x + 2 * y
620 in case e of { A -> k 1 2; B -> k 3 5; C -> blah }
621
622 We identify k as a join point, and we want j to be a join point too.
623 Without the RULE it would be, and we don't want the RULE to mess it
624 up. So provided the join-point arity of k matches the args of the
625 rule we can allow the tail-cal info from the RHS of the rule to
626 propagate.
627
628 * Wrinkle for Rec case. In the recursive case we don't know the
629 join-point arity in advance, when calling occAnalUnfolding and
630 occAnalRules. (See makeNode.) We don't want to pass Nothing,
631 because then a recursive joinrec might lose its join-poin-hood
632 when SpecConstr adds a RULE. So we just make do with the
633 *current* join-poin-hood, stored in the Id.
634
635 In the non-recursive case things are simple: see occAnalNonRecBind
636
637 * Wrinkle for RULES. Suppose the example was a bit different:
638 let j :: Int -> Int
639 j y = 2 * y
640 k :: Int -> Int -> Int
641 {-# RULES "SPEC k 0" k 0 = j #-}
642 k x y = x + 2 * y
643 in ...
644 If we eta-expanded the rule all would be well, but as it stands the
645 one arg of the rule don't match the join-point arity of 2.
646
647 Conceivably we could notice that a potential join point would have
648 an "undersaturated" rule and account for it. This would mean we
649 could make something that's been specialised a join point, for
650 instance. But local bindings are rarely specialised, and being
651 overly cautious about rules only costs us anything when, for some `j`:
652
653 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
654 * During specialisation, `j` gets specialised and thus acquires rules.
655 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
656 and so now `j` *could* become a join point.
657
658 This appears to be very rare in practice. TODO Perhaps we should gather
659 statistics to be sure.
660
661 Note [Unfoldings and join points]
662 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
663 We assume that anything in an unfolding occurs multiple times, since
664 unfoldings are often copied (that's the whole point!). But we still
665 need to track tail calls for the purpose of finding join points.
666
667
668 ------------------------------------------------------------
669 Note [Adjusting right-hand sides]
670 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
671 There's a bit of a dance we need to do after analysing a lambda expression or
672 a right-hand side. In particular, we need to
673
674 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
675 lambda, or a non-recursive join point; and
676 b) call 'markAllNonTail' *unless* the binding is for a join point.
677
678 Some examples, with how the free occurrences in e (assumed not to be a value
679 lambda) get marked:
680
681 inside lam non-tail-called
682 ------------------------------------------------------------
683 let x = e No Yes
684 let f = \x -> e Yes Yes
685 let f = \x{OneShot} -> e No Yes
686 \x -> e Yes Yes
687 join j x = e No No
688 joinrec j x = e Yes No
689
690 There are a few other caveats; most importantly, if we're marking a binding as
691 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
692 that the effect cascades properly. Consequently, at the time the RHS is
693 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
694 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
695 join-point-hood has been decided.
696
697 Thus the overall sequence taking place in 'occAnalNonRecBind' and
698 'occAnalRecBind' is as follows:
699
700 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
701 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
702 the binding a join point.
703 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
704 recursive.)
705
706 (In the recursive case, this logic is spread between 'makeNode' and
707 'occAnalRec'.)
708 -}
709
710
711 data WithUsageDetails a = WithUsageDetails !UsageDetails !a
712
713 ------------------------------------------------------------------
714 -- occAnalBind
715 ------------------------------------------------------------------
716
717 occAnalBind :: OccEnv -- The incoming OccEnv
718 -> TopLevelFlag
719 -> ImpRuleEdges
720 -> CoreBind
721 -> UsageDetails -- Usage details of scope
722 -> WithUsageDetails [CoreBind] -- Of the whole let(rec)
723
724 occAnalBind !env lvl top_env (NonRec binder rhs) body_usage
725 = occAnalNonRecBind env lvl top_env binder rhs body_usage
726 occAnalBind env lvl top_env (Rec pairs) body_usage
727 = occAnalRecBind env lvl top_env pairs body_usage
728
729 -----------------
730 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
731 -> UsageDetails -> WithUsageDetails [CoreBind]
732 occAnalNonRecBind !env lvl imp_rule_edges bndr rhs body_usage
733 | isTyVar bndr -- A type let; we don't gather usage info
734 = WithUsageDetails body_usage [NonRec bndr rhs]
735
736 | not (bndr `usedIn` body_usage) -- It's not mentioned
737 = WithUsageDetails body_usage []
738
739 | otherwise -- It's mentioned in the body
740 = WithUsageDetails (body_usage' `andUDs` rhs_usage) [NonRec final_bndr rhs']
741 where
742 (body_usage', tagged_bndr) = tagNonRecBinder lvl body_usage bndr
743 final_bndr = tagged_bndr `setIdUnfolding` unf'
744 `setIdSpecialisation` mkRuleInfo rules'
745 rhs_usage = rhs_uds `andUDs` unf_uds `andUDs` rule_uds
746
747 -- Get the join info from the *new* decision
748 -- See Note [Join points and unfoldings/rules]
749 mb_join_arity = willBeJoinId_maybe tagged_bndr
750 is_join_point = isJust mb_join_arity
751
752 --------- Right hand side ---------
753 env1 | is_join_point = env -- See Note [Join point RHSs]
754 | certainly_inline = env -- See Note [Cascading inlines]
755 | otherwise = rhsCtxt env
756
757 -- See Note [Sources of one-shot information]
758 rhs_env = env1 { occ_one_shots = argOneShots dmd }
759 (WithUsageDetails rhs_uds rhs') = occAnalRhs rhs_env NonRecursive mb_join_arity rhs
760
761 --------- Unfolding ---------
762 -- See Note [Unfoldings and join points]
763 unf | isId bndr = idUnfolding bndr
764 | otherwise = NoUnfolding
765 (WithUsageDetails unf_uds unf') = occAnalUnfolding rhs_env NonRecursive mb_join_arity unf
766
767 --------- Rules ---------
768 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
769 rules_w_uds = occAnalRules rhs_env mb_join_arity bndr
770 rules' = map fstOf3 rules_w_uds
771 imp_rule_uds = impRulesScopeUsage (lookupImpRules imp_rule_edges bndr)
772 -- imp_rule_uds: consider
773 -- h = ...
774 -- g = ...
775 -- RULE map g = h
776 -- Then we want to ensure that h is in scope everwhere
777 -- that g is (since the RULE might turn g into h), so
778 -- we make g mention h.
779
780 rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds
781 add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds
782
783 ----------
784 occ = idOccInfo tagged_bndr
785 certainly_inline -- See Note [Cascading inlines]
786 = case occ of
787 OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
788 -> active && not_stable
789 _ -> False
790
791 dmd = idDemandInfo bndr
792 active = isAlwaysActive (idInlineActivation bndr)
793 not_stable = not (isStableUnfolding (idUnfolding bndr))
794
795 -----------------
796 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
797 -> UsageDetails -> WithUsageDetails [CoreBind]
798 -- For a recursive group, we
799 -- * occ-analyse all the RHSs
800 -- * compute strongly-connected components
801 -- * feed those components to occAnalRec
802 -- See Note [Recursive bindings: the grand plan]
803 occAnalRecBind !env lvl imp_rule_edges pairs body_usage
804 = foldr (occAnalRec rhs_env lvl) (WithUsageDetails body_usage []) sccs
805 where
806 sccs :: [SCC Details]
807 sccs = {-# SCC "occAnalBind.scc" #-}
808 stronglyConnCompFromEdgedVerticesUniq nodes
809
810 nodes :: [LetrecNode]
811 nodes = {-# SCC "occAnalBind.assoc" #-}
812 map (makeNode rhs_env imp_rule_edges bndr_set) pairs
813
814 bndrs = map fst pairs
815 bndr_set = mkVarSet bndrs
816 rhs_env = env `addInScope` bndrs
817
818
819 -----------------------------
820 occAnalRec :: OccEnv -> TopLevelFlag
821 -> SCC Details
822 -> WithUsageDetails [CoreBind]
823 -> WithUsageDetails [CoreBind]
824
825 -- The NonRec case is just like a Let (NonRec ...) above
826 occAnalRec !_ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
827 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
828 (WithUsageDetails body_uds binds)
829 | not (bndr `usedIn` body_uds)
830 = WithUsageDetails body_uds binds -- See Note [Dead code]
831
832 | otherwise -- It's mentioned in the body
833 = WithUsageDetails (body_uds' `andUDs` rhs_uds')
834 (NonRec tagged_bndr rhs : binds)
835 where
836 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
837 rhs_uds' = adjustRhsUsage NonRecursive (willBeJoinId_maybe tagged_bndr)
838 rhs_bndrs rhs_uds
839
840 -- The Rec case is the interesting one
841 -- See Note [Recursive bindings: the grand plan]
842 -- See Note [Loop breaking]
843 occAnalRec env lvl (CyclicSCC details_s) (WithUsageDetails body_uds binds)
844 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
845 = WithUsageDetails body_uds binds -- See Note [Dead code]
846
847 | otherwise -- At this point we always build a single Rec
848 = -- pprTrace "occAnalRec" (ppr loop_breaker_nodes)
849 WithUsageDetails final_uds (Rec pairs : binds)
850
851 where
852 bndrs = map nd_bndr details_s
853 all_simple = all nd_simple details_s
854
855 ------------------------------
856 -- Make the nodes for the loop-breaker analysis
857 -- See Note [Choosing loop breakers] for loop_breaker_nodes
858 final_uds :: UsageDetails
859 loop_breaker_nodes :: [LetrecNode]
860 (WithUsageDetails final_uds loop_breaker_nodes) = mkLoopBreakerNodes env lvl body_uds details_s
861
862 ------------------------------
863 active_rule_fvs :: VarSet
864 active_rule_fvs = mapUnionVarSet nd_active_rule_fvs details_s
865
866 ---------------------------
867 -- Now reconstruct the cycle
868 pairs :: [(Id,CoreExpr)]
869 pairs | all_simple = reOrderNodes 0 active_rule_fvs loop_breaker_nodes []
870 | otherwise = loopBreakNodes 0 active_rule_fvs loop_breaker_nodes []
871 -- In the common case when all are "simple" (no rules at all)
872 -- the loop_breaker_nodes will include all the scope edges
873 -- so a SCC computation would yield a single CyclicSCC result;
874 -- and reOrderNodes deals with exactly that case.
875 -- Saves a SCC analysis in a common case
876
877
878 {- *********************************************************************
879 * *
880 Loop breaking
881 * *
882 ********************************************************************* -}
883
884 {- Note [Choosing loop breakers]
885 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
886 In Step 4 in Note [Recursive bindings: the grand plan]), occAnalRec does
887 loop-breaking on each CyclicSCC of the original program:
888
889 * mkLoopBreakerNodes: Form the loop-breaker graph for that CyclicSCC
890
891 * loopBreakNodes: Do SCC analysis on it
892
893 * reOrderNodes: For each CyclicSCC, pick a loop breaker
894 * Delete edges to that loop breaker
895 * Do another SCC analysis on that reduced SCC
896 * Repeat
897
898 To form the loop-breaker graph, we construct a new set of Nodes, the
899 "loop-breaker nodes", with the same details but different edges, the
900 "loop-breaker edges". The loop-breaker nodes have both more and fewer
901 dependencies than the scope edges:
902
903 More edges:
904 If f calls g, and g has an active rule that mentions h then
905 we add an edge from f -> h. See Note [Rules and loop breakers].
906
907 Fewer edges: we only include dependencies
908 * only on /active/ rules,
909 * on rule /RHSs/ (not LHSs)
910
911 The scope edges, by contrast, must be much more inclusive.
912
913 The nd_simple flag tracks the common case when a binding has no RULES
914 at all, in which case the loop-breaker edges will be identical to the
915 scope edges.
916
917 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
918 chosen as a loop breaker, because their RHSs don't mention each other.
919 And indeed both can be inlined safely.
920
921 Note [inl_fvs]
922 ~~~~~~~~~~~~~~
923 Note that the loop-breaker graph includes edges for occurrences in
924 /both/ the RHS /and/ the stable unfolding. Consider this, which actually
925 occurred when compiling BooleanFormula.hs in GHC:
926
927 Rec { lvl1 = go
928 ; lvl2[StableUnf = go] = lvl1
929 ; go = ...go...lvl2... }
930
931 From the point of view of infinite inlining, we need only these edges:
932 lvl1 :-> go
933 lvl2 :-> go -- The RHS lvl1 will never be used for inlining
934 go :-> go, lvl2
935
936 But the danger is that, lacking any edge to lvl1, we'll put it at the
937 end thus
938 Rec { lvl2[ StableUnf = go] = lvl1
939 ; go[LoopBreaker] = ...go...lvl2... }
940 ; lvl1[Occ=Once] = go }
941
942 And now the Simplifer will try to use PreInlineUnconditionally on lvl1
943 (which occurs just once), but because it is last we won't actually
944 substitute in lvl2. Sigh.
945
946 To avoid this possiblity, we include edges from lvl2 to /both/ its
947 stable unfolding /and/ its RHS. Hence the defn of inl_fvs in
948 makeNode. Maybe we could be more clever, but it's very much a corner
949 case.
950
951 Note [Weak loop breakers]
952 ~~~~~~~~~~~~~~~~~~~~~~~~~
953 There is a last nasty wrinkle. Suppose we have
954
955 Rec { f = f_rhs
956 RULE f [] = g
957
958 h = h_rhs
959 g = h
960 ...more...
961 }
962
963 Remember that we simplify the RULES before any RHS (see Note
964 [Rules are visible in their own rec group] above).
965
966 So we must *not* postInlineUnconditionally 'g', even though
967 its RHS turns out to be trivial. (I'm assuming that 'g' is
968 not chosen as a loop breaker.) Why not? Because then we
969 drop the binding for 'g', which leaves it out of scope in the
970 RULE!
971
972 Here's a somewhat different example of the same thing
973 Rec { q = r
974 ; r = ...p...
975 ; p = p_rhs
976 RULE p [] = q }
977 Here the RULE is "below" q, but we *still* can't postInlineUnconditionally
978 q, because the RULE for p is active throughout. So the RHS of r
979 might rewrite to r = ...q...
980 So q must remain in scope in the output program!
981
982 We "solve" this by:
983
984 Make q a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
985 iff q is a mentioned in the RHS of an active RULE in the Rec group
986
987 A normal "strong" loop breaker has IAmLoopBreaker False. So:
988
989 Inline postInlineUnconditionally
990 strong IAmLoopBreaker False no no
991 weak IAmLoopBreaker True yes no
992 other yes yes
993
994 The **sole** reason for this kind of loop breaker is so that
995 postInlineUnconditionally does not fire. Ugh.
996
997 Annoyingly, since we simplify the rules *first* we'll never inline
998 q into p's RULE. That trivial binding for q will hang around until
999 we discard the rule. Yuk. But it's rare.
1000
1001 Note [Rules and loop breakers]
1002 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1003 When we form the loop-breaker graph (Step 4 in Note [Recursive
1004 bindings: the grand plan]), we must be careful about RULEs.
1005
1006 For a start, we want a loop breaker to cut every cycle, so inactive
1007 rules play no part; we need only consider /active/ rules.
1008 See Note [Finding rule RHS free vars]
1009
1010 The second point is more subtle. A RULE is like an equation for
1011 'f' that is *always* inlined if it is applicable. We do *not* disable
1012 rules for loop-breakers. It's up to whoever makes the rules to make
1013 sure that the rules themselves always terminate. See Note [Rules for
1014 recursive functions] in GHC.Core.Opt.Simplify
1015
1016 Hence, if
1017 f's RHS (or its stable unfolding if it has one) mentions g, and
1018 g has a RULE that mentions h, and
1019 h has a RULE that mentions f
1020
1021 then we *must* choose f to be a loop breaker. Example: see Note
1022 [Specialisation rules]. So out plan is this:
1023
1024 Take the free variables of f's RHS, and augment it with all the
1025 variables reachable by a transitive sequence RULES from those
1026 starting points.
1027
1028 That is the whole reason for computing rule_fv_env in mkLoopBreakerNodes.
1029 Wrinkles:
1030
1031 * We only consider /active/ rules. See Note [Finding rule RHS free vars]
1032
1033 * We need only consider free vars that are also binders in this Rec
1034 group. See also Note [Finding rule RHS free vars]
1035
1036 * We only consider variables free in the *RHS* of the rule, in
1037 contrast to the way we build the Rec group in the first place (Note
1038 [Rule dependency info])
1039
1040 * Why "transitive sequence of rules"? Because active rules apply
1041 unconditionally, without checking loop-breaker-ness.
1042 See Note [Loop breaker dependencies].
1043
1044 Note [Finding rule RHS free vars]
1045 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1046 Consider this real example from Data Parallel Haskell
1047 tagZero :: Array Int -> Array Tag
1048 {-# INLINE [1] tagZeroes #-}
1049 tagZero xs = pmap (\x -> fromBool (x==0)) xs
1050
1051 {-# RULES "tagZero" [~1] forall xs n.
1052 pmap fromBool <blah blah> = tagZero xs #-}
1053 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
1054 However, tagZero can only be inlined in phase 1 and later, while
1055 the RULE is only active *before* phase 1. So there's no problem.
1056
1057 To make this work, we look for the RHS free vars only for
1058 *active* rules. That's the reason for the occ_rule_act field
1059 of the OccEnv.
1060
1061 Note [loopBreakNodes]
1062 ~~~~~~~~~~~~~~~~~~~~~
1063 loopBreakNodes is applied to the list of nodes for a cyclic strongly
1064 connected component (there's guaranteed to be a cycle). It returns
1065 the same nodes, but
1066 a) in a better order,
1067 b) with some of the Ids having a IAmALoopBreaker pragma
1068
1069 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
1070 that the simplifier can guarantee not to loop provided it never records an inlining
1071 for these no-inline guys.
1072
1073 Furthermore, the order of the binds is such that if we neglect dependencies
1074 on the no-inline Ids then the binds are topologically sorted. This means
1075 that the simplifier will generally do a good job if it works from top bottom,
1076 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
1077 -}
1078
1079 type Binding = (Id,CoreExpr)
1080
1081 -- See Note [loopBreakNodes]
1082 loopBreakNodes :: Int
1083 -> VarSet -- Binders whose dependencies may be "missing"
1084 -- See Note [Weak loop breakers]
1085 -> [LetrecNode]
1086 -> [Binding] -- Append these to the end
1087 -> [Binding]
1088
1089 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
1090 -- See Note [loopBreakNodes]
1091 loopBreakNodes depth weak_fvs nodes binds
1092 = -- pprTrace "loopBreakNodes" (ppr nodes) $
1093 go (stronglyConnCompFromEdgedVerticesUniqR nodes)
1094 where
1095 go [] = binds
1096 go (scc:sccs) = loop_break_scc scc (go sccs)
1097
1098 loop_break_scc scc binds
1099 = case scc of
1100 AcyclicSCC node -> nodeBinding (mk_non_loop_breaker weak_fvs) node : binds
1101 CyclicSCC nodes -> reOrderNodes depth weak_fvs nodes binds
1102
1103 ----------------------------------
1104 reOrderNodes :: Int -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
1105 -- Choose a loop breaker, mark it no-inline,
1106 -- and call loopBreakNodes on the rest
1107 reOrderNodes _ _ [] _ = panic "reOrderNodes"
1108 reOrderNodes _ _ [node] binds = nodeBinding mk_loop_breaker node : binds
1109 reOrderNodes depth weak_fvs (node : nodes) binds
1110 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
1111 -- , text "chosen" <+> ppr chosen_nodes ]) $
1112 loopBreakNodes new_depth weak_fvs unchosen $
1113 (map (nodeBinding mk_loop_breaker) chosen_nodes ++ binds)
1114 where
1115 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
1116 (nd_score (node_payload node))
1117 [node] [] nodes
1118
1119 approximate_lb = depth >= 2
1120 new_depth | approximate_lb = 0
1121 | otherwise = depth+1
1122 -- After two iterations (d=0, d=1) give up
1123 -- and approximate, returning to d=0
1124
1125 nodeBinding :: (Id -> Id) -> LetrecNode -> Binding
1126 nodeBinding set_id_occ (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
1127 = (set_id_occ bndr, rhs)
1128
1129 mk_loop_breaker :: Id -> Id
1130 mk_loop_breaker bndr
1131 = bndr `setIdOccInfo` occ'
1132 where
1133 occ' = strongLoopBreaker { occ_tail = tail_info }
1134 tail_info = tailCallInfo (idOccInfo bndr)
1135
1136 mk_non_loop_breaker :: VarSet -> Id -> Id
1137 -- See Note [Weak loop breakers]
1138 mk_non_loop_breaker weak_fvs bndr
1139 | bndr `elemVarSet` weak_fvs = setIdOccInfo bndr occ'
1140 | otherwise = bndr
1141 where
1142 occ' = weakLoopBreaker { occ_tail = tail_info }
1143 tail_info = tailCallInfo (idOccInfo bndr)
1144
1145 ----------------------------------
1146 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
1147 -- so approximate
1148 -> NodeScore -- Best score so far
1149 -> [LetrecNode] -- Nodes with this score
1150 -> [LetrecNode] -- Nodes with higher scores
1151 -> [LetrecNode] -- Unprocessed nodes
1152 -> ([LetrecNode], [LetrecNode])
1153 -- This loop looks for the bind with the lowest score
1154 -- to pick as the loop breaker. The rest accumulate in
1155 chooseLoopBreaker _ _ loop_nodes acc []
1156 = (loop_nodes, acc) -- Done
1157
1158 -- If approximate_loop_breaker is True, we pick *all*
1159 -- nodes with lowest score, else just one
1160 -- See Note [Complexity of loop breaking]
1161 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
1162 | approx_lb
1163 , rank sc == rank loop_sc
1164 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1165
1166 | sc `betterLB` loop_sc -- Better score so pick this new one
1167 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1168
1169 | otherwise -- Worse score so don't pick it
1170 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1171 where
1172 sc = nd_score (node_payload node)
1173
1174 {-
1175 Note [Complexity of loop breaking]
1176 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1177 The loop-breaking algorithm knocks out one binder at a time, and
1178 performs a new SCC analysis on the remaining binders. That can
1179 behave very badly in tightly-coupled groups of bindings; in the
1180 worst case it can be (N**2)*log N, because it does a full SCC
1181 on N, then N-1, then N-2 and so on.
1182
1183 To avoid this, we switch plans after 2 (or whatever) attempts:
1184 Plan A: pick one binder with the lowest score, make it
1185 a loop breaker, and try again
1186 Plan B: pick *all* binders with the lowest score, make them
1187 all loop breakers, and try again
1188 Since there are only a small finite number of scores, this will
1189 terminate in a constant number of iterations, rather than O(N)
1190 iterations.
1191
1192 You might thing that it's very unlikely, but RULES make it much
1193 more likely. Here's a real example from #1969:
1194 Rec { $dm = \d.\x. op d
1195 {-# RULES forall d. $dm Int d = $s$dm1
1196 forall d. $dm Bool d = $s$dm2 #-}
1197
1198 dInt = MkD .... opInt ...
1199 dInt = MkD .... opBool ...
1200 opInt = $dm dInt
1201 opBool = $dm dBool
1202
1203 $s$dm1 = \x. op dInt
1204 $s$dm2 = \x. op dBool }
1205 The RULES stuff means that we can't choose $dm as a loop breaker
1206 (Note [Choosing loop breakers]), so we must choose at least (say)
1207 opInt *and* opBool, and so on. The number of loop breakders is
1208 linear in the number of instance declarations.
1209
1210 Note [Loop breakers and INLINE/INLINABLE pragmas]
1211 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1212 Avoid choosing a function with an INLINE pramga as the loop breaker!
1213 If such a function is mutually-recursive with a non-INLINE thing,
1214 then the latter should be the loop-breaker.
1215
1216 It's vital to distinguish between INLINE and INLINABLE (the
1217 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1218 Rec { {-# INLINABLE f #-}
1219 f x = ...f... }
1220 and then worker/wrapper it through strictness analysis, we'll get
1221 Rec { {-# INLINABLE $wf #-}
1222 $wf p q = let x = (p,q) in ...f...
1223
1224 {-# INLINE f #-}
1225 f x = case x of (p,q) -> $wf p q }
1226
1227 Now it is vital that we choose $wf as the loop breaker, so we can
1228 inline 'f' in '$wf'.
1229
1230 Note [DFuns should not be loop breakers]
1231 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1232 It's particularly bad to make a DFun into a loop breaker. See
1233 Note [How instance declarations are translated] in GHC.Tc.TyCl.Instance
1234
1235 We give DFuns a higher score than ordinary CONLIKE things because
1236 if there's a choice we want the DFun to be the non-loop breaker. Eg
1237
1238 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1239
1240 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1241 {-# DFUN #-}
1242 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1243 }
1244
1245 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1246 if we can't unravel the DFun first.
1247
1248 Note [Constructor applications]
1249 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1250 It's really really important to inline dictionaries. Real
1251 example (the Enum Ordering instance from GHC.Base):
1252
1253 rec f = \ x -> case d of (p,q,r) -> p x
1254 g = \ x -> case d of (p,q,r) -> q x
1255 d = (v, f, g)
1256
1257 Here, f and g occur just once; but we can't inline them into d.
1258 On the other hand we *could* simplify those case expressions if
1259 we didn't stupidly choose d as the loop breaker.
1260 But we won't because constructor args are marked "Many".
1261 Inlining dictionaries is really essential to unravelling
1262 the loops in static numeric dictionaries, see GHC.Float.
1263
1264 Note [Closure conversion]
1265 ~~~~~~~~~~~~~~~~~~~~~~~~~
1266 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1267 The immediate motivation came from the result of a closure-conversion transformation
1268 which generated code like this:
1269
1270 data Clo a b = forall c. Clo (c -> a -> b) c
1271
1272 ($:) :: Clo a b -> a -> b
1273 Clo f env $: x = f env x
1274
1275 rec { plus = Clo plus1 ()
1276
1277 ; plus1 _ n = Clo plus2 n
1278
1279 ; plus2 Zero n = n
1280 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1281
1282 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1283 we choose 'plus1' as the loop breaker (which is entirely possible
1284 otherwise), the loop does not unravel nicely.
1285
1286
1287 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1288 by an INLINE pragma. For these we record that anything which occurs
1289 in its RHS occurs many times. This pessimistically assumes that this
1290 inlined binder also occurs many times in its scope, but if it doesn't
1291 we'll catch it next time round. At worst this costs an extra simplifier pass.
1292 ToDo: try using the occurrence info for the inline'd binder.
1293
1294 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1295 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1296
1297
1298 ************************************************************************
1299 * *
1300 Making nodes
1301 * *
1302 ************************************************************************
1303 -}
1304
1305 type LetrecNode = Node Unique Details -- Node comes from Digraph
1306 -- The Unique key is gotten from the Id
1307 data Details
1308 = ND { nd_bndr :: Id -- Binder
1309
1310 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1311
1312 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1313 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1314 -- collectBinders (nd_rhs nd)
1315
1316 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1317 -- ignoring phase (ie assuming all are active)
1318 -- See Note [Forming Rec groups]
1319
1320 , nd_inl :: IdSet -- Free variables of the stable unfolding and the RHS
1321 -- but excluding any RULES
1322 -- This is the IdSet that may be used if the Id is inlined
1323
1324 , nd_simple :: Bool -- True iff this binding has no local RULES
1325 -- If all nodes are simple we don't need a loop-breaker
1326 -- dep-anal before reconstructing.
1327
1328 , nd_active_rule_fvs :: IdSet -- Variables bound in this Rec group that are free
1329 -- in the RHS of an active rule for this bndr
1330
1331 , nd_score :: NodeScore
1332 }
1333
1334 instance Outputable Details where
1335 ppr nd = text "ND" <> braces
1336 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1337 , text "uds =" <+> ppr (nd_uds nd)
1338 , text "inl =" <+> ppr (nd_inl nd)
1339 , text "simple =" <+> ppr (nd_simple nd)
1340 , text "active_rule_fvs =" <+> ppr (nd_active_rule_fvs nd)
1341 , text "score =" <+> ppr (nd_score nd)
1342 ])
1343
1344 -- The NodeScore is compared lexicographically;
1345 -- e.g. lower rank wins regardless of size
1346 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1347 , Int -- Size of rhs: higher => more likely to be picked as LB
1348 -- Maxes out at maxExprSize; we just use it to prioritise
1349 -- small functions
1350 , Bool ) -- Was it a loop breaker before?
1351 -- True => more likely to be picked
1352 -- Note [Loop breakers, node scoring, and stability]
1353
1354 rank :: NodeScore -> Int
1355 rank (r, _, _) = r
1356
1357 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1358 -> (Var, CoreExpr) -> LetrecNode
1359 -- See Note [Recursive bindings: the grand plan]
1360 makeNode !env imp_rule_edges bndr_set (bndr, rhs)
1361 = DigraphNode { node_payload = details
1362 , node_key = varUnique bndr
1363 , node_dependencies = nonDetKeysUniqSet scope_fvs }
1364 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1365 -- is still deterministic with edges in nondeterministic order as
1366 -- explained in Note [Deterministic SCC] in GHC.Data.Graph.Directed.
1367 where
1368 details = ND { nd_bndr = bndr'
1369 , nd_rhs = rhs'
1370 , nd_rhs_bndrs = bndrs'
1371 , nd_uds = scope_uds
1372 , nd_inl = inl_fvs
1373 , nd_simple = null rules_w_uds && null imp_rule_info
1374 , nd_active_rule_fvs = active_rule_fvs
1375 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1376
1377 bndr' = bndr `setIdUnfolding` unf'
1378 `setIdSpecialisation` mkRuleInfo rules'
1379
1380 inl_uds = rhs_uds `andUDs` unf_uds
1381 scope_uds = inl_uds `andUDs` rule_uds
1382 -- Note [Rules are extra RHSs]
1383 -- Note [Rule dependency info]
1384 scope_fvs = udFreeVars bndr_set scope_uds
1385 -- scope_fvs: all occurrences from this binder: RHS, unfolding,
1386 -- and RULES, both LHS and RHS thereof, active or inactive
1387
1388 inl_fvs = udFreeVars bndr_set inl_uds
1389 -- inl_fvs: vars that would become free if the function was inlined.
1390 -- We conservatively approximate that by thefree vars from the RHS
1391 -- and the unfolding together.
1392 -- See Note [inl_fvs]
1393
1394 mb_join_arity = isJoinId_maybe bndr
1395 -- Get join point info from the *current* decision
1396 -- We don't know what the new decision will be!
1397 -- Using the old decision at least allows us to
1398 -- preserve existing join point, even RULEs are added
1399 -- See Note [Join points and unfoldings/rules]
1400
1401 --------- Right hand side ---------
1402 -- Constructing the edges for the main Rec computation
1403 -- See Note [Forming Rec groups]
1404 -- Do not use occAnalRhs because we don't yet know
1405 -- the final answer for mb_join_arity
1406 (bndrs, body) = collectBinders rhs
1407 rhs_env = rhsCtxt env
1408 (WithUsageDetails rhs_uds (bndrs', body')) = occAnalLamOrRhs rhs_env bndrs body
1409 rhs' = mkLams bndrs' body'
1410
1411 --------- Unfolding ---------
1412 -- See Note [Unfoldings and join points]
1413 unf = realIdUnfolding bndr -- realIdUnfolding: Ignore loop-breaker-ness
1414 -- here because that is what we are setting!
1415 (WithUsageDetails unf_uds unf') = occAnalUnfolding rhs_env Recursive mb_join_arity unf
1416
1417 --------- IMP-RULES --------
1418 is_active = occ_rule_act env :: Activation -> Bool
1419 imp_rule_info = lookupImpRules imp_rule_edges bndr
1420 imp_rule_uds = impRulesScopeUsage imp_rule_info
1421 imp_rule_fvs = impRulesActiveFvs is_active bndr_set imp_rule_info
1422
1423 --------- All rules --------
1424 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1425 rules_w_uds = occAnalRules rhs_env mb_join_arity bndr
1426 rules' = map fstOf3 rules_w_uds
1427
1428 rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds
1429 add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds
1430
1431 active_rule_fvs = foldr add_active_rule imp_rule_fvs rules_w_uds
1432 add_active_rule (rule, _, rhs_uds) fvs
1433 | is_active (ruleActivation rule)
1434 = udFreeVars bndr_set rhs_uds `unionVarSet` fvs
1435 | otherwise
1436 = fvs
1437
1438
1439 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1440 -> UsageDetails -- for BODY of let
1441 -> [Details]
1442 -> WithUsageDetails [LetrecNode] -- adjusted
1443 -- See Note [Choosing loop breakers]
1444 -- This function primarily creates the Nodes for the
1445 -- loop-breaker SCC analysis. More specifically:
1446 -- a) tag each binder with its occurrence info
1447 -- b) add a NodeScore to each node
1448 -- c) make a Node with the right dependency edges for
1449 -- the loop-breaker SCC analysis
1450 -- d) adjust each RHS's usage details according to
1451 -- the binder's (new) shotness and join-point-hood
1452 mkLoopBreakerNodes !env lvl body_uds details_s
1453 = WithUsageDetails final_uds (zipWithEqual "mkLoopBreakerNodes" mk_lb_node details_s bndrs')
1454 where
1455 (final_uds, bndrs')
1456 = tagRecBinders lvl body_uds
1457 [ (bndr, uds, rhs_bndrs)
1458 | ND { nd_bndr = bndr, nd_uds = uds, nd_rhs_bndrs = rhs_bndrs }
1459 <- details_s ]
1460
1461 mk_lb_node nd@(ND { nd_bndr = old_bndr, nd_inl = inl_fvs }) new_bndr
1462 = DigraphNode { node_payload = new_nd
1463 , node_key = varUnique old_bndr
1464 , node_dependencies = nonDetKeysUniqSet lb_deps }
1465 -- It's OK to use nonDetKeysUniqSet here as
1466 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1467 -- in nondeterministic order as explained in
1468 -- Note [Deterministic SCC] in GHC.Data.Graph.Directed.
1469 where
1470 new_nd = nd { nd_bndr = new_bndr, nd_score = score }
1471 score = nodeScore env new_bndr lb_deps nd
1472 lb_deps = extendFvs_ rule_fv_env inl_fvs
1473 -- See Note [Loop breaker dependencies]
1474
1475 rule_fv_env :: IdEnv IdSet
1476 -- Maps a variable f to the variables from this group
1477 -- reachable by a sequence of RULES starting with f
1478 -- Domain is *subset* of bound vars (others have no rule fvs)
1479 -- See Note [Finding rule RHS free vars]
1480 -- Why transClosureFV? See Note [Loop breaker dependencies]
1481 rule_fv_env = transClosureFV $ mkVarEnv $
1482 [ (b, rule_fvs)
1483 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1484 , not (isEmptyVarSet rule_fvs) ]
1485
1486 {- Note [Loop breaker dependencies]
1487 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1488 The loop breaker dependencies of x in a recursive
1489 group { f1 = e1; ...; fn = en } are:
1490
1491 - The "inline free variables" of f: the fi free in
1492 f's stable unfolding and RHS; see Note [inl_fvs]
1493
1494 - Any fi reachable from those inline free variables by a sequence
1495 of RULE rewrites. Remember, rule rewriting is not affected
1496 by fi being a loop breaker, so we have to take the transitive
1497 closure in case f is the only possible loop breaker in the loop.
1498
1499 Hence rule_fv_env. We need only account for /active/ rules.
1500 -}
1501
1502 ------------------------------------------
1503 nodeScore :: OccEnv
1504 -> Id -- Binder with new occ-info
1505 -> VarSet -- Loop-breaker dependencies
1506 -> Details
1507 -> NodeScore
1508 nodeScore !env new_bndr lb_deps
1509 (ND { nd_bndr = old_bndr, nd_rhs = bind_rhs })
1510
1511 | not (isId old_bndr) -- A type or coercion variable is never a loop breaker
1512 = (100, 0, False)
1513
1514 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1515 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1516
1517 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1518 = (0, 0, True) -- a NOINLINE pragma) makes a great loop breaker
1519
1520 | exprIsTrivial rhs
1521 = mk_score 10 -- Practically certain to be inlined
1522 -- Used to have also: && not (isExportedId bndr)
1523 -- But I found this sometimes cost an extra iteration when we have
1524 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1525 -- where df is the exported dictionary. Then df makes a really
1526 -- bad choice for loop breaker
1527
1528 | DFunUnfolding { df_args = args } <- old_unf
1529 -- Never choose a DFun as a loop breaker
1530 -- Note [DFuns should not be loop breakers]
1531 = (9, length args, is_lb)
1532
1533 -- Data structures are more important than INLINE pragmas
1534 -- so that dictionary/method recursion unravels
1535
1536 | CoreUnfolding { uf_guidance = UnfWhen {} } <- old_unf
1537 = mk_score 6
1538
1539 | is_con_app rhs -- Data types help with cases:
1540 = mk_score 5 -- Note [Constructor applications]
1541
1542 | isStableUnfolding old_unf
1543 , can_unfold
1544 = mk_score 3
1545
1546 | isOneOcc (idOccInfo new_bndr)
1547 = mk_score 2 -- Likely to be inlined
1548
1549 | can_unfold -- The Id has some kind of unfolding
1550 = mk_score 1
1551
1552 | otherwise
1553 = (0, 0, is_lb)
1554
1555 where
1556 mk_score :: Int -> NodeScore
1557 mk_score rank = (rank, rhs_size, is_lb)
1558
1559 -- is_lb: see Note [Loop breakers, node scoring, and stability]
1560 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1561
1562 old_unf = realIdUnfolding old_bndr
1563 can_unfold = canUnfold old_unf
1564 rhs = case old_unf of
1565 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1566 | isStableSource src
1567 -> unf_rhs
1568 _ -> bind_rhs
1569 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1570 rhs_size = case old_unf of
1571 CoreUnfolding { uf_guidance = guidance }
1572 | UnfIfGoodArgs { ug_size = size } <- guidance
1573 -> size
1574 _ -> cheapExprSize rhs
1575
1576
1577 -- Checking for a constructor application
1578 -- Cheap and cheerful; the simplifier moves casts out of the way
1579 -- The lambda case is important to spot x = /\a. C (f a)
1580 -- which comes up when C is a dictionary constructor and
1581 -- f is a default method.
1582 -- Example: the instance for Show (ST s a) in GHC.ST
1583 --
1584 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1585 -- Note [Closure conversion]
1586 is_con_app (Var v) = isConLikeId v
1587 is_con_app (App f _) = is_con_app f
1588 is_con_app (Lam _ e) = is_con_app e
1589 is_con_app (Tick _ e) = is_con_app e
1590 is_con_app _ = False
1591
1592 maxExprSize :: Int
1593 maxExprSize = 20 -- Rather arbitrary
1594
1595 cheapExprSize :: CoreExpr -> Int
1596 -- Maxes out at maxExprSize
1597 cheapExprSize e
1598 = go 0 e
1599 where
1600 go n e | n >= maxExprSize = n
1601 | otherwise = go1 n e
1602
1603 go1 n (Var {}) = n+1
1604 go1 n (Lit {}) = n+1
1605 go1 n (Type {}) = n
1606 go1 n (Coercion {}) = n
1607 go1 n (Tick _ e) = go1 n e
1608 go1 n (Cast e _) = go1 n e
1609 go1 n (App f a) = go (go1 n f) a
1610 go1 n (Lam b e)
1611 | isTyVar b = go1 n e
1612 | otherwise = go (n+1) e
1613 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1614 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1615
1616 gos n [] = n
1617 gos n (e:es) | n >= maxExprSize = n
1618 | otherwise = gos (go1 n e) es
1619
1620 betterLB :: NodeScore -> NodeScore -> Bool
1621 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1622 betterLB (rank1, size1, lb1) (rank2, size2, _)
1623 | rank1 < rank2 = True
1624 | rank1 > rank2 = False
1625 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1626 | size1 > size2 = True
1627 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1628 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1629
1630 {- Note [Self-recursion and loop breakers]
1631 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1632 If we have
1633 rec { f = ...f...g...
1634 ; g = .....f... }
1635 then 'f' has to be a loop breaker anyway, so we may as well choose it
1636 right away, so that g can inline freely.
1637
1638 This is really just a cheap hack. Consider
1639 rec { f = ...g...
1640 ; g = ..f..h...
1641 ; h = ...f....}
1642 Here f or g are better loop breakers than h; but we might accidentally
1643 choose h. Finding the minimal set of loop breakers is hard.
1644
1645 Note [Loop breakers, node scoring, and stability]
1646 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1647 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1648 and pick the one with the best score (according to 'betterLB').
1649
1650 We need to be jolly careful (#12425, #12234) about the stability
1651 of this choice. Suppose we have
1652
1653 let rec { f = ...g...g...
1654 ; g = ...f...f... }
1655 in
1656 case x of
1657 True -> ...f..
1658 False -> ..f...
1659
1660 In each iteration of the simplifier the occurrence analyser OccAnal
1661 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1662 breaker. That means it is free to inline f.
1663
1664 Suppose that GHC decides to inline f in the branches of the case, but
1665 (for some reason; eg it is not saturated) in the rhs of g. So we get
1666
1667 let rec { f = ...g...g...
1668 ; g = ...f...f... }
1669 in
1670 case x of
1671 True -> ...g...g.....
1672 False -> ..g..g....
1673
1674 Now suppose that, for some reason, in the next iteration the occurrence
1675 analyser chooses f as the loop breaker, so it can freely inline g. And
1676 again for some reason the simplifier inlines g at its calls in the case
1677 branches, but not in the RHS of f. Then we get
1678
1679 let rec { f = ...g...g...
1680 ; g = ...f...f... }
1681 in
1682 case x of
1683 True -> ...(...f...f...)...(...f..f..).....
1684 False -> ..(...f...f...)...(..f..f...)....
1685
1686 You can see where this is going! Each iteration of the simplifier
1687 doubles the number of calls to f or g. No wonder GHC is slow!
1688
1689 (In the particular example in comment:3 of #12425, f and g are the two
1690 mutually recursive fmap instances for CondT and Result. They are both
1691 marked INLINE which, oddly, is why they don't inline in each other's
1692 RHS, because the call there is not saturated.)
1693
1694 The root cause is that we flip-flop on our choice of loop breaker. I
1695 always thought it didn't matter, and indeed for any single iteration
1696 to terminate, it doesn't matter. But when we iterate, it matters a
1697 lot!!
1698
1699 So The Plan is this:
1700 If there is a tie, choose the node that
1701 was a loop breaker last time round
1702
1703 Hence the is_lb field of NodeScore
1704
1705 ************************************************************************
1706 * *
1707 Right hand sides
1708 * *
1709 ************************************************************************
1710 -}
1711
1712 occAnalRhs :: OccEnv -> RecFlag -> Maybe JoinArity
1713 -> CoreExpr -- RHS
1714 -> WithUsageDetails CoreExpr
1715 occAnalRhs !env is_rec mb_join_arity rhs
1716 = let
1717 (bndrs, body) = collectBinders rhs
1718 (WithUsageDetails body_usage (bndrs',body')) = occAnalLamOrRhs env bndrs body
1719 final_bndrs | isRec is_rec = bndrs'
1720 | otherwise = markJoinOneShots mb_join_arity bndrs'
1721 -- For a /non-recursive/ join point we can mark all
1722 -- its join-lambda as one-shot; and it's a good idea to do so
1723
1724 -- Final adjustment
1725 rhs_usage = adjustRhsUsage is_rec mb_join_arity final_bndrs body_usage
1726 in WithUsageDetails rhs_usage (mkLams final_bndrs body')
1727
1728 occAnalUnfolding :: OccEnv
1729 -> RecFlag
1730 -> Maybe JoinArity -- See Note [Join points and unfoldings/rules]
1731 -> Unfolding
1732 -> WithUsageDetails Unfolding
1733 -- Occurrence-analyse a stable unfolding;
1734 -- discard a non-stable one altogether.
1735 occAnalUnfolding !env is_rec mb_join_arity unf
1736 = case unf of
1737 unf@(CoreUnfolding { uf_tmpl = rhs, uf_src = src })
1738 | isStableSource src ->
1739 let
1740 (WithUsageDetails usage rhs') = occAnalRhs env is_rec mb_join_arity rhs
1741
1742 unf' | noBinderSwaps env = unf -- Note [Unfoldings and rules]
1743 | otherwise = unf { uf_tmpl = rhs' }
1744 in WithUsageDetails (markAllMany usage) unf'
1745 -- markAllMany: see Note [Occurrences in stable unfoldings]
1746 | otherwise -> WithUsageDetails emptyDetails unf
1747 -- For non-Stable unfoldings we leave them undisturbed, but
1748 -- don't count their usage because the simplifier will discard them.
1749 -- We leave them undisturbed because nodeScore uses their size info
1750 -- to guide its decisions. It's ok to leave un-substituted
1751 -- expressions in the tree because all the variables that were in
1752 -- scope remain in scope; there is no cloning etc.
1753
1754 unf@(DFunUnfolding { df_bndrs = bndrs, df_args = args })
1755 -> WithUsageDetails final_usage (unf { df_args = args' })
1756 where
1757 env' = env `addInScope` bndrs
1758 (WithUsageDetails usage args') = occAnalList env' args
1759 final_usage = markAllManyNonTail (delDetailsList usage bndrs)
1760 `addLamCoVarOccs` bndrs
1761
1762 unf -> WithUsageDetails emptyDetails unf
1763
1764 occAnalRules :: OccEnv
1765 -> Maybe JoinArity -- See Note [Join points and unfoldings/rules]
1766 -> Id -- Get rules from here
1767 -> [(CoreRule, -- Each (non-built-in) rule
1768 UsageDetails, -- Usage details for LHS
1769 UsageDetails)] -- Usage details for RHS
1770 occAnalRules !env mb_join_arity bndr
1771 = map occ_anal_rule (idCoreRules bndr)
1772 where
1773 occ_anal_rule rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1774 = (rule', lhs_uds', rhs_uds')
1775 where
1776 env' = env `addInScope` bndrs
1777 rule' | noBinderSwaps env = rule -- Note [Unfoldings and rules]
1778 | otherwise = rule { ru_args = args', ru_rhs = rhs' }
1779
1780 (WithUsageDetails lhs_uds args') = occAnalList env' args
1781 lhs_uds' = markAllManyNonTail (lhs_uds `delDetailsList` bndrs)
1782 `addLamCoVarOccs` bndrs
1783
1784 (WithUsageDetails rhs_uds rhs') = occAnal env' rhs
1785 -- Note [Rules are extra RHSs]
1786 -- Note [Rule dependency info]
1787 rhs_uds' = markAllNonTailIf (not exact_join) $
1788 markAllMany $
1789 rhs_uds `delDetailsList` bndrs
1790
1791 exact_join = exactJoin mb_join_arity args
1792 -- See Note [Join points and unfoldings/rules]
1793
1794 occ_anal_rule other_rule = (other_rule, emptyDetails, emptyDetails)
1795
1796 {- Note [Join point RHSs]
1797 ~~~~~~~~~~~~~~~~~~~~~~~~~
1798 Consider
1799 x = e
1800 join j = Just x
1801
1802 We want to inline x into j right away, so we don't want to give
1803 the join point a RhsCtxt (#14137). It's not a huge deal, because
1804 the FloatIn pass knows to float into join point RHSs; and the simplifier
1805 does not float things out of join point RHSs. But it's a simple, cheap
1806 thing to do. See #14137.
1807
1808 Note [Occurrences in stable unfoldings]
1809 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1810 Consider
1811 f p = BIG
1812 {-# INLINE g #-}
1813 g y = not (f y)
1814 where this is the /only/ occurrence of 'f'. So 'g' will get a stable
1815 unfolding. Now suppose that g's RHS gets optimised (perhaps by a rule
1816 or inlining f) so that it doesn't mention 'f' any more. Now the last
1817 remaining call to f is in g's Stable unfolding. But, even though there
1818 is only one syntactic occurrence of f, we do /not/ want to do
1819 preinlineUnconditionally here!
1820
1821 The INLINE pragma says "inline exactly this RHS"; perhaps the
1822 programmer wants to expose that 'not', say. If we inline f that will make
1823 the Stable unfoldign big, and that wasn't what the programmer wanted.
1824
1825 Another way to think about it: if we inlined g as-is into multiple
1826 call sites, now there's be multiple calls to f.
1827
1828 Bottom line: treat all occurrences in a stable unfolding as "Many".
1829
1830 Note [Unfoldings and rules]
1831 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1832 Generally unfoldings and rules are already occurrence-analysed, so we
1833 don't want to reconstruct their trees; we just want to analyse them to
1834 find how they use their free variables.
1835
1836 EXCEPT if there is a binder-swap going on, in which case we do want to
1837 produce a new tree.
1838
1839 So we have a fast-path that keeps the old tree if the occ_bs_env is
1840 empty. This just saves a bit of allocation and reconstruction; not
1841 a big deal.
1842
1843 Note [Cascading inlines]
1844 ~~~~~~~~~~~~~~~~~~~~~~~~
1845 By default we use an rhsCtxt for the RHS of a binding. This tells the
1846 occ anal n that it's looking at an RHS, which has an effect in
1847 occAnalApp. In particular, for constructor applications, it makes
1848 the arguments appear to have NoOccInfo, so that we don't inline into
1849 them. Thus x = f y
1850 k = Just x
1851 we do not want to inline x.
1852
1853 But there's a problem. Consider
1854 x1 = a0 : []
1855 x2 = a1 : x1
1856 x3 = a2 : x2
1857 g = f x3
1858 First time round, it looks as if x1 and x2 occur as an arg of a
1859 let-bound constructor ==> give them a many-occurrence.
1860 But then x3 is inlined (unconditionally as it happens) and
1861 next time round, x2 will be, and the next time round x1 will be
1862 Result: multiple simplifier iterations. Sigh.
1863
1864 So, when analysing the RHS of x3 we notice that x3 will itself
1865 definitely inline the next time round, and so we analyse x3's rhs in
1866 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1867
1868 Annoyingly, we have to approximate GHC.Core.Opt.Simplify.Utils.preInlineUnconditionally.
1869 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1870 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1871 then the simplifier iterates indefinitely:
1872 x = f y
1873 k = Just x -- We decide that k is 'certainly_inline'
1874 v = ...k... -- but preInlineUnconditionally doesn't inline it
1875 inline ==>
1876 k = Just (f y)
1877 v = ...k...
1878 float ==>
1879 x1 = f y
1880 k = Just x1
1881 v = ...k...
1882
1883 This is worse than the slow cascade, so we only want to say "certainly_inline"
1884 if it really is certain. Look at the note with preInlineUnconditionally
1885 for the various clauses.
1886
1887
1888 ************************************************************************
1889 * *
1890 Expressions
1891 * *
1892 ************************************************************************
1893 -}
1894
1895 occAnalList :: OccEnv -> [CoreExpr] -> WithUsageDetails [CoreExpr]
1896 occAnalList !_ [] = WithUsageDetails emptyDetails []
1897 occAnalList env (e:es) = let
1898 (WithUsageDetails uds1 e') = occAnal env e
1899 (WithUsageDetails uds2 es') = occAnalList env es
1900 in WithUsageDetails (uds1 `andUDs` uds2) (e' : es')
1901
1902 occAnal :: OccEnv
1903 -> CoreExpr
1904 -> WithUsageDetails CoreExpr -- Gives info only about the "interesting" Ids
1905
1906 occAnal !_ expr@(Lit _) = WithUsageDetails emptyDetails expr
1907
1908 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1909 -- At one stage, I gathered the idRuleVars for the variable here too,
1910 -- which in a way is the right thing to do.
1911 -- But that went wrong right after specialisation, when
1912 -- the *occurrences* of the overloaded function didn't have any
1913 -- rules in them, so the *specialised* versions looked as if they
1914 -- weren't used at all.
1915
1916 occAnal _ expr@(Type ty)
1917 = WithUsageDetails (addManyOccs emptyDetails (coVarsOfType ty)) expr
1918 occAnal _ expr@(Coercion co)
1919 = WithUsageDetails (addManyOccs emptyDetails (coVarsOfCo co)) expr
1920 -- See Note [Gather occurrences of coercion variables]
1921
1922 {- Note [Gather occurrences of coercion variables]
1923 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1924 We need to gather info about what coercion variables appear, for two reasons:
1925
1926 1. So that we can sort them into the right place when doing dependency analysis.
1927
1928 2. So that we know when they are surely dead.
1929
1930 It is useful to know when they a coercion variable is surely dead,
1931 when we want to discard a case-expression, in GHC.Core.Opt.Simplify.rebuildCase.
1932 For example (#20143):
1933
1934 case unsafeEqualityProof @blah of
1935 UnsafeRefl cv -> ...no use of cv...
1936
1937 Here we can discard the case, since unsafeEqualityProof always terminates.
1938 But only if the coercion variable 'cv' is unused.
1939
1940 Another example from #15696: we had something like
1941 case eq_sel d of co -> ...(typeError @(...co...) "urk")...
1942 Then 'd' was substituted by a dictionary, so the expression
1943 simpified to
1944 case (Coercion <blah>) of cv -> ...(typeError @(...cv...) "urk")...
1945
1946 We can only drop the case altogether if 'cv' is unused, which is not
1947 the case here.
1948
1949 Conclusion: we need accurate dead-ness info for CoVars.
1950 We gather CoVar occurrences from:
1951
1952 * The (Type ty) and (Coercion co) cases of occAnal
1953
1954 * The type 'ty' of a lambda-binder (\(x:ty). blah)
1955 See addLamCoVarOccs
1956
1957 But it is not necessary to gather CoVars from the types of other binders.
1958
1959 * For let-binders, if the type mentions a CoVar, so will the RHS (since
1960 it has the same type)
1961
1962 * For case-alt binders, if the type mentions a CoVar, so will the scrutinee
1963 (since it has the same type)
1964 -}
1965
1966 occAnal env (Tick tickish body)
1967 | SourceNote{} <- tickish
1968 = WithUsageDetails usage (Tick tickish body')
1969 -- SourceNotes are best-effort; so we just proceed as usual.
1970 -- If we drop a tick due to the issues described below it's
1971 -- not the end of the world.
1972
1973 | tickish `tickishScopesLike` SoftScope
1974 = WithUsageDetails (markAllNonTail usage) (Tick tickish body')
1975
1976 | Breakpoint _ _ ids <- tickish
1977 = WithUsageDetails (usage_lam `andUDs` foldr addManyOcc emptyDetails ids) (Tick tickish body')
1978 -- never substitute for any of the Ids in a Breakpoint
1979
1980 | otherwise
1981 = WithUsageDetails usage_lam (Tick tickish body')
1982 where
1983 (WithUsageDetails usage body') = occAnal env body
1984 -- for a non-soft tick scope, we can inline lambdas only
1985 usage_lam = markAllNonTail (markAllInsideLam usage)
1986 -- TODO There may be ways to make ticks and join points play
1987 -- nicer together, but right now there are problems:
1988 -- let j x = ... in tick<t> (j 1)
1989 -- Making j a join point may cause the simplifier to drop t
1990 -- (if the tick is put into the continuation). So we don't
1991 -- count j 1 as a tail call.
1992 -- See #14242.
1993
1994 occAnal env (Cast expr co)
1995 = let
1996 (WithUsageDetails usage expr') = occAnal env expr
1997 usage1 = markAllManyNonTailIf (isRhsEnv env) usage
1998 -- usage1: if we see let x = y `cast` co
1999 -- then mark y as 'Many' so that we don't
2000 -- immediately inline y again.
2001 usage2 = addManyOccs usage1 (coVarsOfCo co)
2002 -- usage2: see Note [Gather occurrences of coercion variables]
2003 in WithUsageDetails (markAllNonTail usage2) (Cast expr' co)
2004
2005 occAnal env app@(App _ _)
2006 = occAnalApp env (collectArgsTicks tickishFloatable app)
2007
2008 -- Ignore type variables altogether
2009 -- (a) occurrences inside type lambdas only not marked as InsideLam
2010 -- (b) type variables not in environment
2011
2012 occAnal env (Lam x body)
2013 | isTyVar x
2014 = let
2015 (WithUsageDetails body_usage body') = occAnal env body
2016 in WithUsageDetails (markAllNonTail body_usage) (Lam x body')
2017
2018 {- Note [Occurrence analysis for lambda binders]
2019 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2020 For value lambdas we do a special hack. Consider
2021 (\x. \y. ...x...)
2022 If we did nothing, x is used inside the \y, so would be marked
2023 as dangerous to dup. But in the common case where the abstraction
2024 is applied to two arguments this is over-pessimistic, which delays
2025 inlining x, which forces more simplifier iterations.
2026
2027 So instead, we just mark each binder with its occurrence info in the
2028 *body* of the multiple lambda. Then, the simplifier is careful when
2029 partially applying lambdas. See the calls to zapLamBndrs in
2030 GHC.Core.Opt.Simplify.simplExprF1
2031 GHC.Core.SimpleOpt.simple_app
2032 -}
2033
2034 occAnal env expr@(Lam _ _)
2035 = -- See Note [Occurrence analysis for lambda binders]
2036 let
2037 (bndrs, body) = collectBinders expr
2038 (WithUsageDetails usage (tagged_bndrs, body')) = occAnalLamOrRhs env bndrs body
2039 expr' = mkLams tagged_bndrs body'
2040 usage1 = markAllNonTail usage
2041 one_shot_gp = all isOneShotBndr tagged_bndrs
2042 final_usage = markAllInsideLamIf (not one_shot_gp) usage1
2043 `addLamCoVarOccs` bndrs
2044 -- See Note [Gather occurrences of coercion variables]
2045 in WithUsageDetails final_usage expr'
2046
2047 occAnal env (Case scrut bndr ty alts)
2048 = let
2049 (WithUsageDetails scrut_usage scrut') = occAnal (scrutCtxt env alts) scrut
2050 alt_env = addBndrSwap scrut' bndr $ env { occ_encl = OccVanilla } `addInScope` [bndr]
2051 (alts_usage_s, alts') = mapAndUnzip ((\(WithUsageDetails uds a) -> (uds,a)) . occAnalAlt alt_env) alts
2052 alts_usage = foldr orUDs emptyDetails alts_usage_s
2053 (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr
2054 total_usage = markAllNonTail scrut_usage `andUDs` alts_usage1
2055 -- Alts can have tail calls, but the scrutinee can't
2056 in WithUsageDetails total_usage (Case scrut' tagged_bndr ty alts')
2057
2058 occAnal env (Let bind body)
2059 = let
2060 (WithUsageDetails body_usage body') = occAnal (env `addInScope` bindersOf bind) body
2061 (WithUsageDetails final_usage new_binds) = occAnalBind env NotTopLevel
2062 noImpRuleEdges bind body_usage
2063 in WithUsageDetails final_usage (mkLets new_binds body')
2064
2065 occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr] -> [OneShots] -> WithUsageDetails CoreExpr
2066 -- The `fun` argument is just an accumulating parameter,
2067 -- the base for building the application we return
2068 occAnalArgs !env fun args !one_shots
2069 = go emptyDetails fun args one_shots
2070 where
2071 go uds fun [] _ = WithUsageDetails uds fun
2072 go uds fun (arg:args) one_shots
2073 = go (uds `andUDs` arg_uds) (fun `App` arg') args one_shots'
2074 where
2075 !(WithUsageDetails arg_uds arg') = occAnal arg_env arg
2076 !(arg_env, one_shots')
2077 | isTypeArg arg = (env, one_shots)
2078 | otherwise = valArgCtxt env one_shots
2079
2080 {-
2081 Applications are dealt with specially because we want
2082 the "build hack" to work.
2083
2084 Note [Arguments of let-bound constructors]
2085 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2086 Consider
2087 f x = let y = expensive x in
2088 let z = (True,y) in
2089 (case z of {(p,q)->q}, case z of {(p,q)->q})
2090 We feel free to duplicate the WHNF (True,y), but that means
2091 that y may be duplicated thereby.
2092
2093 If we aren't careful we duplicate the (expensive x) call!
2094 Constructors are rather like lambdas in this way.
2095 -}
2096
2097 occAnalApp :: OccEnv
2098 -> (Expr CoreBndr, [Arg CoreBndr], [CoreTickish])
2099 -> WithUsageDetails (Expr CoreBndr)
2100 -- Naked variables (not applied) end up here too
2101 occAnalApp !env (Var fun, args, ticks)
2102 -- Account for join arity of runRW# continuation
2103 -- See Note [Simplification of runRW#]
2104 --
2105 -- NB: Do not be tempted to make the next (Var fun, args, tick)
2106 -- equation into an 'otherwise' clause for this equation
2107 -- The former has a bang-pattern to occ-anal the args, and
2108 -- we don't want to occ-anal them twice in the runRW# case!
2109 -- This caused #18296
2110 | fun `hasKey` runRWKey
2111 , [t1, t2, arg] <- args
2112 , let (WithUsageDetails usage arg') = occAnalRhs env NonRecursive (Just 1) arg
2113 = WithUsageDetails usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
2114
2115 occAnalApp env (Var fun_id, args, ticks)
2116 = WithUsageDetails all_uds (mkTicks ticks app')
2117 where
2118 -- Lots of banged bindings: this is a very heavily bit of code,
2119 -- so it pays not to make lots of thunks here, all of which
2120 -- will ultimately be forced.
2121 !(fun', fun_id') = lookupBndrSwap env fun_id
2122 !(WithUsageDetails args_uds app') = occAnalArgs env fun' args one_shots
2123
2124 fun_uds = mkOneOcc fun_id' int_cxt n_args
2125 -- NB: fun_uds is computed for fun_id', not fun_id
2126 -- See (BS1) in Note [The binder-swap substitution]
2127
2128 all_uds = fun_uds `andUDs` final_args_uds
2129
2130 !final_args_uds = markAllNonTail $
2131 markAllInsideLamIf (isRhsEnv env && is_exp) $
2132 args_uds
2133 -- We mark the free vars of the argument of a constructor or PAP
2134 -- as "inside-lambda", if it is the RHS of a let(rec).
2135 -- This means that nothing gets inlined into a constructor or PAP
2136 -- argument position, which is what we want. Typically those
2137 -- constructor arguments are just variables, or trivial expressions.
2138 -- We use inside-lam because it's like eta-expanding the PAP.
2139 --
2140 -- This is the *whole point* of the isRhsEnv predicate
2141 -- See Note [Arguments of let-bound constructors]
2142
2143 !n_val_args = valArgCount args
2144 !n_args = length args
2145 !int_cxt = case occ_encl env of
2146 OccScrut -> IsInteresting
2147 _other | n_val_args > 0 -> IsInteresting
2148 | otherwise -> NotInteresting
2149
2150 !is_exp = isExpandableApp fun_id n_val_args
2151 -- See Note [CONLIKE pragma] in GHC.Types.Basic
2152 -- The definition of is_exp should match that in GHC.Core.Opt.Simplify.prepareRhs
2153
2154 one_shots = argsOneShots (idDmdSig fun_id) guaranteed_val_args
2155 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
2156 (occ_one_shots env))
2157 -- See Note [Sources of one-shot information], bullet point A']
2158
2159 occAnalApp env (fun, args, ticks)
2160 = WithUsageDetails (markAllNonTail (fun_uds `andUDs` args_uds))
2161 (mkTicks ticks app')
2162 where
2163 !(WithUsageDetails args_uds app') = occAnalArgs env fun' args []
2164 !(WithUsageDetails fun_uds fun') = occAnal (addAppCtxt env args) fun
2165 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
2166 -- often leaves behind beta redexs like
2167 -- (\x y -> e) a1 a2
2168 -- Here we would like to mark x,y as one-shot, and treat the whole
2169 -- thing much like a let. We do this by pushing some OneShotLam items
2170 -- onto the context stack.
2171
2172 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2173 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2174 | n_val_args > 0
2175 = env { occ_one_shots = replicate n_val_args OneShotLam ++ ctxt
2176 , occ_encl = OccVanilla }
2177 -- OccVanilla: the function part of the application
2178 -- is no longer on OccRhs or OccScrut
2179 | otherwise
2180 = env
2181 where
2182 n_val_args = valArgCount args
2183
2184
2185 {-
2186 Note [Sources of one-shot information]
2187 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2188 The occurrence analyser obtains one-shot-lambda information from two sources:
2189
2190 A: Saturated applications: eg f e1 .. en
2191
2192 In general, given a call (f e1 .. en) we can propagate one-shot info from
2193 f's strictness signature into e1 .. en, but /only/ if n is enough to
2194 saturate the strictness signature. A strictness signature like
2195
2196 f :: C1(C1(L))LS
2197
2198 means that *if f is applied to three arguments* then it will guarantee to
2199 call its first argument at most once, and to call the result of that at
2200 most once. But if f has fewer than three arguments, all bets are off; e.g.
2201
2202 map (f (\x y. expensive) e2) xs
2203
2204 Here the \x y abstraction may be called many times (once for each element of
2205 xs) so we should not mark x and y as one-shot. But if it was
2206
2207 map (f (\x y. expensive) 3 2) xs
2208
2209 then the first argument of f will be called at most once.
2210
2211 The one-shot info, derived from f's strictness signature, is
2212 computed by 'argsOneShots', called in occAnalApp.
2213
2214 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
2215 where f is as above.
2216
2217 In this case, f is only manifestly applied to one argument, so it does not
2218 look saturated. So by the previous point, we should not use its strictness
2219 signature to learn about the one-shotness of \x y. But in this case we can:
2220 build is fully applied, so we may use its strictness signature; and from
2221 that we learn that build calls its argument with two arguments *at most once*.
2222
2223 So there is really only one call to f, and it will have three arguments. In
2224 that sense, f is saturated, and we may proceed as described above.
2225
2226 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
2227 '(occ_one_shots env)'. See also #13227, comment:9
2228
2229 B: Let-bindings: eg let f = \c. let ... in \n -> blah
2230 in (build f, build f)
2231
2232 Propagate one-shot info from the demanand-info on 'f' to the
2233 lambdas in its RHS (which may not be syntactically at the top)
2234
2235 This information must have come from a previous run of the demanand
2236 analyser.
2237
2238 Previously, the demand analyser would *also* set the one-shot information, but
2239 that code was buggy (see #11770), so doing it only in on place, namely here, is
2240 saner.
2241
2242 Note [OneShots]
2243 ~~~~~~~~~~~~~~~
2244 When analysing an expression, the occ_one_shots argument contains information
2245 about how the function is being used. The length of the list indicates
2246 how many arguments will eventually be passed to the analysed expression,
2247 and the OneShotInfo indicates whether this application is once or multiple times.
2248
2249 Example:
2250
2251 Context of f occ_one_shots when analysing f
2252
2253 f 1 2 [OneShot, OneShot]
2254 map (f 1) [OneShot, NoOneShotInfo]
2255 build f [OneShot, OneShot]
2256 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
2257
2258 Note [Binders in case alternatives]
2259 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2260 Consider
2261 case x of y { (a,b) -> f y }
2262 We treat 'a', 'b' as dead, because they don't physically occur in the
2263 case alternative. (Indeed, a variable is dead iff it doesn't occur in
2264 its scope in the output of OccAnal.) It really helps to know when
2265 binders are unused. See esp the call to isDeadBinder in
2266 Simplify.mkDupableAlt
2267
2268 In this example, though, the Simplifier will bring 'a' and 'b' back to
2269 life, because it binds 'y' to (a,b) (imagine got inlined and
2270 scrutinised y).
2271 -}
2272
2273 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2274 -> WithUsageDetails ([CoreBndr], CoreExpr)
2275 -- Tags the returned binders with their OccInfo, but does
2276 -- not do any markInsideLam to the returned usage details
2277 occAnalLamOrRhs !env [] body
2278 = let (WithUsageDetails body_usage body') = occAnal env body
2279 in WithUsageDetails body_usage ([], body')
2280 -- RHS of thunk or nullary join point
2281
2282 occAnalLamOrRhs env (bndr:bndrs) body
2283 | isTyVar bndr
2284 = -- Important: Keep the environment so that we don't inline into an RHS like
2285 -- \(@ x) -> C @x (f @x)
2286 -- (see the beginning of Note [Cascading inlines]).
2287 let
2288 (WithUsageDetails body_usage (bndrs',body')) = occAnalLamOrRhs env bndrs body
2289 in WithUsageDetails body_usage (bndr:bndrs', body')
2290
2291 occAnalLamOrRhs env binders body
2292 = let
2293 (WithUsageDetails body_usage body') = occAnal env_body body
2294 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2295 -- Use binders' to put one-shot info on the lambdas
2296 in
2297 WithUsageDetails final_usage (tagged_binders, body')
2298 where
2299 env1 = env `addInScope` binders
2300 (env_body, binders') = oneShotGroup env1 binders
2301
2302 occAnalAlt :: OccEnv
2303 -> CoreAlt -> WithUsageDetails (Alt IdWithOccInfo)
2304 occAnalAlt !env (Alt con bndrs rhs)
2305 = let
2306 (WithUsageDetails rhs_usage1 rhs1) = occAnal (env `addInScope` bndrs) rhs
2307 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2308 in -- See Note [Binders in case alternatives]
2309 WithUsageDetails alt_usg (Alt con tagged_bndrs rhs1)
2310
2311 {-
2312 ************************************************************************
2313 * *
2314 OccEnv
2315 * *
2316 ************************************************************************
2317 -}
2318
2319 data OccEnv
2320 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2321 , occ_one_shots :: !OneShots -- See Note [OneShots]
2322 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active
2323 , occ_rule_act :: Activation -> Bool -- Which rules are active
2324 -- See Note [Finding rule RHS free vars]
2325
2326 -- See Note [The binder-swap substitution]
2327 -- If x :-> (y, co) is in the env,
2328 -- then please replace x by (y |> sym mco)
2329 -- Invariant of course: idType x = exprType (y |> sym mco)
2330 , occ_bs_env :: !(VarEnv (OutId, MCoercion))
2331 , occ_bs_rng :: !VarSet -- Vars free in the range of occ_bs_env
2332 -- Domain is Global and Local Ids
2333 -- Range is just Local Ids
2334 }
2335
2336
2337 -----------------------------
2338 -- OccEncl is used to control whether to inline into constructor arguments
2339 -- For example:
2340 -- x = (p,q) -- Don't inline p or q
2341 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2342 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2343 -- So OccEncl tells enough about the context to know what to do when
2344 -- we encounter a constructor application or PAP.
2345 --
2346 -- OccScrut is used to set the "interesting context" field of OncOcc
2347
2348 data OccEncl
2349 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2350 -- Don't inline into constructor args here
2351
2352 | OccScrut -- Scrutintee of a case
2353 -- Can inline into constructor args
2354
2355 | OccVanilla -- Argument of function, body of lambda, etc
2356 -- Do inline into constructor args here
2357
2358 instance Outputable OccEncl where
2359 ppr OccRhs = text "occRhs"
2360 ppr OccScrut = text "occScrut"
2361 ppr OccVanilla = text "occVanilla"
2362
2363 -- See note [OneShots]
2364 type OneShots = [OneShotInfo]
2365
2366 initOccEnv :: OccEnv
2367 initOccEnv
2368 = OccEnv { occ_encl = OccVanilla
2369 , occ_one_shots = []
2370
2371 -- To be conservative, we say that all
2372 -- inlines and rules are active
2373 , occ_unf_act = \_ -> True
2374 , occ_rule_act = \_ -> True
2375
2376 , occ_bs_env = emptyVarEnv
2377 , occ_bs_rng = emptyVarSet }
2378
2379 noBinderSwaps :: OccEnv -> Bool
2380 noBinderSwaps (OccEnv { occ_bs_env = bs_env }) = isEmptyVarEnv bs_env
2381
2382 scrutCtxt :: OccEnv -> [CoreAlt] -> OccEnv
2383 scrutCtxt !env alts
2384 | interesting_alts = env { occ_encl = OccScrut, occ_one_shots = [] }
2385 | otherwise = env { occ_encl = OccVanilla, occ_one_shots = [] }
2386 where
2387 interesting_alts = case alts of
2388 [] -> False
2389 [alt] -> not (isDefaultAlt alt)
2390 _ -> True
2391 -- 'interesting_alts' is True if the case has at least one
2392 -- non-default alternative. That in turn influences
2393 -- pre/postInlineUnconditionally. Grep for "occ_int_cxt"!
2394
2395 rhsCtxt :: OccEnv -> OccEnv
2396 rhsCtxt !env = env { occ_encl = OccRhs, occ_one_shots = [] }
2397
2398 valArgCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2399 valArgCtxt !env []
2400 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2401 valArgCtxt env (one_shots:one_shots_s)
2402 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2403
2404 isRhsEnv :: OccEnv -> Bool
2405 isRhsEnv (OccEnv { occ_encl = cxt }) = case cxt of
2406 OccRhs -> True
2407 _ -> False
2408
2409 addInScope :: OccEnv -> [Var] -> OccEnv
2410 -- See Note [The binder-swap substitution]
2411 addInScope env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars }) bndrs
2412 | any (`elemVarSet` rng_vars) bndrs = env { occ_bs_env = emptyVarEnv, occ_bs_rng = emptyVarSet }
2413 | otherwise = env { occ_bs_env = swap_env `delVarEnvList` bndrs }
2414
2415 oneShotGroup :: OccEnv -> [CoreBndr]
2416 -> ( OccEnv
2417 , [CoreBndr] )
2418 -- The result binders have one-shot-ness set that they might not have had originally.
2419 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2420 -- linearity context knows that c,n are one-shot, and it records that fact in
2421 -- the binder. This is useful to guide subsequent float-in/float-out transformations
2422
2423 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2424 = go ctxt bndrs []
2425 where
2426 go ctxt [] rev_bndrs
2427 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2428 , reverse rev_bndrs )
2429
2430 go [] bndrs rev_bndrs
2431 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2432 , reverse rev_bndrs ++ bndrs )
2433
2434 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2435 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2436 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2437 where
2438 bndr' = updOneShotInfo bndr one_shot
2439 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2440 -- one-shot info might be better than what we can infer, e.g.
2441 -- due to explicit use of the magic 'oneShot' function.
2442 -- See Note [The oneShot function]
2443
2444
2445 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2446 -- Mark the lambdas of a non-recursive join point as one-shot.
2447 -- This is good to prevent gratuitous float-out etc
2448 markJoinOneShots mb_join_arity bndrs
2449 = case mb_join_arity of
2450 Nothing -> bndrs
2451 Just n -> go n bndrs
2452 where
2453 go 0 bndrs = bndrs
2454 go _ [] = [] -- This can legitimately happen.
2455 -- e.g. let j = case ... in j True
2456 -- This will become an arity-1 join point after the
2457 -- simplifier has eta-expanded it; but it may not have
2458 -- enough lambdas /yet/. (Lint checks that JoinIds do
2459 -- have enough lambdas.)
2460 go n (b:bs) = b' : go (n-1) bs
2461 where
2462 b' | isId b = setOneShotLambda b
2463 | otherwise = b
2464
2465 --------------------
2466 transClosureFV :: VarEnv VarSet -> VarEnv VarSet
2467 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2468 -- as well as (f,g), (g,h)
2469 transClosureFV env
2470 | no_change = env
2471 | otherwise = transClosureFV (listToUFM_Directly new_fv_list)
2472 where
2473 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2474 -- It's OK to use nonDetUFMToList here because we'll forget the
2475 -- ordering by creating a new set with listToUFM
2476 bump no_change (b,fvs)
2477 | no_change_here = (no_change, (b,fvs))
2478 | otherwise = (False, (b,new_fvs))
2479 where
2480 (new_fvs, no_change_here) = extendFvs env fvs
2481
2482 -------------
2483 extendFvs_ :: VarEnv VarSet -> VarSet -> VarSet
2484 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2485
2486 extendFvs :: VarEnv VarSet -> VarSet -> (VarSet, Bool)
2487 -- (extendFVs env s) returns
2488 -- (s `union` env(s), env(s) `subset` s)
2489 extendFvs env s
2490 | isNullUFM env
2491 = (s, True)
2492 | otherwise
2493 = (s `unionVarSet` extras, extras `subVarSet` s)
2494 where
2495 extras :: VarSet -- env(s)
2496 extras = nonDetStrictFoldUFM unionVarSet emptyVarSet $
2497 -- It's OK to use nonDetStrictFoldUFM here because unionVarSet commutes
2498 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2499
2500 {-
2501 ************************************************************************
2502 * *
2503 Binder swap
2504 * *
2505 ************************************************************************
2506
2507 Note [Binder swap]
2508 ~~~~~~~~~~~~~~~~~~
2509 The "binder swap" transformation swaps occurrence of the
2510 scrutinee of a case for occurrences of the case-binder:
2511
2512 (1) case x of b { pi -> ri }
2513 ==>
2514 case x of b { pi -> ri[b/x] }
2515
2516 (2) case (x |> co) of b { pi -> ri }
2517 ==>
2518 case (x |> co) of b { pi -> ri[b |> sym co/x] }
2519
2520 The substitution ri[b/x] etc is done by the occurrence analyser.
2521 See Note [The binder-swap substitution].
2522
2523 There are two reasons for making this swap:
2524
2525 (A) It reduces the number of occurrences of the scrutinee, x.
2526 That in turn might reduce its occurrences to one, so we
2527 can inline it and save an allocation. E.g.
2528 let x = factorial y in case x of b { I# v -> ...x... }
2529 If we replace 'x' by 'b' in the alternative we get
2530 let x = factorial y in case x of b { I# v -> ...b... }
2531 and now we can inline 'x', thus
2532 case (factorial y) of b { I# v -> ...b... }
2533
2534 (B) The case-binder b has unfolding information; in the
2535 example above we know that b = I# v. That in turn allows
2536 nested cases to simplify. Consider
2537 case x of b { I# v ->
2538 ...(case x of b2 { I# v2 -> rhs })...
2539 If we replace 'x' by 'b' in the alternative we get
2540 case x of b { I# v ->
2541 ...(case b of b2 { I# v2 -> rhs })...
2542 and now it is trivial to simplify the inner case:
2543 case x of b { I# v ->
2544 ...(let b2 = b in rhs)...
2545
2546 The same can happen even if the scrutinee is a variable
2547 with a cast: see Note [Case of cast]
2548
2549 The reason for doing these transformations /here in the occurrence
2550 analyser/ is because it allows us to adjust the OccInfo for 'x' and
2551 'b' as we go.
2552
2553 * Suppose the only occurrences of 'x' are the scrutinee and in the
2554 ri; then this transformation makes it occur just once, and hence
2555 get inlined right away.
2556
2557 * If instead the Simplifier replaces occurrences of x with
2558 occurrences of b, that will mess up b's occurrence info. That in
2559 turn might have consequences.
2560
2561 There is a danger though. Consider
2562 let v = x +# y
2563 in case (f v) of w -> ...v...v...
2564 And suppose that (f v) expands to just v. Then we'd like to
2565 use 'w' instead of 'v' in the alternative. But it may be too
2566 late; we may have substituted the (cheap) x+#y for v in the
2567 same simplifier pass that reduced (f v) to v.
2568
2569 I think this is just too bad. CSE will recover some of it.
2570
2571 Note [The binder-swap substitution]
2572 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2573 The binder-swap is implemented by the occ_bs_env field of OccEnv.
2574 There are two main pieces:
2575
2576 * Given case x |> co of b { alts }
2577 we add [x :-> (b, co)] to the occ_bs_env environment; this is
2578 done by addBndrSwap.
2579
2580 * Then, at an occurrence of a variable, we look up in the occ_bs_env
2581 to perform the swap. This is done by lookupBndrSwap.
2582
2583 Some tricky corners:
2584
2585 (BS1) We do the substitution before gathering occurrence info. So in
2586 the above example, an occurrence of x turns into an occurrence
2587 of b, and that's what we gather in the UsageDetails. It's as
2588 if the binder-swap occurred before occurrence analysis. See
2589 the computation of fun_uds in occAnalApp.
2590
2591 (BS2) When doing a lookup in occ_bs_env, we may need to iterate,
2592 as you can see implemented in lookupBndrSwap. Why?
2593 Consider case x of a { 1# -> e1; DEFAULT ->
2594 case x of b { 2# -> e2; DEFAULT ->
2595 case x of c { 3# -> e3; DEFAULT -> ..x..a..b.. }}}
2596 At the first case addBndrSwap will extend occ_bs_env with
2597 [x :-> a]
2598 At the second case we occ-anal the scrutinee 'x', which looks up
2599 'x in occ_bs_env, returning 'a', as it should.
2600 Then addBndrSwap will add [a :-> b] to occ_bs_env, yielding
2601 occ_bs_env = [x :-> a, a :-> b]
2602 At the third case we'll again look up 'x' which returns 'a'.
2603 But we don't want to stop the lookup there, else we'll end up with
2604 case x of a { 1# -> e1; DEFAULT ->
2605 case a of b { 2# -> e2; DEFAULT ->
2606 case a of c { 3# -> e3; DEFAULT -> ..a..b..c.. }}}
2607 Instead, we want iterate the lookup in addBndrSwap, to give
2608 case x of a { 1# -> e1; DEFAULT ->
2609 case a of b { 2# -> e2; DEFAULT ->
2610 case b of c { 3# -> e3; DEFAULT -> ..c..c..c.. }}}
2611 This makes a particular difference for case-merge, which works
2612 only if the scrutinee is the case-binder of the immediately enclosing
2613 case (Note [Merge Nested Cases] in GHC.Core.Opt.Simplify.Utils
2614 See #19581 for the bug report that showed this up.
2615
2616 (BS3) We need care when shadowing. Suppose [x :-> b] is in occ_bs_env,
2617 and we encounter:
2618 - \x. blah
2619 Here we want to delete the x-binding from occ_bs_env
2620
2621 - \b. blah
2622 This is harder: we really want to delete all bindings that
2623 have 'b' free in the range. That is a bit tiresome to implement,
2624 so we compromise. We keep occ_bs_rng, which is the set of
2625 free vars of rng(occc_bs_env). If a binder shadows any of these
2626 variables, we discard all of occ_bs_env. Safe, if a bit
2627 brutal. NB, however: the simplifer de-shadows the code, so the
2628 next time around this won't happen.
2629
2630 These checks are implemented in addInScope.
2631
2632 The occurrence analyser itself does /not/ do cloning. It could, in
2633 principle, but it'd make it a bit more complicated and there is no
2634 great benefit. The simplifer uses cloning to get a no-shadowing
2635 situation, the care-when-shadowing behaviour above isn't needed for
2636 long.
2637
2638 (BS4) The domain of occ_bs_env can include GlobaIds. Eg
2639 case M.foo of b { alts }
2640 We extend occ_bs_env with [M.foo :-> b]. That's fine.
2641
2642 (BS5) We have to apply the occ_bs_env substitution uniformly,
2643 including to (local) rules and unfoldings.
2644
2645 Historical note
2646 ---------------
2647 We used to do the binder-swap transformation by introducing
2648 a proxy let-binding, thus;
2649
2650 case x of b { pi -> ri }
2651 ==>
2652 case x of b { pi -> let x = b in ri }
2653
2654 But that had two problems:
2655
2656 1. If 'x' is an imported GlobalId, we'd end up with a GlobalId
2657 on the LHS of a let-binding which isn't allowed. We worked
2658 around this for a while by "localising" x, but it turned
2659 out to be very painful #16296,
2660
2661 2. In CorePrep we use the occurrence analyser to do dead-code
2662 elimination (see Note [Dead code in CorePrep]). But that
2663 occasionally led to an unlifted let-binding
2664 case x of b { DEFAULT -> let x::Int# = b in ... }
2665 which disobeys one of CorePrep's output invariants (no unlifted
2666 let-bindings) -- see #5433.
2667
2668 Doing a substitution (via occ_bs_env) is much better.
2669
2670 Note [Case of cast]
2671 ~~~~~~~~~~~~~~~~~~~
2672 Consider case (x `cast` co) of b { I# ->
2673 ... (case (x `cast` co) of {...}) ...
2674 We'd like to eliminate the inner case. That is the motivation for
2675 equation (2) in Note [Binder swap]. When we get to the inner case, we
2676 inline x, cancel the casts, and away we go.
2677
2678 Note [Zap case binders in proxy bindings]
2679 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2680 From the original
2681 case x of cb(dead) { p -> ...x... }
2682 we will get
2683 case x of cb(live) { p -> ...cb... }
2684
2685 Core Lint never expects to find an *occurrence* of an Id marked
2686 as Dead, so we must zap the OccInfo on cb before making the
2687 binding x = cb. See #5028.
2688
2689 NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier
2690 doesn't use it. So this is only to satisfy the perhaps-over-picky Lint.
2691
2692 Historical note [no-case-of-case]
2693 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2694 We *used* to suppress the binder-swap in case expressions when
2695 -fno-case-of-case is on. Old remarks:
2696 "This happens in the first simplifier pass,
2697 and enhances full laziness. Here's the bad case:
2698 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2699 If we eliminate the inner case, we trap it inside the I# v -> arm,
2700 which might prevent some full laziness happening. I've seen this
2701 in action in spectral/cichelli/Prog.hs:
2702 [(m,n) | m <- [1..max], n <- [1..max]]
2703 Hence the check for NoCaseOfCase."
2704 However, now the full-laziness pass itself reverses the binder-swap, so this
2705 check is no longer necessary.
2706
2707 Historical note [Suppressing the case binder-swap]
2708 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2709 This old note describes a problem that is also fixed by doing the
2710 binder-swap in OccAnal:
2711
2712 There is another situation when it might make sense to suppress the
2713 case-expression binde-swap. If we have
2714
2715 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2716 ...other cases .... }
2717
2718 We'll perform the binder-swap for the outer case, giving
2719
2720 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2721 ...other cases .... }
2722
2723 But there is no point in doing it for the inner case, because w1 can't
2724 be inlined anyway. Furthermore, doing the case-swapping involves
2725 zapping w2's occurrence info (see paragraphs that follow), and that
2726 forces us to bind w2 when doing case merging. So we get
2727
2728 case x of w1 { A -> let w2 = w1 in e1
2729 B -> let w2 = w1 in e2
2730 ...other cases .... }
2731
2732 This is plain silly in the common case where w2 is dead.
2733
2734 Even so, I can't see a good way to implement this idea. I tried
2735 not doing the binder-swap if the scrutinee was already evaluated
2736 but that failed big-time:
2737
2738 data T = MkT !Int
2739
2740 case v of w { MkT x ->
2741 case x of x1 { I# y1 ->
2742 case x of x2 { I# y2 -> ...
2743
2744 Notice that because MkT is strict, x is marked "evaluated". But to
2745 eliminate the last case, we must either make sure that x (as well as
2746 x1) has unfolding MkT y1. The straightforward thing to do is to do
2747 the binder-swap. So this whole note is a no-op.
2748
2749 It's fixed by doing the binder-swap in OccAnal because we can do the
2750 binder-swap unconditionally and still get occurrence analysis
2751 information right.
2752 -}
2753
2754 addBndrSwap :: OutExpr -> Id -> OccEnv -> OccEnv
2755 -- See Note [The binder-swap substitution]
2756 addBndrSwap scrut case_bndr
2757 env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars })
2758 | Just (scrut_var, mco) <- get_scrut_var (stripTicksTopE (const True) scrut)
2759 , scrut_var /= case_bndr
2760 -- Consider: case x of x { ... }
2761 -- Do not add [x :-> x] to occ_bs_env, else lookupBndrSwap will loop
2762 = env { occ_bs_env = extendVarEnv swap_env scrut_var (case_bndr', mco)
2763 , occ_bs_rng = rng_vars `extendVarSet` case_bndr'
2764 `unionVarSet` tyCoVarsOfMCo mco }
2765
2766 | otherwise
2767 = env
2768 where
2769 get_scrut_var :: OutExpr -> Maybe (OutVar, MCoercion)
2770 get_scrut_var (Var v) = Just (v, MRefl)
2771 get_scrut_var (Cast (Var v) co) = Just (v, MCo co) -- See Note [Case of cast]
2772 get_scrut_var _ = Nothing
2773
2774 case_bndr' = zapIdOccInfo case_bndr
2775 -- See Note [Zap case binders in proxy bindings]
2776
2777 lookupBndrSwap :: OccEnv -> Id -> (CoreExpr, Id)
2778 -- See Note [The binder-swap substitution]
2779 -- Returns an expression of the same type as Id
2780 lookupBndrSwap env@(OccEnv { occ_bs_env = bs_env }) bndr
2781 = case lookupVarEnv bs_env bndr of {
2782 Nothing -> (Var bndr, bndr) ;
2783 Just (bndr1, mco) ->
2784
2785 -- Why do we iterate here?
2786 -- See (BS2) in Note [The binder-swap substitution]
2787 case lookupBndrSwap env bndr1 of
2788 (fun, fun_id) -> (add_cast fun mco, fun_id) }
2789
2790 where
2791 add_cast fun MRefl = fun
2792 add_cast fun (MCo co) = Cast fun (mkSymCo co)
2793 -- We must switch that 'co' to 'sym co';
2794 -- see the comment with occ_bs_env
2795 -- No need to test for isReflCo, because 'co' came from
2796 -- a (Cast e co) and hence is unlikely to be Refl
2797
2798 {-
2799 ************************************************************************
2800 * *
2801 \subsection[OccurAnal-types]{OccEnv}
2802 * *
2803 ************************************************************************
2804
2805 Note [UsageDetails and zapping]
2806 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2807 On many occasions, we must modify all gathered occurrence data at once. For
2808 instance, all occurrences underneath a (non-one-shot) lambda set the
2809 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2810 that takes O(n) time and we will do this often---in particular, there are many
2811 places where tail calls are not allowed, and each of these causes all variables
2812 to get marked with 'NoTailCallInfo'.
2813
2814 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2815 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2816 recording which variables have been zapped in some way. Zapping all occurrence
2817 info then simply means setting the corresponding zapped set to the whole
2818 'OccInfoEnv', a fast O(1) operation.
2819 -}
2820
2821 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2822 -- INVARIANT: never IAmDead
2823 -- (Deadness is signalled by not being in the map at all)
2824
2825 type ZappedSet = OccInfoEnv -- Values are ignored
2826
2827 data UsageDetails
2828 = UD { ud_env :: !OccInfoEnv
2829 , ud_z_many :: !ZappedSet -- apply 'markMany' to these
2830 , ud_z_in_lam :: !ZappedSet -- apply 'markInsideLam' to these
2831 , ud_z_no_tail :: !ZappedSet } -- apply 'markNonTail' to these
2832 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2833
2834 instance Outputable UsageDetails where
2835 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2836
2837 -------------------
2838 -- UsageDetails API
2839
2840 andUDs, orUDs
2841 :: UsageDetails -> UsageDetails -> UsageDetails
2842 andUDs = combineUsageDetailsWith addOccInfo
2843 orUDs = combineUsageDetailsWith orOccInfo
2844
2845 mkOneOcc ::Id -> InterestingCxt -> JoinArity -> UsageDetails
2846 mkOneOcc id int_cxt arity
2847 | isLocalId id
2848 = emptyDetails { ud_env = unitVarEnv id occ_info }
2849 | otherwise
2850 = emptyDetails
2851 where
2852 occ_info = OneOcc { occ_in_lam = NotInsideLam
2853 , occ_n_br = oneBranch
2854 , occ_int_cxt = int_cxt
2855 , occ_tail = AlwaysTailCalled arity }
2856
2857 addManyOccId :: UsageDetails -> Id -> UsageDetails
2858 -- Add the non-committal (id :-> noOccInfo) to the usage details
2859 addManyOccId ud id = ud { ud_env = extendVarEnv (ud_env ud) id noOccInfo }
2860
2861 -- Add several occurrences, assumed not to be tail calls
2862 addManyOcc :: Var -> UsageDetails -> UsageDetails
2863 addManyOcc v u | isId v = addManyOccId u v
2864 | otherwise = u
2865 -- Give a non-committal binder info (i.e noOccInfo) because
2866 -- a) Many copies of the specialised thing can appear
2867 -- b) We don't want to substitute a BIG expression inside a RULE
2868 -- even if that's the only occurrence of the thing
2869 -- (Same goes for INLINE.)
2870
2871 addManyOccs :: UsageDetails -> VarSet -> UsageDetails
2872 addManyOccs usage id_set = nonDetStrictFoldUniqSet addManyOcc usage id_set
2873 -- It's OK to use nonDetStrictFoldUniqSet here because addManyOcc commutes
2874
2875 addLamCoVarOccs :: UsageDetails -> [Var] -> UsageDetails
2876 -- Add any CoVars free in the type of a lambda-binder
2877 -- See Note [Gather occurrences of coercion variables]
2878 addLamCoVarOccs uds bndrs
2879 = uds `addManyOccs` coVarsOfTypes (map varType bndrs)
2880
2881 delDetails :: UsageDetails -> Id -> UsageDetails
2882 delDetails ud bndr
2883 = ud `alterUsageDetails` (`delVarEnv` bndr)
2884
2885 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2886 delDetailsList ud bndrs
2887 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2888
2889 emptyDetails :: UsageDetails
2890 emptyDetails = UD { ud_env = emptyVarEnv
2891 , ud_z_many = emptyVarEnv
2892 , ud_z_in_lam = emptyVarEnv
2893 , ud_z_no_tail = emptyVarEnv }
2894
2895 isEmptyDetails :: UsageDetails -> Bool
2896 isEmptyDetails = isEmptyVarEnv . ud_env
2897
2898 markAllMany, markAllInsideLam, markAllNonTail, markAllManyNonTail
2899 :: UsageDetails -> UsageDetails
2900 markAllMany ud = ud { ud_z_many = ud_env ud }
2901 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2902 markAllNonTail ud = ud { ud_z_no_tail = ud_env ud }
2903
2904 markAllInsideLamIf, markAllNonTailIf :: Bool -> UsageDetails -> UsageDetails
2905
2906 markAllInsideLamIf True ud = markAllInsideLam ud
2907 markAllInsideLamIf False ud = ud
2908
2909 markAllNonTailIf True ud = markAllNonTail ud
2910 markAllNonTailIf False ud = ud
2911
2912
2913 markAllManyNonTail = markAllMany . markAllNonTail -- effectively sets to noOccInfo
2914
2915 markAllManyNonTailIf :: Bool -- If this is true
2916 -> UsageDetails -- Then do markAllManyNonTail on this
2917 -> UsageDetails
2918 markAllManyNonTailIf True uds = markAllManyNonTail uds
2919 markAllManyNonTailIf False uds = uds
2920
2921 lookupDetails :: UsageDetails -> Id -> OccInfo
2922 lookupDetails ud id
2923 = case lookupVarEnv (ud_env ud) id of
2924 Just occ -> doZapping ud id occ
2925 Nothing -> IAmDead
2926
2927 usedIn :: Id -> UsageDetails -> Bool
2928 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2929
2930 udFreeVars :: VarSet -> UsageDetails -> VarSet
2931 -- Find the subset of bndrs that are mentioned in uds
2932 udFreeVars bndrs ud = restrictFreeVars bndrs (ud_env ud)
2933
2934 restrictFreeVars :: VarSet -> OccInfoEnv -> VarSet
2935 restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs
2936
2937 -------------------
2938 -- Auxiliary functions for UsageDetails implementation
2939
2940 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2941 -> UsageDetails -> UsageDetails -> UsageDetails
2942 combineUsageDetailsWith plus_occ_info ud1 ud2
2943 | isEmptyDetails ud1 = ud2
2944 | isEmptyDetails ud2 = ud1
2945 | otherwise
2946 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2947 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2948 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2949 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2950
2951 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2952 doZapping ud var occ
2953 = doZappingByUnique ud (varUnique var) occ
2954
2955 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2956 doZappingByUnique (UD { ud_z_many = many
2957 , ud_z_in_lam = in_lam
2958 , ud_z_no_tail = no_tail })
2959 uniq occ
2960 = occ2
2961 where
2962 occ1 | uniq `elemVarEnvByKey` many = markMany occ
2963 | uniq `elemVarEnvByKey` in_lam = markInsideLam occ
2964 | otherwise = occ
2965 occ2 | uniq `elemVarEnvByKey` no_tail = markNonTail occ1
2966 | otherwise = occ1
2967
2968 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2969 alterUsageDetails !ud f
2970 = UD { ud_env = f (ud_env ud)
2971 , ud_z_many = f (ud_z_many ud)
2972 , ud_z_in_lam = f (ud_z_in_lam ud)
2973 , ud_z_no_tail = f (ud_z_no_tail ud) }
2974
2975 flattenUsageDetails :: UsageDetails -> UsageDetails
2976 flattenUsageDetails ud@(UD { ud_env = env })
2977 = UD { ud_env = mapUFM_Directly (doZappingByUnique ud) env
2978 , ud_z_many = emptyVarEnv
2979 , ud_z_in_lam = emptyVarEnv
2980 , ud_z_no_tail = emptyVarEnv }
2981
2982 -------------------
2983 -- See Note [Adjusting right-hand sides]
2984 adjustRhsUsage :: RecFlag -> Maybe JoinArity
2985 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2986 -> UsageDetails -- From body of lambda
2987 -> UsageDetails
2988 adjustRhsUsage is_rec mb_join_arity bndrs usage
2989 = markAllInsideLamIf (not one_shot) $
2990 markAllNonTailIf (not exact_join) $
2991 usage
2992 where
2993 one_shot = case mb_join_arity of
2994 Just join_arity
2995 | isRec is_rec -> False
2996 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2997 Nothing -> all isOneShotBndr bndrs
2998
2999 exact_join = exactJoin mb_join_arity bndrs
3000
3001 exactJoin :: Maybe JoinArity -> [a] -> Bool
3002 exactJoin Nothing _ = False
3003 exactJoin (Just join_arity) args = args `lengthIs` join_arity
3004 -- Remember join_arity includes type binders
3005
3006 type IdWithOccInfo = Id
3007
3008 tagLamBinders :: UsageDetails -- Of scope
3009 -> [Id] -- Binders
3010 -> (UsageDetails, -- Details with binders removed
3011 [IdWithOccInfo]) -- Tagged binders
3012 tagLamBinders usage binders
3013 = usage' `seq` (usage', bndrs')
3014 where
3015 (usage', bndrs') = mapAccumR tagLamBinder usage binders
3016
3017 tagLamBinder :: UsageDetails -- Of scope
3018 -> Id -- Binder
3019 -> (UsageDetails, -- Details with binder removed
3020 IdWithOccInfo) -- Tagged binders
3021 -- Used for lambda and case binders
3022 -- It copes with the fact that lambda bindings can have a
3023 -- stable unfolding, used for join points
3024 tagLamBinder usage bndr
3025 = (usage2, bndr')
3026 where
3027 occ = lookupDetails usage bndr
3028 bndr' = setBinderOcc (markNonTail occ) bndr
3029 -- Don't try to make an argument into a join point
3030 usage1 = usage `delDetails` bndr
3031 usage2 | isId bndr = addManyOccs usage1 (idUnfoldingVars bndr)
3032 -- This is effectively the RHS of a
3033 -- non-join-point binding, so it's okay to use
3034 -- addManyOccsSet, which assumes no tail calls
3035 | otherwise = usage1
3036
3037 tagNonRecBinder :: TopLevelFlag -- At top level?
3038 -> UsageDetails -- Of scope
3039 -> CoreBndr -- Binder
3040 -> (UsageDetails, -- Details with binder removed
3041 IdWithOccInfo) -- Tagged binder
3042
3043 tagNonRecBinder lvl usage binder
3044 = let
3045 occ = lookupDetails usage binder
3046 will_be_join = decideJoinPointHood lvl usage [binder]
3047 occ' | will_be_join = -- must already be marked AlwaysTailCalled
3048 assert (isAlwaysTailCalled occ) occ
3049 | otherwise = markNonTail occ
3050 binder' = setBinderOcc occ' binder
3051 usage' = usage `delDetails` binder
3052 in
3053 usage' `seq` (usage', binder')
3054
3055 tagRecBinders :: TopLevelFlag -- At top level?
3056 -> UsageDetails -- Of body of let ONLY
3057 -> [(CoreBndr, -- Binder
3058 UsageDetails, -- RHS usage details
3059 [CoreBndr])] -- Lambdas in new RHS
3060 -> (UsageDetails, -- Adjusted details for whole scope,
3061 -- with binders removed
3062 [IdWithOccInfo]) -- Tagged binders
3063 -- Substantially more complicated than non-recursive case. Need to adjust RHS
3064 -- details *before* tagging binders (because the tags depend on the RHSes).
3065 tagRecBinders lvl body_uds triples
3066 = let
3067 (bndrs, rhs_udss, _) = unzip3 triples
3068
3069 -- 1. Determine join-point-hood of whole group, as determined by
3070 -- the *unadjusted* usage details
3071 unadj_uds = foldr andUDs body_uds rhs_udss
3072 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
3073
3074 -- 2. Adjust usage details of each RHS, taking into account the
3075 -- join-point-hood decision
3076 rhs_udss' = map adjust triples
3077 adjust (bndr, rhs_uds, rhs_bndrs)
3078 = adjustRhsUsage Recursive mb_join_arity rhs_bndrs rhs_uds
3079 where
3080 -- Can't use willBeJoinId_maybe here because we haven't tagged the
3081 -- binder yet (the tag depends on these adjustments!)
3082 mb_join_arity
3083 | will_be_joins
3084 , let occ = lookupDetails unadj_uds bndr
3085 , AlwaysTailCalled arity <- tailCallInfo occ
3086 = Just arity
3087 | otherwise
3088 = assert (not will_be_joins) -- Should be AlwaysTailCalled if
3089 Nothing -- we are making join points!
3090
3091 -- 3. Compute final usage details from adjusted RHS details
3092 adj_uds = foldr andUDs body_uds rhs_udss'
3093
3094 -- 4. Tag each binder with its adjusted details
3095 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
3096 | bndr <- bndrs ]
3097
3098 -- 5. Drop the binders from the adjusted details and return
3099 usage' = adj_uds `delDetailsList` bndrs
3100 in
3101 (usage', bndrs')
3102
3103 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
3104 setBinderOcc occ_info bndr
3105 | isTyVar bndr = bndr
3106 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
3107 then bndr
3108 else setIdOccInfo bndr noOccInfo
3109 -- Don't use local usage info for visible-elsewhere things
3110 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
3111 -- about to re-generate it and it shouldn't be "sticky"
3112
3113 | otherwise = setIdOccInfo bndr occ_info
3114
3115 -- | Decide whether some bindings should be made into join points or not.
3116 -- Returns `False` if they can't be join points. Note that it's an
3117 -- all-or-nothing decision, as if multiple binders are given, they're
3118 -- assumed to be mutually recursive.
3119 --
3120 -- It must, however, be a final decision. If we say "True" for 'f',
3121 -- and then subsequently decide /not/ make 'f' into a join point, then
3122 -- the decision about another binding 'g' might be invalidated if (say)
3123 -- 'f' tail-calls 'g'.
3124 --
3125 -- See Note [Invariants on join points] in "GHC.Core".
3126 decideJoinPointHood :: TopLevelFlag -> UsageDetails
3127 -> [CoreBndr]
3128 -> Bool
3129 decideJoinPointHood TopLevel _ _
3130 = False
3131 decideJoinPointHood NotTopLevel usage bndrs
3132 | isJoinId (head bndrs)
3133 = warnPprTrace (not all_ok)
3134 (text "OccurAnal failed to rediscover join point(s):" <+> ppr bndrs)
3135 all_ok
3136 | otherwise
3137 = all_ok
3138 where
3139 -- See Note [Invariants on join points]; invariants cited by number below.
3140 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
3141 all_ok = -- Invariant 3: Either all are join points or none are
3142 all ok bndrs
3143
3144 ok bndr
3145 | -- Invariant 1: Only tail calls, all same join arity
3146 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
3147
3148 , -- Invariant 1 as applied to LHSes of rules
3149 all (ok_rule arity) (idCoreRules bndr)
3150
3151 -- Invariant 2a: stable unfoldings
3152 -- See Note [Join points and INLINE pragmas]
3153 , ok_unfolding arity (realIdUnfolding bndr)
3154
3155 -- Invariant 4: Satisfies polymorphism rule
3156 , isValidJoinPointType arity (idType bndr)
3157 = True
3158
3159 | otherwise
3160 = False
3161
3162 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
3163 ok_rule join_arity (Rule { ru_args = args })
3164 = args `lengthIs` join_arity
3165 -- Invariant 1 as applied to LHSes of rules
3166
3167 -- ok_unfolding returns False if we should /not/ convert a non-join-id
3168 -- into a join-id, even though it is AlwaysTailCalled
3169 ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs })
3170 = not (isStableSource src && join_arity > joinRhsArity rhs)
3171 ok_unfolding _ (DFunUnfolding {})
3172 = False
3173 ok_unfolding _ _
3174 = True
3175
3176 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
3177 willBeJoinId_maybe bndr
3178 | isId bndr
3179 , AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
3180 = Just arity
3181 | otherwise
3182 = isJoinId_maybe bndr
3183
3184
3185 {- Note [Join points and INLINE pragmas]
3186 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3187 Consider
3188 f x = let g = \x. not -- Arity 1
3189 {-# INLINE g #-}
3190 in case x of
3191 A -> g True True
3192 B -> g True False
3193 C -> blah2
3194
3195 Here 'g' is always tail-called applied to 2 args, but the stable
3196 unfolding captured by the INLINE pragma has arity 1. If we try to
3197 convert g to be a join point, its unfolding will still have arity 1
3198 (since it is stable, and we don't meddle with stable unfoldings), and
3199 Lint will complain (see Note [Invariants on join points], (2a), in
3200 GHC.Core. #13413.
3201
3202 Moreover, since g is going to be inlined anyway, there is no benefit
3203 from making it a join point.
3204
3205 If it is recursive, and uselessly marked INLINE, this will stop us
3206 making it a join point, which is annoying. But occasionally
3207 (notably in class methods; see Note [Instances and loop breakers] in
3208 GHC.Tc.TyCl.Instance) we mark recursive things as INLINE but the recursion
3209 unravels; so ignoring INLINE pragmas on recursive things isn't good
3210 either.
3211
3212 See Invariant 2a of Note [Invariants on join points] in GHC.Core
3213
3214
3215 ************************************************************************
3216 * *
3217 \subsection{Operations over OccInfo}
3218 * *
3219 ************************************************************************
3220 -}
3221
3222 markMany, markInsideLam, markNonTail :: OccInfo -> OccInfo
3223
3224 markMany IAmDead = IAmDead
3225 markMany occ = ManyOccs { occ_tail = occ_tail occ }
3226
3227 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = IsInsideLam }
3228 markInsideLam occ = occ
3229
3230 markNonTail IAmDead = IAmDead
3231 markNonTail occ = occ { occ_tail = NoTailCallInfo }
3232
3233 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
3234
3235 addOccInfo a1 a2 = assert (not (isDeadOcc a1 || isDeadOcc a2)) $
3236 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
3237 tailCallInfo a2 }
3238 -- Both branches are at least One
3239 -- (Argument is never IAmDead)
3240
3241 -- (orOccInfo orig new) is used
3242 -- when combining occurrence info from branches of a case
3243
3244 orOccInfo (OneOcc { occ_in_lam = in_lam1
3245 , occ_n_br = nbr1
3246 , occ_int_cxt = int_cxt1
3247 , occ_tail = tail1 })
3248 (OneOcc { occ_in_lam = in_lam2
3249 , occ_n_br = nbr2
3250 , occ_int_cxt = int_cxt2
3251 , occ_tail = tail2 })
3252 = OneOcc { occ_n_br = nbr1 + nbr2
3253 , occ_in_lam = in_lam1 `mappend` in_lam2
3254 , occ_int_cxt = int_cxt1 `mappend` int_cxt2
3255 , occ_tail = tail1 `andTailCallInfo` tail2 }
3256
3257 orOccInfo a1 a2 = assert (not (isDeadOcc a1 || isDeadOcc a2)) $
3258 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
3259 tailCallInfo a2 }
3260
3261 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
3262 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
3263 | arity1 == arity2 = info
3264 andTailCallInfo _ _ = NoTailCallInfo