never executed always true always false
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
5 -}
6
7
8 module GHC.Core.Opt.WorkWrap ( wwTopBinds ) where
9
10 import GHC.Prelude
11
12 import GHC.Driver.Session
13
14 import GHC.Core.Opt.Arity ( manifestArity )
15 import GHC.Core
16 import GHC.Core.Unfold.Make
17 import GHC.Core.Utils ( exprType, exprIsHNF )
18 import GHC.Core.Type
19 import GHC.Core.Opt.WorkWrap.Utils
20 import GHC.Core.FamInstEnv
21 import GHC.Core.SimpleOpt
22
23 import GHC.Types.Var
24 import GHC.Types.Id
25 import GHC.Types.Id.Info
26 import GHC.Types.Unique.Supply
27 import GHC.Types.Basic
28 import GHC.Types.Demand
29 import GHC.Types.Cpr
30 import GHC.Types.SourceText
31 import GHC.Types.Unique
32
33 import GHC.Utils.Misc
34 import GHC.Utils.Outputable
35 import GHC.Utils.Panic
36 import GHC.Utils.Panic.Plain
37 import GHC.Utils.Monad
38 import GHC.Utils.Trace
39 import GHC.Unit.Types
40
41 {-
42 We take Core bindings whose binders have:
43
44 \begin{enumerate}
45
46 \item Strictness attached (by the front-end of the strictness
47 analyser), and / or
48
49 \item Constructed Product Result information attached by the CPR
50 analysis pass.
51
52 \end{enumerate}
53
54 and we return some ``plain'' bindings which have been
55 worker/wrapper-ified, meaning:
56
57 \begin{enumerate}
58
59 \item Functions have been split into workers and wrappers where
60 appropriate. If a function has both strictness and CPR properties
61 then only one worker/wrapper doing both transformations is produced;
62
63 \item Binders' @IdInfos@ have been updated to reflect the existence of
64 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
65 info for exported values).
66 \end{enumerate}
67 -}
68
69 wwTopBinds :: Module -> DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram
70
71 wwTopBinds this_mod dflags fam_envs us top_binds
72 = initUs_ us $ do
73 top_binds' <- mapM (wwBind ww_opts) top_binds
74 return (concat top_binds')
75 where
76 ww_opts = initWwOpts this_mod dflags fam_envs
77
78 {-
79 ************************************************************************
80 * *
81 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
82 * *
83 ************************************************************************
84
85 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
86 turn. Non-recursive case first, then recursive...
87 -}
88
89 wwBind :: WwOpts
90 -> CoreBind
91 -> UniqSM [CoreBind] -- returns a WwBinding intermediate form;
92 -- the caller will convert to Expr/Binding,
93 -- as appropriate.
94
95 wwBind ww_opts (NonRec binder rhs) = do
96 new_rhs <- wwExpr ww_opts rhs
97 new_pairs <- tryWW ww_opts NonRecursive binder new_rhs
98 return [NonRec b e | (b,e) <- new_pairs]
99 -- Generated bindings must be non-recursive
100 -- because the original binding was.
101
102 wwBind ww_opts (Rec pairs)
103 = return . Rec <$> concatMapM do_one pairs
104 where
105 do_one (binder, rhs) = do new_rhs <- wwExpr ww_opts rhs
106 tryWW ww_opts Recursive binder new_rhs
107
108 {-
109 @wwExpr@ basically just walks the tree, looking for appropriate
110 annotations that can be used. Remember it is @wwBind@ that does the
111 matching by looking for strict arguments of the correct type.
112 @wwExpr@ is a version that just returns the ``Plain'' Tree.
113 -}
114
115 wwExpr :: WwOpts -> CoreExpr -> UniqSM CoreExpr
116
117 wwExpr _ e@(Type {}) = return e
118 wwExpr _ e@(Coercion {}) = return e
119 wwExpr _ e@(Lit {}) = return e
120 wwExpr _ e@(Var {}) = return e
121
122 wwExpr ww_opts (Lam binder expr)
123 = Lam new_binder <$> wwExpr ww_opts expr
124 where new_binder | isId binder = zapIdUsedOnceInfo binder
125 | otherwise = binder
126 -- See Note [Zapping Used Once info in WorkWrap]
127
128 wwExpr ww_opts (App f a)
129 = App <$> wwExpr ww_opts f <*> wwExpr ww_opts a
130
131 wwExpr ww_opts (Tick note expr)
132 = Tick note <$> wwExpr ww_opts expr
133
134 wwExpr ww_opts (Cast expr co) = do
135 new_expr <- wwExpr ww_opts expr
136 return (Cast new_expr co)
137
138 wwExpr ww_opts (Let bind expr)
139 = mkLets <$> wwBind ww_opts bind <*> wwExpr ww_opts expr
140
141 wwExpr ww_opts (Case expr binder ty alts) = do
142 new_expr <- wwExpr ww_opts expr
143 new_alts <- mapM ww_alt alts
144 let new_binder = zapIdUsedOnceInfo binder
145 -- See Note [Zapping Used Once info in WorkWrap]
146 return (Case new_expr new_binder ty new_alts)
147 where
148 ww_alt (Alt con binders rhs) = do
149 new_rhs <- wwExpr ww_opts rhs
150 let new_binders = [ if isId b then zapIdUsedOnceInfo b else b
151 | b <- binders ]
152 -- See Note [Zapping Used Once info in WorkWrap]
153 return (Alt con new_binders new_rhs)
154
155 {-
156 ************************************************************************
157 * *
158 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
159 * *
160 ************************************************************************
161
162 @tryWW@ just accumulates arguments, converts strictness info from the
163 front-end into the proper form, then calls @mkWwBodies@ to do
164 the business.
165
166 The only reason this is monadised is for the unique supply.
167
168 Note [Don't w/w INLINE things]
169 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
170 It's very important to refrain from w/w-ing an INLINE function (ie one
171 with a stable unfolding) because the wrapper will then overwrite the
172 old stable unfolding with the wrapper code.
173
174 Furthermore, if the programmer has marked something as INLINE,
175 we may lose by w/w'ing it.
176
177 If the strictness analyser is run twice, this test also prevents
178 wrappers (which are INLINEd) from being re-done. (You can end up with
179 several liked-named Ids bouncing around at the same time---absolute
180 mischief.)
181
182 Notice that we refrain from w/w'ing an INLINE function even if it is
183 in a recursive group. It might not be the loop breaker. (We could
184 test for loop-breaker-hood, but I'm not sure that ever matters.)
185
186 Note [Worker/wrapper for INLINABLE functions]
187 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188 If we have
189 {-# INLINABLE f #-}
190 f :: Ord a => [a] -> Int -> a
191 f x y = ....f....
192
193 where f is strict in y, we might get a more efficient loop by w/w'ing
194 f. But that would make a new unfolding which would overwrite the old
195 one! So the function would no longer be INLINABLE, and in particular
196 will not be specialised at call sites in other modules.
197
198 This comes up in practice (#6056).
199
200 Solution: do the w/w for strictness analysis, but transfer the Stable
201 unfolding to the *worker*. So we will get something like this:
202
203 {-# INLINE[2] f #-}
204 f :: Ord a => [a] -> Int -> a
205 f d x y = case y of I# y' -> fw d x y'
206
207 {-# INLINABLE[2] fw #-}
208 fw :: Ord a => [a] -> Int# -> a
209 fw d x y' = let y = I# y' in ...f...
210
211 How do we "transfer the unfolding"? Easy: by using the old one, wrapped
212 in work_fn! See GHC.Core.Unfold.Make.mkWorkerUnfolding.
213
214 Note [No worker/wrapper for record selectors]
215 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216 We sometimes generate a lot of record selectors, and generally the
217 don't benefit from worker/wrapper. Yes, mkWwBodies would find a w/w split,
218 but it is then suppressed by the certainlyWillInline test in splitFun.
219
220 The wasted effort in mkWwBodies makes a measurable difference in
221 compile time (see MR !2873), so although it's a terribly ad-hoc test,
222 we just check here for record selectors, and do a no-op in that case.
223
224 I did look for a generalisation, so that it's not just record
225 selectors that benefit. But you'd need a cheap test for "this
226 function will definitely get a w/w split" and that's hard to predict
227 in advance...the logic in mkWwBodies is complex. So I've left the
228 super-simple test, with this Note to explain.
229
230 NB: record selectors are ordinary functions, inlined iff GHC wants to,
231 so won't be caught by the preceding isInlineUnfolding test in tryWW.
232
233 Note [Worker/wrapper for NOINLINE functions]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
235 We used to disable worker/wrapper for NOINLINE things, but it turns out
236 this can cause unnecessary reboxing of values. Consider
237
238 {-# NOINLINE f #-}
239 f :: Int -> a
240 f x = error (show x)
241
242 g :: Bool -> Bool -> Int -> Int
243 g True True p = f p
244 g False True p = p + 1
245 g b False p = g b True p
246
247 the strictness analysis will discover f and g are strict, but because f
248 has no wrapper, the worker for g will rebox p. So we get
249
250 $wg x y p# =
251 let p = I# p# in -- Yikes! Reboxing!
252 case x of
253 False ->
254 case y of
255 False -> $wg False True p#
256 True -> +# p# 1#
257 True ->
258 case y of
259 False -> $wg True True p#
260 True -> case f p of { }
261
262 g x y p = case p of (I# p#) -> $wg x y p#
263
264 Now, in this case the reboxing will float into the True branch, and so
265 the allocation will only happen on the error path. But it won't float
266 inwards if there are multiple branches that call (f p), so the reboxing
267 will happen on every call of g. Disaster.
268
269 Solution: do worker/wrapper even on NOINLINE things; but move the
270 NOINLINE pragma to the worker.
271
272 (See #13143 for a real-world example.)
273
274 It is crucial that we do this for *all* NOINLINE functions. #10069
275 demonstrates what happens when we promise to w/w a (NOINLINE) leaf
276 function, but fail to deliver:
277
278 data C = C Int# Int#
279
280 {-# NOINLINE c1 #-}
281 c1 :: C -> Int#
282 c1 (C _ n) = n
283
284 {-# NOINLINE fc #-}
285 fc :: C -> Int#
286 fc c = 2 *# c1 c
287
288 Failing to w/w `c1`, but still w/wing `fc` leads to the following code:
289
290 c1 :: C -> Int#
291 c1 (C _ n) = n
292
293 $wfc :: Int# -> Int#
294 $wfc n = let c = C 0# n in 2 #* c1 c
295
296 fc :: C -> Int#
297 fc (C _ n) = $wfc n
298
299 Yikes! The reboxed `C` in `$wfc` can't cancel out, so we are in a bad place.
300 This generalises to any function that derives its strictness signature from
301 its callees, so we have to make sure that when a function announces particular
302 strictness properties, we have to w/w them accordingly, even if it means
303 splitting a NOINLINE function.
304
305 Note [Worker activation]
306 ~~~~~~~~~~~~~~~~~~~~~~~~
307 Follows on from Note [Worker/wrapper for INLINABLE functions]
308
309 It is *vital* that if the worker gets an INLINABLE pragma (from the
310 original function), then the worker has the same phase activation as
311 the wrapper (or later). That is necessary to allow the wrapper to
312 inline into the worker's unfolding: see GHC.Core.Opt.Simplify.Utils
313 Note [Simplifying inside stable unfoldings].
314
315 If the original is NOINLINE, it's important that the worker inherits the
316 original activation. Consider
317
318 {-# NOINLINE expensive #-}
319 expensive x = x + 1
320
321 f y = let z = expensive y in ...
322
323 If expensive's worker inherits the wrapper's activation,
324 we'll get this (because of the compromise in point (2) of
325 Note [Wrapper activation])
326
327 {-# NOINLINE[Final] $wexpensive #-}
328 $wexpensive x = x + 1
329 {-# INLINE[Final] expensive #-}
330 expensive x = $wexpensive x
331
332 f y = let z = expensive y in ...
333
334 and $wexpensive will be immediately inlined into expensive, followed by
335 expensive into f. This effectively removes the original NOINLINE!
336
337 Otherwise, nothing is lost by giving the worker the same activation as the
338 wrapper, because the worker won't have any chance of inlining until the
339 wrapper does; there's no point in giving it an earlier activation.
340
341 Note [Don't w/w inline small non-loop-breaker things]
342 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
343 In general, we refrain from w/w-ing *small* functions, which are not
344 loop breakers, because they'll inline anyway. But we must take care:
345 it may look small now, but get to be big later after other inlining
346 has happened. So we take the precaution of adding a StableUnfolding
347 for any such functions.
348
349 I made this change when I observed a big function at the end of
350 compilation with a useful strictness signature but no w-w. (It was
351 small during demand analysis, we refrained from w/w, and then got big
352 when something was inlined in its rhs.) When I measured it on nofib,
353 it didn't make much difference; just a few percent improved allocation
354 on one benchmark (bspt/Euclid.space). But nothing got worse.
355
356 There is an infelicity though. We may get something like
357 f = g val
358 ==>
359 g x = case gw x of r -> I# r
360
361 f {- InlineStable, Template = g val -}
362 f = case gw x of r -> I# r
363
364 The code for f duplicates that for g, without any real benefit. It
365 won't really be executed, because calls to f will go via the inlining.
366
367 Note [Don't w/w join points for CPR]
368 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
369 There's no point in exploiting CPR info on a join point. If the whole function
370 is getting CPR'd, then the case expression around the worker function will get
371 pushed into the join point by the simplifier, which will have the same effect
372 that w/w'ing for CPR would have - the result will be returned in an unboxed
373 tuple.
374
375 f z = let join j x y = (x+1, y+1)
376 in case z of A -> j 1 2
377 B -> j 2 3
378
379 =>
380
381 f z = case $wf z of (# a, b #) -> (a, b)
382 $wf z = case (let join j x y = (x+1, y+1)
383 in case z of A -> j 1 2
384 B -> j 2 3) of (a, b) -> (# a, b #)
385
386 =>
387
388 f z = case $wf z of (# a, b #) -> (a, b)
389 $wf z = let join j x y = (# x+1, y+1 #)
390 in case z of A -> j 1 2
391 B -> j 2 3
392
393 Note that we still want to give `j` the CPR property, so that `f` has it. So
394 CPR *analyse* join points as regular functions, but don't *transform* them.
395
396 We could retain the CPR /signature/ on the worker after W/W, but it would
397 become outright wrong if the Simplifier pushes a non-trivial continuation
398 into it. For example:
399 case (let $j x = (x,x) in ...) of alts
400 ==>
401 let $j x = case (x,x) of alts in case ... of alts
402 Before pushing the case in, `$j` has the CPR property, but not afterwards.
403
404 So we simply zap the CPR signature for join pints as part of the W/W pass.
405 The signature served its purpose during CPR analysis in propagating the
406 CPR property of `$j`.
407
408 Doing W/W for returned products on a join point would be tricky anyway, as the
409 worker could not be a join point because it would not be tail-called. However,
410 doing the *argument* part of W/W still works for join points, since the wrapper
411 body will make a tail call:
412
413 f z = let join j x y = x + y
414 in ...
415
416 =>
417
418 f z = let join $wj x# y# = x# +# y#
419 j x y = case x of I# x# ->
420 case y of I# y# ->
421 $wj x# y#
422 in ...
423
424 Note [Wrapper activation]
425 ~~~~~~~~~~~~~~~~~~~~~~~~~
426 When should the wrapper inlining be active?
427
428 1. It must not be active earlier than the current Activation of the Id,
429 because we must give rewrite rules mentioning the wrapper and
430 specialisation a chance to fire.
431 See Note [Worker/wrapper for INLINABLE functions]
432 and Note [Worker activation]
433
434 2. It should be active at some point, despite (1) because of
435 Note [Worker/wrapper for NOINLINE functions]
436
437 3. For ordinary functions with no pragmas we want to inline the
438 wrapper as early as possible (#15056). Suppose another module
439 defines f !x xs = ... foldr k z xs ...
440 and suppose we have the usual foldr/build RULE. Then if we have
441 a call `f x [1..x]`, we'd expect to inline f and the RULE will fire.
442 But if f is w/w'd (which it might be), we want the inlining to
443 occur just as if it hadn't been.
444
445 (This only matters if f's RHS is big enough to w/w, but small
446 enough to inline given the call site, but that can happen.)
447
448 4. We do not want to inline the wrapper before specialisation.
449 module Foo where
450 f :: Num a => a -> Int -> a
451 f n 0 = n -- Strict in the Int, hence wrapper
452 f n x = f (n+n) (x-1)
453
454 g :: Int -> Int
455 g x = f x x -- Provokes a specialisation for f
456
457 module Bar where
458 import Foo
459
460 h :: Int -> Int
461 h x = f 3 x
462
463 In module Bar we want to give specialisations a chance to fire
464 before inlining f's wrapper.
465
466 Historical note: At one stage I tried making the wrapper inlining
467 always-active, and that had a very bad effect on nofib/imaginary/x2n1;
468 a wrapper was inlined before the specialisation fired.
469
470 Reminder: Note [Don't w/w INLINE things], so we don't need to worry
471 about INLINE things here.
472
473 Conclusion:
474 - If the user said NOINLINE[n] or INLINABLE[n], respect that by putting
475 INLINE[n] on the wrapper (and NOINLINE[n]/INLINABLE[n] on the worker).
476
477 - If the user said NOINLINE, inline the wrapper only in
478 FinalPhase, which is after all the numbered, user-visible phases (and put
479 the original pragma on the worker). That means that all rules will have had
480 a chance to fire.
481 NB: Similar to InitialPhase, users can't write INLINE[Final] f;
482 it's syntactically illegal. See Note [Compiler phases].
483
484 - Otherwise (no pragma or INLINABLE) inline the wrapper in the first phase
485 *after* InitialPhase. We run InitialPhase before the specialiser so that
486 will not inline the wrapper before specialisation; but it will do so
487 immediately afterwards.
488
489 Note [Wrapper NoUserInlinePrag]
490 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
491 We use NoUserInlinePrag on the wrapper, to say that there is no
492 user-specified inline pragma. (The worker inherits that; see Note
493 [Worker/wrapper for INLINABLE functions].) The wrapper has no pragma
494 given by the user.
495
496 (Historical note: we used to give the wrapper an INLINE pragma, but
497 CSE will not happen if there is a user-specified pragma, but should
498 happen for w/w’ed things (#14186). We don't need a pragma, because
499 everything we needs is expressed by (a) the stable unfolding and (b)
500 the inl_act activation.)
501
502 Note [Drop absent bindings]
503 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
504 Consider (#19824):
505 let t = ...big...
506 in ...(f t x)...
507
508 were `f` ignores its first argument. With luck f's wrapper will inline
509 thereby dropping `t`, but maybe not: the arguments to f all look boring.
510
511 So we pre-empt the problem by replacing t's RHS with an absent filler.
512 Simple and effective.
513 -}
514
515 tryWW :: WwOpts
516 -> RecFlag
517 -> Id -- The fn binder
518 -> CoreExpr -- The bound rhs; its innards
519 -- are already ww'd
520 -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs;
521 -- if one, then no worker (only
522 -- the orig "wrapper" lives on);
523 -- if two, then a worker and a
524 -- wrapper.
525 tryWW ww_opts is_rec fn_id rhs
526 -- Do this even if there is a NOINLINE pragma
527 -- See Note [Worker/wrapper for NOINLINE functions]
528
529 -- See Note [Drop absent bindings]
530 | isAbsDmd (demandInfo fn_info)
531 , not (isJoinId fn_id)
532 , Just filler <- mkAbsentFiller ww_opts fn_id
533 = return [(new_fn_id, filler)]
534
535 -- See Note [Don't w/w INLINE things]
536 | hasInlineUnfolding fn_info
537 = return [(new_fn_id, rhs)]
538
539 -- See Note [No worker/wrapper for record selectors]
540 | isRecordSelector fn_id
541 = return [ (new_fn_id, rhs ) ]
542
543 | is_fun && is_eta_exp
544 = splitFun ww_opts new_fn_id rhs
545
546 -- See Note [Thunk splitting]
547 | isNonRec is_rec, is_thunk
548 = splitThunk ww_opts is_rec new_fn_id rhs
549
550 | otherwise
551 = return [ (new_fn_id, rhs) ]
552
553 where
554 fn_info = idInfo fn_id
555 (wrap_dmds, _) = splitDmdSig (dmdSigInfo fn_info)
556 new_fn_id = zap_join_cpr $ zap_usage fn_id
557
558 zap_usage = zapIdUsedOnceInfo . zapIdUsageEnvInfo
559 -- See Note [Zapping DmdEnv after Demand Analyzer] and
560 -- See Note [Zapping Used Once info in WorkWrap]
561
562 zap_join_cpr id
563 | isJoinId id = id `setIdCprSig` topCprSig
564 | otherwise = id
565 -- See Note [Don't w/w join points for CPR]
566
567 -- is_eta_exp: see Note [Don't eta expand in w/w]
568 is_eta_exp = length wrap_dmds == manifestArity rhs
569 is_fun = notNull wrap_dmds || isJoinId fn_id
570 is_thunk = not is_fun && not (exprIsHNF rhs) && not (isJoinId fn_id)
571 && not (isUnliftedType (idType fn_id))
572
573 {-
574 Note [Zapping DmdEnv after Demand Analyzer]
575 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
576 In the worker-wrapper pass we zap the DmdEnv. Why?
577 (a) it is never used again
578 (b) it wastes space
579 (c) it becomes incorrect as things are cloned, because
580 we don't push the substitution into it
581
582 Why here?
583 * Because we don’t want to do it in the Demand Analyzer, as we never know
584 there when we are doing the last pass.
585 * We want them to be still there at the end of DmdAnal, so that
586 -ddump-str-anal contains them.
587 * We don’t want a second pass just for that.
588 * WorkWrap looks at all bindings anyway.
589
590 We also need to do it in TidyCore.tidyLetBndr to clean up after the
591 final, worker/wrapper-less run of the demand analyser (see
592 Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal).
593
594 Note [Zapping Used Once info in WorkWrap]
595 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
596 During the work/wrap pass, using zapIdUsedOnceInfo, we zap the "used once" info
597 * on every binder (let binders, case binders, lambda binders)
598 * in both demands and in strictness signatures
599 * recursively
600
601 Why?
602 * The simplifier may happen to transform code in a way that invalidates the
603 data (see #11731 for an example).
604 * It is not used in later passes, up to code generation.
605
606 At first it's hard to see how the simplifier might invalidate it (and
607 indeed for a while I thought it couldn't: #19482), but it's not quite
608 as simple as I thought. Consider this:
609 {-# STRICTNESS SIG <SP(M,A)> #-}
610 f p = let v = case p of (a,b) -> a
611 in p `seq` (v,v)
612
613 I think we'll give `f` the strictness signature `<SP(M,A)>`, where the
614 `M` sayd that we'll evaluate the first component of the pair at most
615 once. Why? Because the RHS of the thunk `v` is evaluated at most
616 once.
617
618 But now let's worker/wrapper f:
619 {-# STRICTNESS SIG <M> #-}
620 $wf p1 = let p2 = absentError "urk" in
621 let p = (p1,p2) in
622 let v = case p of (a,b) -> a
623 in p `seq` (v,v)
624
625 where I've gotten the demand on `p1` by decomposing the P(M,A) argument demand.
626 This rapidly simplifies to
627 {-# STRICTNESS SIG <M> #-}
628 $wf p1 = let v = p1 in
629 (v,v)
630
631 and thence to `(p1,p1)` by inlining the trivial let. Now the demand on `p1` should
632 not be at most once!!
633
634 Conclusion: used-once info is fragile to simplification, because of
635 the non-monotonic behaviour of let's, which turn used-many into
636 used-once. So indeed we should zap this info in worker/wrapper.
637
638 Conclusion: kill it during worker/wrapper, using `zapUsedOnceInfo`.
639 Both the *demand signature* of the binder, and the *demand-info* of
640 the binder. Moreover, do so recursively.
641
642 You might wonder: why do we generate used-once info if we then throw
643 it away. The main reason is that we do a final run of the demand analyser,
644 immediately before CoreTidy, which is /not/ followed by worker/wrapper; it
645 is there only to generate used-once info for single-entry thunks.
646
647 Note [Don't eta expand in w/w]
648 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
649 A binding where the manifestArity of the RHS is less than idArity of the binder
650 means GHC.Core.Opt.Arity didn't eta expand that binding. When this happens, it does so
651 for a reason (see Note [exprArity invariant] in GHC.Core.Opt.Arity) and we probably have
652 a PAP, cast or trivial expression as RHS.
653
654 Below is a historical account of what happened when w/w still did eta expansion.
655 Nowadays, it doesn't do that, but will simply w/w for the wrong arity, unleashing
656 a demand signature meant for e.g. 2 args to be unleashed for e.g. 1 arg
657 (manifest arity). That's at least as terrible as doing eta expansion, so don't
658 do it.
659 ---
660 When worker/wrapper did eta expansion, it implictly eta expanded the binding to
661 idArity, overriding GHC.Core.Opt.Arity's decision. Other than playing fast and loose with
662 divergence, it's also broken for newtypes:
663
664 f = (\xy.blah) |> co
665 where
666 co :: (Int -> Int -> Char) ~ T
667
668 Then idArity is 2 (despite the type T), and it can have a DmdSig based on a
669 threshold of 2. But we can't w/w it without a type error.
670
671 The situation is less grave for PAPs, but the implicit eta expansion caused a
672 compiler allocation regression in T15164, where huge recursive instance method
673 groups, mostly consisting of PAPs, got w/w'd. This caused great churn in the
674 simplifier, when simply waiting for the PAPs to inline arrived at the same
675 output program.
676
677 Note there is the worry here that such PAPs and trivial RHSs might not *always*
678 be inlined. That would lead to reboxing, because the analysis tacitly assumes
679 that we W/W'd for idArity and will propagate analysis information under that
680 assumption. So far, this doesn't seem to matter in practice.
681 See https://gitlab.haskell.org/ghc/ghc/merge_requests/312#note_192064.
682
683 Note [Inline pragma for certainlyWillInline]
684 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
685 Consider this (#19824 comment on 15 May 21):
686 f _ (x,y) = ...big...
687 v = ...big...
688 g x = f v x + 1
689
690 So `f` will generate a worker/wrapper split; and `g` (since it is small)
691 will trigger the certainlyWillInline case of splitFun. The danger is that
692 we end up with
693 g {- StableUnfolding = \x -> f v x + 1 -}
694 = ...blah...
695
696 Since (a) that unfolding for g is AlwaysActive
697 (b) the unfolding for f's wrapper is ActiveAfterInitial
698 the call of f will never inline in g's stable unfolding, thereby
699 keeping `v` alive.
700
701 I thought of changing g's unfolding to be ActiveAfterInitial, but that
702 too is bad: it delays g's inlining into other modules, which makes fewer
703 specialisations happen. Example in perf/should_run/DeriveNull.
704
705 So I decided to live with the problem. In fact v's RHS will be replaced
706 by LitRubbish (see Note [Drop absent bindings]) so there is no great harm.
707 -}
708
709
710 ---------------------
711 splitFun :: WwOpts -> Id -> CoreExpr -> UniqSM [(Id, CoreExpr)]
712 splitFun ww_opts fn_id rhs
713 = warnPprTrace (not (wrap_dmds `lengthIs` (arityInfo fn_info)))
714 (ppr fn_id <+> (ppr wrap_dmds $$ ppr cpr)) $
715 do { mb_stuff <- mkWwBodies ww_opts fn_id arg_vars (exprType body) wrap_dmds cpr
716 ; case mb_stuff of
717 Nothing -> -- No useful wrapper; leave the binding alone
718 return [(fn_id, rhs)]
719
720 Just stuff
721 | let opt_wwd_rhs = simpleOptExpr (wo_simple_opts ww_opts) rhs
722 -- We need to stabilise the WW'd (and optimised) RHS below
723 , Just stable_unf <- certainlyWillInline uf_opts fn_info opt_wwd_rhs
724 -- We could make a w/w split, but in fact the RHS is small
725 -- See Note [Don't w/w inline small non-loop-breaker things]
726 , let id_w_unf = fn_id `setIdUnfolding` stable_unf
727 -- See Note [Inline pragma for certainlyWillInline]
728 -> return [ (id_w_unf, rhs) ]
729
730 | otherwise
731 -> do { work_uniq <- getUniqueM
732 ; return (mkWWBindPair ww_opts fn_id fn_info arg_vars body
733 work_uniq div stuff) } }
734 where
735 uf_opts = so_uf_opts (wo_simple_opts ww_opts)
736 fn_info = idInfo fn_id
737 (arg_vars, body) = collectBinders rhs
738
739 (wrap_dmds, div) = splitDmdSig (dmdSigInfo fn_info)
740
741 cpr_ty = getCprSig (cprSigInfo fn_info)
742 -- Arity of the CPR sig should match idArity when it's not a join point.
743 -- See Note [Arity trimming for CPR signatures] in GHC.Core.Opt.CprAnal
744 cpr = assertPpr (isJoinId fn_id || cpr_ty == topCprType || ct_arty cpr_ty == arityInfo fn_info)
745 (ppr fn_id <> colon <+> text "ct_arty:" <+> int (ct_arty cpr_ty)
746 <+> text "arityInfo:" <+> ppr (arityInfo fn_info)) $
747 ct_cpr cpr_ty
748
749 mkWWBindPair :: WwOpts -> Id -> IdInfo
750 -> [Var] -> CoreExpr -> Unique -> Divergence
751 -> ([Demand], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
752 -> [(Id, CoreExpr)]
753 mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
754 (work_demands, join_arity, wrap_fn, work_fn)
755 = [(work_id, work_rhs), (wrap_id, wrap_rhs)]
756 -- Worker first, because wrapper mentions it
757 where
758 arity = arityInfo fn_info
759 -- The arity is set by the simplifier using exprEtaExpandArity
760 -- So it may be more than the number of top-level-visible lambdas
761
762 simpl_opts = wo_simple_opts ww_opts
763
764 work_rhs = work_fn (mkLams fn_args fn_body)
765 work_act = case fn_inline_spec of -- See Note [Worker activation]
766 NoInline _ -> inl_act fn_inl_prag
767 _ -> inl_act wrap_prag
768
769 work_prag = InlinePragma { inl_src = SourceText "{-# INLINE"
770 , inl_inline = fn_inline_spec
771 , inl_sat = Nothing
772 , inl_act = work_act
773 , inl_rule = FunLike }
774 -- inl_inline: copy from fn_id; see Note [Worker/wrapper for INLINABLE functions]
775 -- inl_act: see Note [Worker activation]
776 -- inl_rule: it does not make sense for workers to be constructorlike.
777
778 work_join_arity | isJoinId fn_id = Just join_arity
779 | otherwise = Nothing
780 -- worker is join point iff wrapper is join point
781 -- (see Note [Don't w/w join points for CPR])
782
783 work_id = mkWorkerId work_uniq fn_id (exprType work_rhs)
784 `setIdOccInfo` occInfo fn_info
785 -- Copy over occurrence info from parent
786 -- Notably whether it's a loop breaker
787 -- Doesn't matter much, since we will simplify next, but
788 -- seems right-er to do so
789
790 `setInlinePragma` work_prag
791
792 `setIdUnfolding` mkWorkerUnfolding simpl_opts work_fn fn_unfolding
793 -- See Note [Worker/wrapper for INLINABLE functions]
794
795 `setIdDmdSig` mkClosedDmdSig work_demands div
796 -- Even though we may not be at top level,
797 -- it's ok to give it an empty DmdEnv
798
799 `setIdCprSig` topCprSig
800
801 `setIdDemandInfo` worker_demand
802
803 `setIdArity` work_arity
804 -- Set the arity so that the Core Lint check that the
805 -- arity is consistent with the demand type goes
806 -- through
807 `asJoinId_maybe` work_join_arity
808
809 work_arity = length work_demands
810
811 -- See Note [Demand on the Worker]
812 single_call = saturatedByOneShots arity (demandInfo fn_info)
813 worker_demand | single_call = mkWorkerDemand work_arity
814 | otherwise = topDmd
815
816 wrap_rhs = wrap_fn work_id
817 wrap_prag = mkStrWrapperInlinePrag fn_inl_prag
818 wrap_unf = mkWrapperUnfolding simpl_opts wrap_rhs arity
819
820 wrap_id = fn_id `setIdUnfolding` wrap_unf
821 `setInlinePragma` wrap_prag
822 `setIdOccInfo` noOccInfo
823 -- Zap any loop-breaker-ness, to avoid bleating from Lint
824 -- about a loop breaker with an INLINE rule
825
826 fn_inl_prag = inlinePragInfo fn_info
827 fn_inline_spec = inl_inline fn_inl_prag
828 fn_unfolding = realUnfoldingInfo fn_info
829
830 mkStrWrapperInlinePrag :: InlinePragma -> InlinePragma
831 mkStrWrapperInlinePrag (InlinePragma { inl_act = act, inl_rule = rule_info })
832 = InlinePragma { inl_src = SourceText "{-# INLINE"
833 , inl_inline = NoUserInlinePrag -- See Note [Wrapper NoUserInline]
834 , inl_sat = Nothing
835 , inl_act = wrap_act
836 , inl_rule = rule_info } -- RuleMatchInfo is (and must be) unaffected
837 where
838 wrap_act = case act of -- See Note [Wrapper activation]
839 NeverActive -> activateDuringFinal
840 FinalActive -> act
841 ActiveAfter {} -> act
842 ActiveBefore {} -> activateAfterInitial
843 AlwaysActive -> activateAfterInitial
844 -- For the last two cases, see (4) in Note [Wrapper activation]
845 -- NB: the (ActiveBefore n) isn't quite right. We really want
846 -- it to be active *after* Initial but *before* n. We don't have
847 -- a way to say that, alas.
848
849 {-
850 Note [Demand on the worker]
851 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
852
853 If the original function is called once, according to its demand info, then
854 so is the worker. This is important so that the occurrence analyser can
855 attach OneShot annotations to the worker’s lambda binders.
856
857
858 Example:
859
860 -- Original function
861 f [Demand=<L,1*C1(U)>] :: (a,a) -> a
862 f = \p -> ...
863
864 -- Wrapper
865 f [Demand=<L,1*C1(U)>] :: a -> a -> a
866 f = \p -> case p of (a,b) -> $wf a b
867
868 -- Worker
869 $wf [Demand=<L,1*C1(C1(U))>] :: Int -> Int
870 $wf = \a b -> ...
871
872 We need to check whether the original function is called once, with
873 sufficiently many arguments. This is done using saturatedByOneShots, which
874 takes the arity of the original function (resp. the wrapper) and the demand on
875 the original function.
876
877 The demand on the worker is then calculated using mkWorkerDemand, and always of
878 the form [Demand=<L,1*(C1(...(C1(U))))>]
879
880
881 Note [Do not split void functions]
882 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
883 Consider this rather common form of binding:
884 $j = \x:Void# -> ...no use of x...
885
886 Since x is not used it'll be marked as absent. But there is no point
887 in w/w-ing because we'll simply add (\y:Void#), see GHC.Core.Opt.WorkWrap.Utils.mkWorerArgs.
888
889 If x has a more interesting type (eg Int, or Int#), there *is* a point
890 in w/w so that we don't pass the argument at all.
891
892 Note [Thunk splitting]
893 ~~~~~~~~~~~~~~~~~~~~~~
894 Suppose x is used strictly (never mind whether it has the CPR
895 property).
896
897 let
898 x* = x-rhs
899 in body
900
901 splitThunk transforms like this:
902
903 let
904 x* = case x-rhs of { I# a -> I# a }
905 in body
906
907 Now simplifier will transform to
908
909 case x-rhs of
910 I# a -> let x* = I# a
911 in body
912
913 which is what we want. Now suppose x-rhs is itself a case:
914
915 x-rhs = case e of { T -> I# a; F -> I# b }
916
917 The join point will abstract over a, rather than over (which is
918 what would have happened before) which is fine.
919
920 Notice that x certainly has the CPR property now!
921
922 In fact, splitThunk uses the function argument w/w splitting
923 function, so that if x's demand is deeper (say U(U(L,L),L))
924 then the splitting will go deeper too.
925
926 NB: For recursive thunks, the Simplifier is unable to float `x-rhs` out of
927 `x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it
928 back to the original definition, so we just split non-recursive thunks.
929
930 Note [Thunk splitting for top-level binders]
931 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
932 Top-level bindings are never strict. Yet they can be absent, as T14270 shows:
933
934 module T14270 (mkTrApp) where
935 mkTrApp x y
936 | Just ... <- ... typeRepKind x ...
937 = undefined
938 | otherwise
939 = undefined
940 typeRepKind = Tick scc undefined
941
942 (T19180 is a profiling-free test case for this)
943 Note that `typeRepKind` is not exported and its only use site in
944 `mkTrApp` guards a bottoming expression. Thus, demand analysis
945 figures out that `typeRepKind` is absent and splits the thunk to
946
947 typeRepKind =
948 let typeRepKind = Tick scc undefined in
949 let typeRepKind = absentError in
950 typeRepKind
951
952 But now we have a local binding with an External Name
953 (See Note [About the NameSorts]). That will trigger a CoreLint error, which we
954 get around by localising the Id for the auxiliary bindings in 'splitThunk'.
955 -}
956
957 -- | See Note [Thunk splitting].
958 --
959 -- splitThunk converts the *non-recursive* binding
960 -- x = e
961 -- into
962 -- x = let x' = e in
963 -- case x' of I# y -> let x' = I# y in x'
964 -- See comments above. Is it not beautifully short?
965 -- Moreover, it works just as well when there are
966 -- several binders, and if the binders are lifted
967 -- E.g. x = e
968 -- --> x = let x' = e in
969 -- case x' of (a,b) -> let x' = (a,b) in x'
970 -- Here, x' is a localised version of x, in case x is a
971 -- top-level Id with an External Name, because Lint rejects local binders with
972 -- External Names; see Note [About the NameSorts] in GHC.Types.Name.
973 --
974 -- How can we do thunk-splitting on a top-level binder? See
975 -- Note [Thunk splitting for top-level binders].
976 splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
977 splitThunk ww_opts is_rec x rhs
978 = assert (not (isJoinId x)) $
979 do { let x' = localiseId x -- See comment above
980 ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x'
981 ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
982 ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
983 return res
984 else return [(x, rhs)] }