never executed always true always false
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4 A library for the ``worker\/wrapper'' back-end to the strictness analyser
5 -}
6
7
8 {-# LANGUAGE ViewPatterns #-}
9
10 module GHC.Core.Opt.WorkWrap.Utils
11 ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs
12 , DataConPatContext(..)
13 , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg
14 , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity
15 , mkAbsentFiller
16 , isWorkerSmallEnough
17 )
18 where
19
20 import GHC.Prelude
21
22 import GHC.Driver.Session
23 import GHC.Driver.Config (initSimpleOpts)
24
25 import GHC.Core
26 import GHC.Core.Utils
27 import GHC.Core.DataCon
28 import GHC.Core.Make
29 import GHC.Core.Subst
30 import GHC.Core.Type
31 import GHC.Core.Multiplicity
32 import GHC.Core.Predicate ( isClassPred )
33 import GHC.Core.Coercion
34 import GHC.Core.Reduction
35 import GHC.Core.FamInstEnv
36 import GHC.Core.TyCon
37 import GHC.Core.TyCon.RecWalk
38 import GHC.Core.SimpleOpt( SimpleOpts )
39
40 import GHC.Types.Id
41 import GHC.Types.Id.Info
42 import GHC.Types.Demand
43 import GHC.Types.Cpr
44 import GHC.Types.Id.Make ( voidArgId, voidPrimId )
45 import GHC.Types.Var.Env
46 import GHC.Types.Basic
47 import GHC.Types.Unique.Supply
48 import GHC.Types.Name ( getOccFS )
49
50 import GHC.Data.FastString
51 import GHC.Data.Maybe
52 import GHC.Data.OrdList
53 import GHC.Data.List.SetOps
54
55 import GHC.Builtin.Types ( tupleDataCon )
56
57 import GHC.Utils.Misc
58 import GHC.Utils.Outputable
59 import GHC.Utils.Panic
60 import GHC.Utils.Panic.Plain
61 import GHC.Utils.Trace
62
63 import Control.Applicative ( (<|>) )
64 import Control.Monad ( zipWithM )
65 import Data.List ( unzip4 )
66
67 import GHC.Types.RepType
68 import GHC.Unit.Types
69
70 {-
71 ************************************************************************
72 * *
73 \subsection[mkWrapperAndWorker]{@mkWrapperAndWorker@}
74 * *
75 ************************************************************************
76
77 Here's an example. The original function is:
78
79 \begin{verbatim}
80 g :: forall a . Int -> [a] -> a
81
82 g = \/\ a -> \ x ys ->
83 case x of
84 0 -> head ys
85 _ -> head (tail ys)
86 \end{verbatim}
87
88 From this, we want to produce:
89 \begin{verbatim}
90 -- wrapper (an unfolding)
91 g :: forall a . Int -> [a] -> a
92
93 g = \/\ a -> \ x ys ->
94 case x of
95 I# x# -> $wg a x# ys
96 -- call the worker; don't forget the type args!
97
98 -- worker
99 $wg :: forall a . Int# -> [a] -> a
100
101 $wg = \/\ a -> \ x# ys ->
102 let
103 x = I# x#
104 in
105 case x of -- note: body of g moved intact
106 0 -> head ys
107 _ -> head (tail ys)
108 \end{verbatim}
109
110 Something we have to be careful about: Here's an example:
111
112 \begin{verbatim}
113 -- "f" strictness: U(P)U(P)
114 f (I# a) (I# b) = a +# b
115
116 g = f -- "g" strictness same as "f"
117 \end{verbatim}
118
119 \tr{f} will get a worker all nice and friendly-like; that's good.
120 {\em But we don't want a worker for \tr{g}}, even though it has the
121 same strictness as \tr{f}. Doing so could break laziness, at best.
122
123 Consequently, we insist that the number of strictness-info items is
124 exactly the same as the number of lambda-bound arguments. (This is
125 probably slightly paranoid, but OK in practice.) If it isn't the
126 same, we ``revise'' the strictness info, so that we won't propagate
127 the unusable strictness-info into the interfaces.
128
129
130 ************************************************************************
131 * *
132 \subsection{The worker wrapper core}
133 * *
134 ************************************************************************
135
136 @mkWwBodies@ is called when doing the worker\/wrapper split inside a module.
137 -}
138
139 data WwOpts
140 = MkWwOpts
141 { wo_fam_envs :: !FamInstEnvs
142 , wo_simple_opts :: !SimpleOpts
143 , wo_cpr_anal :: !Bool
144 , wo_fun_to_thunk :: !Bool
145 , wo_max_worker_args :: !Int
146 -- Used for absent argument error message
147 , wo_module :: !Module
148 }
149
150 initWwOpts :: Module -> DynFlags -> FamInstEnvs -> WwOpts
151 initWwOpts this_mod dflags fam_envs = MkWwOpts
152 { wo_fam_envs = fam_envs
153 , wo_simple_opts = initSimpleOpts dflags
154 , wo_cpr_anal = gopt Opt_CprAnal dflags
155 , wo_fun_to_thunk = gopt Opt_FunToThunk dflags
156 , wo_max_worker_args = maxWorkerArgs dflags
157 , wo_module = this_mod
158 }
159
160 type WwResult
161 = ([Demand], -- Demands for worker (value) args
162 JoinArity, -- Number of worker (type OR value) args
163 Id -> CoreExpr, -- Wrapper body, lacking only the worker Id
164 CoreExpr -> CoreExpr) -- Worker body, lacking the original function rhs
165
166 nop_fn :: CoreExpr -> CoreExpr
167 nop_fn body = body
168
169
170 mkWwBodies :: WwOpts
171 -> Id -- ^ The original function
172 -> [Var] -- ^ Manifest args of original function
173 -> Type -- ^ Result type of the original function,
174 -- after being stripped of args
175 -> [Demand] -- ^ Strictness of original function
176 -> Cpr -- ^ Info about function result
177 -> UniqSM (Maybe WwResult)
178 -- ^ Given a function definition
179 --
180 -- > data T = MkT Int Bool Char
181 -- > f :: (a, b) -> Int -> T
182 -- > f = \x y -> E
183 --
184 -- @mkWwBodies _ 'f' ['x::(a,b)','y::Int'] '(a,b)' ['1P(L,L)', '1P(L)'] '1'@
185 -- returns
186 --
187 -- * The wrapper body context for the call to the worker function, lacking
188 -- only the 'Id' for the worker function:
189 --
190 -- > W[_] :: Id -> CoreExpr
191 -- > W[work_fn] = \x y -> -- args of the wrapper (cloned_arg_vars)
192 -- > case x of (a, b) -> -- unbox wrapper args (wrap_fn_str)
193 -- > case y of I# n -> --
194 -- > case <work_fn> a b n of -- call to the worker fun (call_work)
195 -- > (# i, b, c #) -> MkT i b c -- rebox result (wrap_fn_cpr)
196 --
197 -- * The worker body context that wraps around its hole reboxing defns for x
198 -- and y, as well as returning CPR transit variables of the unboxed MkT
199 -- result in an unboxed tuple:
200 --
201 -- > w[_] :: CoreExpr -> CoreExpr
202 -- > w[fn_rhs] = \a b n -> -- args of the worker (work_lam_args)
203 -- > let { y = I# n; x = (a, b) } in -- reboxing wrapper args (work_fn_str)
204 -- > case <fn_rhs> x y of -- call to the original RHS (call_rhs)
205 -- > MkT i b c -> (# i, b, c #) -- return CPR transit vars (work_fn_cpr)
206 --
207 -- NB: The wrap_rhs hole is to be filled with the original wrapper RHS
208 -- @\x y -> E@. This is so that we can also use @w@ to transform stable
209 -- unfoldings, the lambda args of which may be different than x and y.
210 --
211 -- * Id details for the worker function like demands on arguments and its join
212 -- arity.
213 --
214 -- All without looking at E (except for beta reduction, see Note [Join points
215 -- and beta-redexes]), which allows us to apply the same split to function body
216 -- and its unfolding(s) alike.
217 --
218 mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
219 = do { massertPpr (filter isId arg_vars `equalLength` demands)
220 (text "wrong wrapper arity" $$ ppr fun_id $$ ppr arg_vars $$ ppr res_ty $$ ppr demands)
221
222 -- Clone and prepare arg_vars of the original fun RHS
223 -- See Note [Freshen WW arguments]
224 -- and Note [Zap IdInfo on worker args]
225 ; uniq_supply <- getUniqueSupplyM
226 ; let args_free_tcvs = tyCoVarsOfTypes (res_ty : map varType arg_vars)
227 empty_subst = mkEmptySubst (mkInScopeSet args_free_tcvs)
228 zapped_arg_vars = map zap_var arg_vars
229 (subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars
230 res_ty' = GHC.Core.Subst.substTy subst res_ty
231
232 ; (useful1, work_args, wrap_fn_str, fn_args)
233 <- mkWWstr opts cloned_arg_vars
234
235 -- Do CPR w/w. See Note [Always do CPR w/w]
236 ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
237 <- mkWWcpr_entry opts res_ty' res_cpr
238
239 ; let (work_lam_args, work_call_args) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
240 work_args cpr_res_ty
241 call_work work_fn = mkVarApps (Var work_fn) work_call_args
242 call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args
243 -- See Note [Join points and beta-redexes]
244 wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work
245 worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs
246 worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v]
247
248 ; if isWorkerSmallEnough (wo_max_worker_args opts) (length demands) work_args
249 && not (too_many_args_for_join_point arg_vars)
250 && ((useful1 && not only_one_void_argument) || useful2)
251 then return (Just (worker_args_dmds, length work_call_args,
252 wrapper_body, worker_body))
253 else return Nothing
254 }
255 -- We use an INLINE unconditionally, even if the wrapper turns out to be
256 -- something trivial like
257 -- fw = ...
258 -- f = __inline__ (coerce T fw)
259 -- The point is to propagate the coerce to f's call sites, so even though
260 -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent
261 -- fw from being inlined into f's RHS
262 where
263 zap_var v | isTyVar v = v
264 | otherwise = modifyIdInfo zap_info v
265 zap_info info -- See Note [Zap IdInfo on worker args]
266 = info `setOccInfo` noOccInfo
267
268 mb_join_arity = isJoinId_maybe fun_id
269
270 -- Note [Do not split void functions]
271 only_one_void_argument
272 | [d] <- demands
273 , [v] <- filter isId arg_vars
274 , isAbsDmd d && isVoidTy (idType v)
275 = True
276 | otherwise
277 = False
278
279 -- Note [Join points returning functions]
280 too_many_args_for_join_point wrap_args
281 | Just join_arity <- mb_join_arity
282 , wrap_args `lengthExceeds` join_arity
283 = warnPprTrace True (text "Unable to worker/wrapper join point with arity " <+>
284 int join_arity <+> text "but" <+>
285 int (length wrap_args) <+> text "args") $
286 True
287 | otherwise
288 = False
289
290 -- | Version of 'GHC.Core.mkApps' that does beta reduction on-the-fly.
291 -- PRECONDITION: The arg expressions are not free in any of the lambdas binders.
292 mkAppsBeta :: CoreExpr -> [CoreArg] -> CoreExpr
293 -- The precondition holds for our call site in mkWwBodies, because all the FVs
294 -- of as are either cloned_arg_vars (and thus fresh) or fresh worker args.
295 mkAppsBeta (Lam b body) (a:as) = bindNonRec b a $! mkAppsBeta body as
296 mkAppsBeta f as = mkApps f as
297
298 -- See Note [Limit w/w arity]
299 isWorkerSmallEnough :: Int -> Int -> [Var] -> Bool
300 isWorkerSmallEnough max_worker_args old_n_args vars
301 = count isId vars <= max old_n_args max_worker_args
302 -- We count only Free variables (isId) to skip Type, Kind
303 -- variables which have no runtime representation.
304 -- Also if the function took 82 arguments before (old_n_args), it's fine if
305 -- it takes <= 82 arguments afterwards.
306
307 {-
308 Note [Always do CPR w/w]
309 ~~~~~~~~~~~~~~~~~~~~~~~~
310 At one time we refrained from doing CPR w/w for thunks, on the grounds that
311 we might duplicate work. But that is already handled by the demand analyser,
312 which doesn't give the CPR property if w/w might waste work: see
313 Note [CPR for thunks] in GHC.Core.Opt.DmdAnal.
314
315 And if something *has* been given the CPR property and we don't w/w, it's
316 a disaster, because then the enclosing function might say it has the CPR
317 property, but now doesn't and there a cascade of disaster. A good example
318 is #5920.
319
320 Note [Limit w/w arity]
321 ~~~~~~~~~~~~~~~~~~~~~~~~
322 Guard against high worker arity as it generates a lot of stack traffic.
323 A simplified example is #11565#comment:6
324
325 Current strategy is very simple: don't perform w/w transformation at all
326 if the result produces a wrapper with arity higher than -fmax-worker-args
327 and the number arguments before w/w (see #18122).
328
329 It is a bit all or nothing, consider
330
331 f (x,y) (a,b,c,d,e ... , z) = rhs
332
333 Currently we will remove all w/w ness entirely. But actually we could
334 w/w on the (x,y) pair... it's the huge product that is the problem.
335
336 Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
337 solve f. But we can get a lot of args from deeply-nested products:
338
339 g (a, (b, (c, (d, ...)))) = rhs
340
341 This is harder to spot on an arg-by-arg basis. Previously mkWwStr was
342 given some "fuel" saying how many arguments it could add; when we ran
343 out of fuel it would stop w/wing.
344
345 Still not very clever because it had a left-right bias.
346
347 Note [Zap IdInfo on worker args]
348 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
349 We have to zap the following IdInfo when re-using arg variables of the original
350 function for the worker:
351
352 * OccInfo: Dead wrapper args now occur in Apps of the worker's call to the
353 original fun body. Those occurrences will quickly cancel away with the lambdas
354 of the fun body in the next run of the Simplifier, but CoreLint will complain
355 in the meantime, so zap it.
356
357 We zap in mkWwBodies because we need the zapped variables both when binding them
358 in mkWWstr (mkAbsentFiller, specifically) and in mkWorkerArgs, where we produce
359 the call to the fun body.
360
361 ************************************************************************
362 * *
363 \subsection{Making wrapper args}
364 * *
365 ************************************************************************
366
367 During worker-wrapper stuff we may end up with an unlifted thing
368 which we want to let-bind without losing laziness. So we
369 add a void argument. E.g.
370
371 f = /\a -> \x y z -> E::Int# -- E does not mention x,y,z
372 ==>
373 fw = /\ a -> \void -> E
374 f = /\ a -> \x y z -> fw realworld
375
376 We use the state-token type which generates no code.
377 -}
378
379 mkWorkerArgs :: Id -- The wrapper Id
380 -> Bool
381 -> [Var]
382 -> Type -- Type of body
383 -> ([Var], -- Lambda bound args
384 [Var]) -- Args at call site
385 mkWorkerArgs wrap_id fun_to_thunk args res_ty
386 | not (isJoinId wrap_id) -- Join Ids never need an extra arg
387 , not (any isId args) -- No existing value lambdas
388 , needs_a_value_lambda -- and we need to add one
389 = (args ++ [voidArgId], args ++ [voidPrimId])
390
391 | otherwise
392 = (args, args)
393 where
394 -- If fun_to_thunk is False we always keep at least one value
395 -- argument: see Note [Protecting the last value argument]
396 -- If it is True, we only need to keep a value argument if
397 -- the result type is (or might be) unlifted, in which case
398 -- dropping the last arg would mean we wrongly used call-by-value
399 needs_a_value_lambda
400 = not fun_to_thunk
401 || might_be_unlifted
402
403 -- Might the result be lifted?
404 -- False => definitely lifted
405 -- True => might be unlifted
406 -- We may encounter a representation-polymorphic result, in which case we
407 -- conservatively assume that we have laziness that needs
408 -- preservation. See #15186.
409 might_be_unlifted = case isLiftedType_maybe res_ty of
410 Just lifted -> not lifted
411 Nothing -> True
412
413 {-
414 Note [Protecting the last value argument]
415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
416 If the user writes (\_ -> E), they might be intentionally disallowing
417 the sharing of E. Since absence analysis and worker-wrapper are keen
418 to remove such unused arguments, we add in a void argument to prevent
419 the function from becoming a thunk.
420
421 The user can avoid adding the void argument with the -ffun-to-thunk
422 flag. However, this can create sharing, which may be bad in two ways. 1) It can
423 create a space leak. 2) It can prevent inlining *under a lambda*. If w/w
424 removes the last argument from a function f, then f now looks like a thunk, and
425 so f can't be inlined *under a lambda*.
426
427 Note [Join points and beta-redexes]
428 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
429 Originally, the worker would invoke the original function by calling it with
430 arguments, thus producing a beta-redex for the simplifier to munch away:
431
432 \x y z -> e => (\x y z -> e) wx wy wz
433
434 Now that we have special rules about join points, however, this is Not Good if
435 the original function is itself a join point, as then it may contain invocations
436 of other join points:
437
438 join j1 x = ...
439 join j2 y = if y == 0 then 0 else j1 y
440
441 =>
442
443 join j1 x = ...
444 join $wj2 y# = let wy = I# y# in (\y -> if y == 0 then 0 else jump j1 y) wy
445 join j2 y = case y of I# y# -> jump $wj2 y#
446
447 There can't be an intervening lambda between a join point's declaration and its
448 occurrences, so $wj2 here is wrong. But of course, this is easy enough to fix:
449
450 ...
451 let join $wj2 y# = let wy = I# y# in let y = wy in if y == 0 then 0 else j1 y
452 ...
453
454 Hence we simply do the beta-reduction here. (This would be harder if we had to
455 worry about hygiene, but luckily wy is freshly generated.)
456
457 Note [Join points returning functions]
458 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
459 It is crucial that the arity of a join point depends on its *callers,* not its
460 own syntax. What this means is that a join point can have "extra lambdas":
461
462 f :: Int -> Int -> (Int, Int) -> Int
463 f x y = join j (z, w) = \(u, v) -> ...
464 in jump j (x, y)
465
466 Typically this happens with functions that are seen as computing functions,
467 rather than being curried. (The real-life example was GHC.Data.Graph.Ops.addConflicts.)
468
469 When we create the wrapper, it *must* be in "eta-contracted" form so that the
470 jump has the right number of arguments:
471
472 f x y = join $wj z' w' = \u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ...
473 j (z, w) = jump $wj z w
474
475 (See Note [Join points and beta-redexes] for where the lets come from.) If j
476 were a function, we would instead say
477
478 f x y = let $wj = \z' w' u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ...
479 j (z, w) (u, v) = $wj z w u v
480
481 Notice that the worker ends up with the same lambdas; it's only the wrapper we
482 have to be concerned about.
483
484 FIXME Currently the functionality to produce "eta-contracted" wrappers is
485 unimplemented; we simply give up.
486
487 Note [Freshen WW arguments]
488 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
489 When we do a worker/wrapper split, we must freshen the arg vars of the original
490 fun RHS because they might shadow each other. E.g.
491
492 f :: forall a. Maybe a -> forall a. Maybe a -> Int -> Int
493 f @a x @a y z = case x <|> y of
494 Nothing -> z
495 Just _ -> z + 1
496
497 ==> {WW split unboxing the Int}
498
499 $wf :: forall a. Maybe a -> forall a. Maybe a -> Int# -> Int
500 $wf @a x @a y wz = (\@a x @a y z -> case x <|> y of ...) ??? x @a y (I# wz)
501
502 (Notice that the code we actually emit will sort-of ANF-ise the lambda args,
503 leading to even more shadowing issues. The above demonstrates that even if we
504 try harder we'll still get shadowing issues.)
505
506 What should we put in place for ??? ? Certainly not @a, because that would
507 reference the wrong, inner a. A similar situation occurred in #12562, we even
508 saw a type variable in the worker shadowing an outer term-variable binding.
509
510 We avoid the issue by freshening the argument variables from the original fun
511 RHS through 'cloneBndrs', which will also take care of subsitution in binder
512 types. Fortunately, it's sufficient to pick the FVs of the arg vars as in-scope
513 set, so that we don't need to do a FV traversal over the whole body of the
514 original function.
515
516 At the moment, #12562 has no regression test. As such, this Note is not covered
517 by any test logic or when bootstrapping the compiler. Yet we clearly want to
518 freshen the binders, as the example above demonstrates.
519 Adding a Core pass that maximises shadowing for testing purposes might help,
520 see #17478.
521 -}
522
523 {-
524 ************************************************************************
525 * *
526 \subsection{Unboxing Decision for Strictness and CPR}
527 * *
528 ************************************************************************
529 -}
530
531 -- | The information needed to build a pattern for a DataCon to be unboxed.
532 -- The pattern can be generated from 'dcpc_dc' and 'dcpc_tc_args' via
533 -- 'GHC.Core.Utils.dataConRepInstPat'. The coercion 'dcpc_co' is for newtype
534 -- wrappers.
535 --
536 -- If we get @DataConPatContext dc tys co@ for some type @ty@
537 -- and @dataConRepInstPat ... dc tys = (exs, flds)@, then
538 --
539 -- * @dc @exs flds :: T tys@
540 -- * @co :: T tys ~ ty@
541 data DataConPatContext
542 = DataConPatContext
543 { dcpc_dc :: !DataCon
544 , dcpc_tc_args :: ![Type]
545 , dcpc_co :: !Coercion
546 }
547
548 -- | Describes the outer shape of an argument to be unboxed or left as-is
549 -- Depending on how @s@ is instantiated (e.g., 'Demand' or 'Cpr').
550 data UnboxingDecision s
551 = StopUnboxing
552 -- ^ We ran out of strictness info. Leave untouched.
553 | DropAbsent
554 -- ^ The argument/field was absent. Drop it.
555 | Unbox !DataConPatContext [s]
556 -- ^ The argument is used strictly or the returned product was constructed, so
557 -- unbox it.
558 -- The 'DataConPatContext' carries the bits necessary for
559 -- instantiation with 'dataConRepInstPat'.
560 -- The @[s]@ carries the bits of information with which we can continue
561 -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'.
562
563 -- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns
564 -- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon'
565 -- to unbox.
566 wantToUnboxArg
567 :: FamInstEnvs
568 -> Type -- ^ Type of the argument
569 -> Demand -- ^ How the arg was used
570 -> UnboxingDecision Demand
571 -- See Note [Which types are unboxed?]
572 wantToUnboxArg fam_envs ty (n :* sd)
573 | isAbs n
574 = DropAbsent
575
576 | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
577 , Just dc <- tyConSingleAlgDataCon_maybe tc
578 , let arity = dataConRepArity dc
579 , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis]
580 -- NB: No strictness or evaluatedness checks here. That is done by
581 -- 'finaliseBoxity'!
582 = Unbox (DataConPatContext dc tc_args co) ds
583
584 | otherwise
585 = StopUnboxing
586
587
588 -- | Unboxing strategy for constructed results.
589 wantToUnboxResult :: FamInstEnvs -> Type -> Cpr -> UnboxingDecision Cpr
590 -- See Note [Which types are unboxed?]
591 wantToUnboxResult fam_envs ty cpr
592 | Just (con_tag, arg_cprs) <- asConCpr cpr
593 , Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
594 , Just dcs <- tyConAlgDataCons_maybe tc <|> open_body_ty_warning
595 , dcs `lengthAtLeast` con_tag -- This might not be true if we import the
596 -- type constructor via a .hs-boot file (#8743)
597 , let dc = dcs `getNth` (con_tag - fIRST_TAG)
598 , null (dataConExTyCoVars dc) -- no existentials;
599 -- See Note [Which types are unboxed?]
600 -- and GHC.Core.Opt.CprAnal.argCprType
601 -- where we also check this.
602 , all isLinear (dataConInstArgTys dc tc_args)
603 -- Deactivates CPR worker/wrapper splits on constructors with non-linear
604 -- arguments, for the moment, because they require unboxed tuple with variable
605 -- multiplicity fields.
606 = Unbox (DataConPatContext dc tc_args co) arg_cprs
607
608 | otherwise
609 = StopUnboxing
610
611 where
612 -- | See Note [non-algebraic or open body type warning]
613 open_body_ty_warning = warnPprTrace True (text "wantToUnboxResult: non-algebraic or open body type" <+> ppr ty) Nothing
614
615 isLinear :: Scaled a -> Bool
616 isLinear (Scaled w _ ) =
617 case w of
618 One -> True
619 _ -> False
620
621
622 {- Note [Which types are unboxed?]
623 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
624 Worker/wrapper will unbox
625
626 1. A strict data type argument, that
627 * is an algebraic data type (not a newtype)
628 * has a single constructor (thus is a "product")
629 * that may bind existentials
630 We can transform
631 > data D a = forall b. D a b
632 > f (D @ex a b) = e
633 to
634 > $wf @ex a b = e
635 via 'mkWWstr'.
636
637 2. The constructed result of a function, if
638 * its type is an algebraic data type (not a newtype)
639 * (might have multiple constructors, in contrast to (1))
640 * the applied data constructor *does not* bind existentials
641 We can transform
642 > f x y = let ... in D a b
643 to
644 > $wf x y = let ... in (# a, b #)
645 via 'mkWWcpr'.
646
647 NB: We don't allow existentials for CPR W/W, because we don't have unboxed
648 dependent tuples (yet?). Otherwise, we could transform
649 > f x y = let ... in D @ex (a :: ..ex..) (b :: ..ex..)
650 to
651 > $wf x y = let ... in (# @ex, (a :: ..ex..), (b :: ..ex..) #)
652
653 The respective tests are in 'wantToUnboxArg' and
654 'wantToUnboxResult', respectively.
655
656 Note that the data constructor /can/ have evidence arguments: equality
657 constraints, type classes etc. So it can be GADT. These evidence
658 arguments are simply value arguments, and should not get in the way.
659
660 Note [Do not unbox class dictionaries]
661 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
662 If we have
663 f :: Ord a => [a] -> Int -> a
664 {-# INLINABLE f #-}
665 and we worker/wrapper f, we'll get a worker with an INLINABLE pragma
666 (see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap),
667 which can still be specialised by the type-class specialiser, something like
668 fw :: Ord a => [a] -> Int# -> a
669
670 BUT if f is strict in the Ord dictionary, we might unpack it, to get
671 fw :: (a->a->Bool) -> [a] -> Int# -> a
672 and the type-class specialiser can't specialise that. An example is #6056.
673
674 But in any other situation, a dictionary is just an ordinary value,
675 and can be unpacked. So we track the INLINABLE pragma, and discard the boxity
676 flag in finaliseBoxity (see the isClassPred test).
677
678 Historical note: #14955 describes how I got this fix wrong the first time.
679
680 Note that the simplicity of this fix implies that INLINE functions (such as
681 wrapper functions after the WW run) will never say that they unbox class
682 dictionaries. That's not ideal, but not worth losing sleep over, as INLINE
683 functions will have been inlined by the time we run demand analysis so we'll
684 see the unboxing around the worker in client modules. I got aware of the issue
685 in T5075 by the change in boxity of loop between demand analysis runs.
686
687 Note [mkWWstr and unsafeCoerce]
688 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
689 By using unsafeCoerce, it is possible to make the number of demands fail to
690 match the number of constructor arguments; this happened in #8037.
691 If so, the worker/wrapper split doesn't work right and we get a Core Lint
692 bug. The fix here is simply to decline to do w/w if that happens.
693
694 Note [Unboxing evaluated arguments]
695 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
696 Consider this program (due to Roman):
697
698 data X a = X !a
699
700 foo :: X Int -> Int -> Int
701 foo x@(X a) n = go 0
702 where
703 go i | i < n = a + go (i+1)
704 | otherwise = 0
705
706 We want the worker for 'foo' too look like this:
707
708 $wfoo :: Int# -> Int# -> Int#
709
710 with the first argument unboxed, so that it is not eval'd each time around the
711 'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It
712 is sound for the wrapper to pass an unboxed arg because X is strict
713 (see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument
714 must be evaluated. And if we *don't* pass an unboxed argument, we can't even
715 repair it by adding a `seq` thus:
716
717 foo (X a) n = a `seq` go 0
718
719 because the seq is discarded (very early) since X is strict!
720
721 So here's what we do
722
723 * Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis
724 alone, but account for the additional evaluatedness when annotating the binder
725 in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity
726 on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used
727 lazily but unboxed nonetheless. This seems to contradict
728 Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is
729 evaluated and thus can be unboxed.
730
731 * When 'finaliseBoxity' decides to unbox a record, it will zip the field demands
732 together with the respective 'StrictnessMark'. In case of 'x', it will pair
733 up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for
734 the strict field.
735
736 * Said 'StrictnessMark' is passed to the recursive invocation of
737 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but
738 since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'.
739
740 * Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will
741 /not/ look at the strictness bits of the demand, only at Boxity flags. As such,
742 it will happily unbox 'a' despite the lazy demand on it.
743
744 The net effect is that boxity analysis and the w/w transformation are more
745 aggressive about unboxing the strict arguments of a data constructor than when
746 looking at strictness info exclusively. It is very much like (Nested) CPR, which
747 needs its nested fields to be evaluated in order for it to unbox nestedly.
748
749 There is the usual danger of reboxing, which as usual we ignore. But
750 if X is monomorphic, and has an UNPACK pragma, then this optimisation
751 is even more important. We don't want the wrapper to rebox an unboxed
752 argument, and pass an Int to $wfoo!
753
754 This works in nested situations like T10482
755
756 data family Bar a
757 data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
758 newtype instance Bar Int = Bar Int
759
760 foo :: Bar ((Int, Int), Int) -> Int -> Int
761 foo f k = case f of BarPair x y ->
762 case burble of
763 True -> case x of
764 BarPair p q -> ...
765 False -> ...
766
767 The extra eagerness lets us produce a worker of type:
768 $wfoo :: Int# -> Int# -> Int# -> Int -> Int
769 $wfoo p# q# y# = ...
770
771 even though the `case x` is only lazily evaluated.
772
773 --------- Historical note ------------
774 We used to add data-con strictness demands when demand analysing case
775 expression. However, it was noticed in #15696 that this misses some cases. For
776 instance, consider the program (from T10482)
777
778 data family Bar a
779 data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
780 newtype instance Bar Int = Bar Int
781
782 foo :: Bar ((Int, Int), Int) -> Int -> Int
783 foo f k =
784 case f of
785 BarPair x y -> case burble of
786 True -> case x of
787 BarPair p q -> ...
788 False -> ...
789
790 We really should be able to assume that `p` is already evaluated since it came
791 from a strict field of BarPair. This strictness would allow us to produce a
792 worker of type:
793
794 $wfoo :: Int# -> Int# -> Int# -> Int -> Int
795 $wfoo p# q# y# = ...
796
797 even though the `case x` is only lazily evaluated
798
799 Indeed before we fixed #15696 this would happen since we would float the inner
800 `case x` through the `case burble` to get:
801
802 foo f k =
803 case f of
804 BarPair x y -> case x of
805 BarPair p q -> case burble of
806 True -> ...
807 False -> ...
808
809 However, after fixing #15696 this could no longer happen (for the reasons
810 discussed in ticket:15696#comment:76). This means that the demand placed on `f`
811 would then be significantly weaker (since the False branch of the case on
812 `burble` is not strict in `p` or `q`).
813
814 Consequently, we now instead account for data-con strictness in mkWWstr_one,
815 applying the strictness demands to the final result of DmdAnal. The result is
816 that we get the strict demand signature we wanted even if we can't float
817 the case on `x` up through the case on `burble`.
818
819 Note [No nested Unboxed inside Boxed in demand signature]
820 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
821 Consider
822 ```
823 f p@(x,y)
824 | even (x+y) = []
825 | otherwise = [p]
826 ```
827 Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)`
828 on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper
829 can't unbox the pair components without unboxing the pair! So we better say
830 `1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info.
831 That happens in 'finaliseBoxity'.
832
833 Note [No lazy, Unboxed demands in demand signature]
834 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
835 Consider T19407:
836
837 data Huge = Huge Bool () ... () -- think: DynFlags
838 data T = T { h :: Huge, n :: Int }
839 f t@(T h _) = g h t
840 g (H b _ ... _) t = if b then 1 else n t
841
842 The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better
843 not put that demand in `g`'s demand signature, because worker/wrapper will not
844 in general unbox a lazy-and-unboxed demand like `L!P(..)`.
845 (The exception are known-to-be-evaluated arguments like strict fields,
846 see Note [Unboxing evaluated arguments].)
847
848 The program above is an example where spreading misinformed boxity through the
849 signature is particularly egregious. If we give `g` that signature, then `f`
850 puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and
851 we get
852
853 f (T (H b _ ... _) n) = $wf b n
854 $wf b n = $wg b (T (H b x ... x) n)
855 $wg = ...
856
857 Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in
858 'finaliseBoxity', modulo Note [Unboxing evaluated arguments].
859
860 Note [Finalising boxity for demand signature]
861 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
862 The worker/wrapper pass must strictly adhere to the boxity decisions encoded
863 in the demand signature, because that is the information that demand analysis
864 propagates throughout the program. Failing to implement the strategy laid out
865 in the signature can result in reboxing in unexpected places. Hence, we must
866 completely anticipate unboxing decisions during demand analysis and reflect
867 these decicions in demand annotations. That is the job of 'finaliseBoxity',
868 which is defined here and called from demand analysis.
869
870 Here is a list of different Notes it has to take care of:
871
872 * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in
873 general, but still allow Note [Unboxing evaluated arguments]
874 * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)`
875 * Implement fixes for corner cases Note [Do not unbox class dictionaries]
876 and Note [mkWWstr and unsafeCoerce]
877
878 Then, in worker/wrapper blindly trusts the boxity info in the demand signature
879 and will not look at strictness info *at all*, in 'wantToUnboxArg'.
880
881 Note [non-algebraic or open body type warning]
882 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
883 There are a few cases where the W/W transformation is told that something
884 returns a constructor, but the type at hand doesn't really match this. One
885 real-world example involves unsafeCoerce:
886 foo = IO a
887 foo = unsafeCoerce c_exit
888 foreign import ccall "c_exit" c_exit :: IO ()
889 Here CPR will tell you that `foo` returns a () constructor for sure, but trying
890 to create a worker/wrapper for type `a` obviously fails.
891 (This was a real example until ee8e792 in libraries/base.)
892
893 It does not seem feasible to avoid all such cases already in the analyser (and
894 after all, the analysis is not really wrong), so we simply do nothing here in
895 mkWWcpr. But we still want to emit warning with -DDEBUG, to hopefully catch
896 other cases where something went avoidably wrong.
897
898 This warning also triggers for the stream fusion library within `text`.
899 We can'easily W/W constructed results like `Stream` because we have no simple
900 way to express existential types in the worker's type signature.
901 -}
902
903 {-
904 ************************************************************************
905 * *
906 \subsection{Worker/wrapper for Strictness and Absence}
907 * *
908 ************************************************************************
909 -}
910
911 mkWWstr :: WwOpts
912 -> [Var] -- Wrapper args; have their demand info on them
913 -- *Includes type variables*
914 -> UniqSM (Bool, -- Is this useful
915 [Var], -- Worker args
916 CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call
917 -- and without its lambdas
918 -- This fn adds the unboxing
919 [CoreExpr]) -- Reboxed args for the call to the
920 -- original RHS. Corresponds one-to-one
921 -- with the wrapper arg vars
922 mkWWstr opts args
923 = go args
924 where
925 go_one arg = mkWWstr_one opts arg
926
927 go [] = return (False, [], nop_fn, [])
928 go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg
929 ; (useful2, args2, wrap_fn2, wrap_args) <- go args
930 ; return ( useful1 || useful2
931 , args1 ++ args2
932 , wrap_fn1 . wrap_fn2
933 , wrap_arg:wrap_args ) }
934
935 ----------------------
936 -- mkWWstr_one wrap_var = (useful, work_args, wrap_fn, wrap_arg)
937 -- * wrap_fn assumes wrap_var is in scope,
938 -- brings into scope work_args (via cases)
939 -- * wrap_arg assumes work_args are in scope, and builds a ConApp that
940 -- reconstructs the RHS of wrap_var that we pass to the original RHS
941 -- See Note [Worker/wrapper for Strictness and Absence]
942 mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
943 mkWWstr_one opts arg =
944 case wantToUnboxArg fam_envs arg_ty arg_dmd of
945 _ | isTyVar arg -> do_nothing
946
947 DropAbsent
948 | Just absent_filler <- mkAbsentFiller opts arg
949 -- Absent case. We can't always handle absence for arbitrary
950 -- unlifted types, so we need to choose just the cases we can
951 -- (that's what mkAbsentFiller does)
952 -> return (True, [], nop_fn, absent_filler)
953
954 Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc
955
956 _ -> do_nothing -- Other cases, like StopUnboxing
957
958 where
959 fam_envs = wo_fam_envs opts
960 arg_ty = idType arg
961 arg_dmd = idDemandInfo arg
962 do_nothing = return (False, [arg], nop_fn, varToCoreExpr arg)
963
964 unbox_one_arg :: WwOpts
965 -> Var
966 -> [Demand]
967 -> DataConPatContext
968 -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
969 unbox_one_arg opts arg_var ds
970 DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
971 , dcpc_co = co }
972 = do { pat_bndrs_uniqs <- getUniquesM
973 ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc
974 (ex_tvs', arg_ids) =
975 dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
976 arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
977 unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
978 dc (ex_tvs' ++ arg_ids')
979 ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids')
980 ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
981 ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) }
982 -- Don't pass the arg, rebox instead
983
984 -- | Tries to find a suitable absent filler to bind the given absent identifier
985 -- to. See Note [Absent fillers].
986 --
987 -- If @mkAbsentFiller _ id == Just e@, then @e@ is an absent filler with the
988 -- same type as @id@. Otherwise, no suitable filler could be found.
989 mkAbsentFiller :: WwOpts -> Id -> Maybe CoreExpr
990 mkAbsentFiller opts arg
991 -- The lifted case: Bind 'absentError' for a nice panic message if we are
992 -- wrong (like we were in #11126). See (1) in Note [Absent fillers]
993 | not (isUnliftedType arg_ty)
994 , not is_strict, not is_evald -- See (2) in Note [Absent fillers]
995 = Just (mkAbsentErrorApp arg_ty msg)
996
997 -- The default case for mono rep: Bind `RUBBISH[rr] arg_ty`
998 -- See Note [Absent fillers], the main part
999 | otherwise
1000 = mkLitRubbish arg_ty
1001
1002 where
1003 arg_ty = idType arg
1004 is_strict = isStrictDmd (idDemandInfo arg)
1005 is_evald = isEvaldUnfolding $ idUnfolding arg
1006
1007 msg = renderWithContext
1008 (defaultSDocContext { sdocSuppressUniques = True })
1009 (vcat
1010 [ text "Arg:" <+> ppr arg
1011 , text "Type:" <+> ppr arg_ty
1012 , file_msg ])
1013 -- We need to suppress uniques here because otherwise they'd
1014 -- end up in the generated code as strings. This is bad for
1015 -- determinism, because with different uniques the strings
1016 -- will have different lengths and hence different costs for
1017 -- the inliner leading to different inlining.
1018 -- See also Note [Unique Determinism] in GHC.Types.Unique
1019 file_msg = text "In module" <+> quotes (ppr $ wo_module opts)
1020
1021 {- Note [Worker/wrapper for Strictness and Absence]
1022 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1023 The worker/wrapper transformation, mkWWstr_one, takes concrete action
1024 based on the 'UnboxingDescision' returned by 'wantToUnboxArg'.
1025 The latter takes into account several possibilities to decide if the
1026 function is worthy for splitting:
1027
1028 1. If an argument is absent, it would be silly to pass it to
1029 the worker. Hence the DropAbsent case. This case must come
1030 first because the bottom demand B is also strict.
1031 E.g. B comes from a function like
1032 f x = error "urk"
1033 and the absent demand A can come from Note [Unboxing evaluated arguments]
1034
1035 2. If the argument is evaluated strictly (or known to be eval'd),
1036 we can take a view into the product demand ('viewProd'). In accordance
1037 with Note [Boxity analysis], 'wantToUnboxArg' will say 'Unbox'.
1038 'mkWWstr_one' then follows suit it and recurses into the fields of the
1039 product demand. For example
1040
1041 f :: (Int, Int) -> Int
1042 f p = (case p of (a,b) -> a) + 1
1043 is split to
1044 f :: (Int, Int) -> Int
1045 f p = case p of (a,b) -> $wf a
1046
1047 $wf :: Int -> Int
1048 $wf a = a + 1
1049
1050 and
1051 g :: Bool -> (Int, Int) -> Int
1052 g c p = case p of (a,b) ->
1053 if c then a else b
1054 is split to
1055 g c p = case p of (a,b) -> $gw c a b
1056 $gw c a b = if c then a else b
1057
1058 2a But do /not/ split if Boxity Analysis said "Boxed".
1059 In this case, 'wantToUnboxArg' returns 'StopUnboxing'.
1060 Otherwise we risk decomposing and reboxing a massive
1061 tuple which is barely used. Example:
1062
1063 f :: ((Int,Int) -> String) -> (Int,Int) -> a
1064 f g pr = error (g pr)
1065
1066 main = print (f fst (1, error "no"))
1067
1068 Here, f does not take 'pr' apart, and it's stupid to do so.
1069 Imagine that it had millions of fields. This actually happened
1070 in GHC itself where the tuple was DynFlags
1071
1072 3. In all other cases (e.g., lazy, used demand and not eval'd),
1073 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed'
1074 (see Note [Finalising boxity for demand signature]) and
1075 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one'
1076 stops unboxing.
1077
1078 Note [Worker/wrapper for bottoming functions]
1079 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1080 We used not to split if the result is bottom.
1081 [Justification: there's no efficiency to be gained.]
1082
1083 But it's sometimes bad not to make a wrapper. Consider
1084 fw = \x# -> let x = I# x# in case e of
1085 p1 -> error_fn x
1086 p2 -> error_fn x
1087 p3 -> the real stuff
1088 The re-boxing code won't go away unless error_fn gets a wrapper too.
1089 [We don't do reboxing now, but in general it's better to pass an
1090 unboxed thing to f, and have it reboxed in the error cases....]
1091
1092 Note [Record evaluated-ness in worker/wrapper]
1093 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1094 Suppose we have
1095
1096 data T = MkT !Int Int
1097
1098 f :: T -> T
1099 f x = e
1100
1101 and f's is strict, and has the CPR property. The we are going to generate
1102 this w/w split
1103
1104 f x = case x of
1105 MkT x1 x2 -> case $wf x1 x2 of
1106 (# r1, r2 #) -> MkT r1 r2
1107
1108 $wfw x1 x2 = let x = MkT x1 x2 in
1109 case e of
1110 MkT r1 r2 -> (# r1, r2 #)
1111
1112 Note that
1113
1114 * In the worker $wf, inside 'e' we can be sure that x1 will be
1115 evaluated (it came from unpacking the argument MkT. But that's no
1116 immediately apparent in $wf
1117
1118 * In the wrapper 'f', which we'll inline at call sites, we can be sure
1119 that 'r1' has been evaluated (because it came from unpacking the result
1120 MkT. But that is not immediately apparent from the wrapper code.
1121
1122 Missing these facts isn't unsound, but it loses possible future
1123 opportunities for optimisation.
1124
1125 Solution: use setCaseBndrEvald when creating
1126 (A) The arg binders x1,x2 in mkWstr_one via mkUnpackCase
1127 See #13077, test T13077
1128 (B) The result binders r1,r2 in mkWWcpr_entry
1129 See Trace #13077, test T13077a
1130 And #13027 comment:20, item (4)
1131 to record that the relevant binder is evaluated.
1132
1133 Note [Absent fillers]
1134 ~~~~~~~~~~~~~~~~~~~~~
1135 Consider
1136
1137 data T = MkT [Int] [Int] ![Int] -- NB: last field is strict
1138 f :: T -> Int# -> blah
1139 f ps w = case ps of MkT xs ys zs -> <body mentioning xs>
1140
1141 Then f gets a strictness sig of <S(L,A,A)><A>. We make a worker $wf thus:
1142
1143 $wf :: [Int] -> blah
1144 $wf xs = case ps of MkT xs _ _ -> <body mentioning xs>
1145 where
1146 ys = absentError "ys :: [Int]"
1147 zs = RUBBISH[LiftedRep] @[Int]
1148 ps = MkT xs ys zs
1149 w = RUBBISH[IntRep] @Int#
1150
1151 The absent arguments 'ys', 'zs' and 'w' aren't even passed to the worker.
1152 And neither should they! They are never used, their value is irrelevant (hence
1153 they are *dead code*) and they are probably discarded after the next run of the
1154 Simplifier (when they are in fact *unreachable code*). Yet, we have to come up
1155 with "filler" values that we bind the absent arg Ids to.
1156
1157 That is exactly what Note [Rubbish literals] are for: A convenient way to
1158 conjure filler values at any type (and any representation or levity!).
1159
1160 Needless to say, there are some wrinkles:
1161
1162 1. In case we have a absent, /lazy/, and /lifted/ arg, we use an error-thunk
1163 instead. If absence analysis was wrong (e.g., #11126) and the binding
1164 in fact is used, then we get a nice panic message instead of undefined
1165 runtime behavior (See Modes of failure from Note [Rubbish literals]).
1166
1167 Obviously, we can't use an error-thunk if the value is of unlifted rep
1168 (like 'Int#' or 'MutVar#'), because we'd immediately evaluate the panic.
1169
1170 2. We also mustn't put an error-thunk (that fills in for an absent value of
1171 lifted rep) in a strict field, because #16970 establishes the invariant
1172 that strict fields are always evaluated, by (re-)evaluating what is put in
1173 a strict field. That's the reason why 'zs' binds a rubbish literal instead
1174 of an error-thunk, see #19133.
1175
1176 How do we detect when we are about to put an error-thunk in a strict field?
1177 Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but
1178 it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'.
1179 So we rather look out for a necessary condition for strict fields:
1180 Note [Unboxing evaluated arguments] makes it so that the demand on
1181 'zs' is absent and /strict/: It will get cardinality 'C_10', the empty
1182 interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees
1183 we never fill in an error-thunk for an absent strict field.
1184 But that also means we emit a rubbish lit for other args that have
1185 cardinality 'C_10' (say, the arg to a bottoming function) where we could've
1186 used an error-thunk, but that's a small price to pay for simplicity.
1187
1188 In #19766, we discovered that even if the binder has eval cardinality
1189 'C_00', it may end up in a strict field, with no surrounding seq
1190 whatsoever! That happens if the calling code has already evaluated
1191 said lambda binder, which will then have an evaluated unfolding
1192 ('isEvaldUnfolding'). That in turn tells the Simplifier it is free to drop
1193 the seq. So we better don't fill in an error-thunk for eval'd arguments
1194 either, just in case it ends up in a strict field!
1195
1196 3. We can only emit a LitRubbish if the arg's type @arg_ty@ is mono-rep, e.g.
1197 of the form @TYPE rep@ where @rep@ is not (and doesn't contain) a variable.
1198 Why? Because if we don't know its representation (e.g. size in memory,
1199 register class), we don't know what or how much rubbish to emit in codegen.
1200 'typeMonoPrimRep_maybe' returns 'Nothing' in this case and we simply fall
1201 back to passing the original parameter to the worker.
1202
1203 Note that currently this case should not occur, because binders always
1204 have to be representation monomorphic. But in the future, we might allow
1205 levity polymorphism, e.g. a polymorphic levity variable in 'BoxedRep'.
1206
1207 While (1) and (2) are simply an optimisation in terms of compiler debugging
1208 experience, (3) should be irrelevant in most programs, if not all.
1209
1210 Historical note: I did try the experiment of using an error thunk for unlifted
1211 things too, relying on the simplifier to drop it as dead code. But this is
1212 fragile
1213
1214 - It fails when profiling is on, which disables various optimisations
1215
1216 - It fails when reboxing happens. E.g.
1217 data T = MkT Int Int#
1218 f p@(MkT a _) = ...g p....
1219 where g is /lazy/ in 'p', but only uses the first component. Then
1220 'f' is /strict/ in 'p', and only uses the first component. So we only
1221 pass that component to the worker for 'f', which reconstructs 'p' to
1222 pass it to 'g'. Alas we can't say
1223 ...f (MkT a (absentError Int# "blah"))...
1224 because `MkT` is strict in its Int# argument, so we get an absentError
1225 exception when we shouldn't. Very annoying!
1226
1227 ************************************************************************
1228 * *
1229 Type scrutiny that is specific to demand analysis
1230 * *
1231 ************************************************************************
1232 -}
1233
1234 -- | Exactly 'dataConInstArgTys', but lacks the (ASSERT'ed) precondition that
1235 -- the 'DataCon' may not have existentials. The lack of cloning the existentials
1236 -- compared to 'dataConInstExAndArgVars' makes this function \"dubious\";
1237 -- only use it where type variables aren't substituted for!
1238 dubiousDataConInstArgTys :: DataCon -> [Type] -> [Type]
1239 dubiousDataConInstArgTys dc tc_args = arg_tys
1240 where
1241 univ_tvs = dataConUnivTyVars dc
1242 ex_tvs = dataConExTyCoVars dc
1243 subst = extendTCvInScopeList (zipTvSubst univ_tvs tc_args) ex_tvs
1244 arg_tys = map (GHC.Core.Type.substTy subst . scaledThing) (dataConRepArgTys dc)
1245
1246 findTypeShape :: FamInstEnvs -> Type -> TypeShape
1247 -- Uncover the arrow and product shape of a type
1248 -- The data type TypeShape is defined in GHC.Types.Demand
1249 -- See Note [Trimming a demand to a type] in GHC.Core.Opt.DmdAnal
1250 findTypeShape fam_envs ty
1251 = go (setRecTcMaxBound 2 initRecTc) ty
1252 -- You might think this bound of 2 is low, but actually
1253 -- I think even 1 would be fine. This only bites for recursive
1254 -- product types, which are rare, and we really don't want
1255 -- to look deep into such products -- see #18034
1256 where
1257 go rec_tc ty
1258 | Just (_, _, res) <- splitFunTy_maybe ty
1259 = TsFun (go rec_tc res)
1260
1261 | Just (tc, tc_args) <- splitTyConApp_maybe ty
1262 = go_tc rec_tc tc tc_args
1263
1264 | Just (_, ty') <- splitForAllTyCoVar_maybe ty
1265 = go rec_tc ty'
1266
1267 | otherwise
1268 = TsUnk
1269
1270 go_tc rec_tc tc tc_args
1271 | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe fam_envs tc tc_args
1272 = go rec_tc rhs
1273
1274 | Just con <- tyConSingleAlgDataCon_maybe tc
1275 , Just rec_tc <- if isTupleTyCon tc
1276 then Just rec_tc
1277 else checkRecTc rec_tc tc
1278 -- We treat tuples specially because they can't cause loops.
1279 -- Maybe we should do so in checkRecTc.
1280 -- The use of 'dubiousDataConInstArgTys' is OK, since this
1281 -- function performs no substitution at all, hence the uniques
1282 -- don't matter.
1283 -- We really do encounter existentials here, see
1284 -- Note [Which types are unboxed?] for an example.
1285 = TsProd (map (go rec_tc) (dubiousDataConInstArgTys con tc_args))
1286
1287 | Just (ty', _) <- instNewTyCon_maybe tc tc_args
1288 , Just rec_tc <- checkRecTc rec_tc tc
1289 = go rec_tc ty'
1290
1291 | otherwise
1292 = TsUnk
1293
1294 -- | Returned by 'isRecDataCon'.
1295 -- See also Note [Detecting recursive data constructors].
1296 data IsRecDataConResult
1297 = DefinitelyRecursive -- ^ The algorithm detected a loop
1298 | NonRecursiveOrUnsure -- ^ The algorithm detected no loop, went out of fuel
1299 -- or hit an .hs-boot file
1300 deriving (Eq, Show)
1301
1302 instance Outputable IsRecDataConResult where
1303 ppr = text . show
1304
1305 combineIRDCR :: IsRecDataConResult -> IsRecDataConResult -> IsRecDataConResult
1306 combineIRDCR DefinitelyRecursive _ = DefinitelyRecursive
1307 combineIRDCR _ DefinitelyRecursive = DefinitelyRecursive
1308 combineIRDCR _ _ = NonRecursiveOrUnsure
1309
1310 combineIRDCRs :: [IsRecDataConResult] -> IsRecDataConResult
1311 combineIRDCRs = foldl' combineIRDCR NonRecursiveOrUnsure
1312 {-# INLINE combineIRDCRs #-}
1313
1314 -- | @isRecDataCon _ fuel dc@, where @tc = dataConTyCon dc@ returns
1315 --
1316 -- * @Just Recursive@ if the analysis found that @tc@ is reachable through one
1317 -- of @dc@'s fields
1318 -- * @Just NonRecursive@ if the analysis found that @tc@ is not reachable
1319 -- through one of @dc@'s fields
1320 -- * @Nothing@ is returned in two cases. The first is when @fuel /= Infinity@
1321 -- and @f@ expansions of nested data TyCons were not enough to prove
1322 -- non-recursivenss, nor arrive at an occurrence of @tc@ thus proving
1323 -- recursiveness. The other is when we hit an abstract TyCon (one without
1324 -- visible DataCons), such as those imported from .hs-boot files.
1325 --
1326 -- If @fuel = 'Infinity'@ and there are no boot files involved, then the result
1327 -- is never @Nothing@ and the analysis is a depth-first search. If @fuel = 'Int'
1328 -- f@, then the analysis behaves like a depth-limited DFS and returns @Nothing@
1329 -- if the search was inconclusive.
1330 --
1331 -- See Note [Detecting recursive data constructors] for which recursive DataCons
1332 -- we want to flag.
1333 isRecDataCon :: FamInstEnvs -> IntWithInf -> DataCon -> IsRecDataConResult
1334 isRecDataCon fam_envs fuel dc
1335 | isTupleDataCon dc || isUnboxedSumDataCon dc
1336 = NonRecursiveOrUnsure
1337 | otherwise
1338 = -- pprTrace "isRecDataCon" (ppr dc <+> dcolon <+> ppr (dataConRepType dc) $$ ppr fuel $$ ppr answer)
1339 answer
1340 where
1341 answer = go_dc fuel (setRecTcMaxBound 1 initRecTc) dc
1342 (<||>) = combineIRDCR
1343
1344 go_dc :: IntWithInf -> RecTcChecker -> DataCon -> IsRecDataConResult
1345 go_dc fuel rec_tc dc =
1346 combineIRDCRs [ go_arg_ty fuel rec_tc (scaledThing arg_ty)
1347 | arg_ty <- dataConRepArgTys dc ]
1348
1349 go_arg_ty :: IntWithInf -> RecTcChecker -> Type -> IsRecDataConResult
1350 go_arg_ty fuel rec_tc ty
1351 --- | pprTrace "arg_ty" (ppr ty) False = undefined
1352
1353 | Just (_, _arg_ty, _res_ty) <- splitFunTy_maybe ty
1354 -- = go_arg_ty fuel rec_tc _arg_ty <||> go_arg_ty fuel rec_tc _res_ty
1355 -- Plausible, but unnecessary for CPR.
1356 -- See Note [Detecting recursive data constructors], point (1)
1357 = NonRecursiveOrUnsure
1358
1359 | Just (_tcv, ty') <- splitForAllTyCoVar_maybe ty
1360 = go_arg_ty fuel rec_tc ty'
1361 -- See Note [Detecting recursive data constructors], point (2)
1362
1363 | Just (tc, tc_args) <- splitTyConApp_maybe ty
1364 = combineIRDCRs (map (go_arg_ty fuel rec_tc) tc_args)
1365 <||> go_tc_app fuel rec_tc tc tc_args
1366
1367 | otherwise
1368 = NonRecursiveOrUnsure
1369
1370 -- | PRECONDITION: tc_args has no recursive occs
1371 -- See Note [Detecting recursive data constructors], point (5)
1372 go_tc_app :: IntWithInf -> RecTcChecker -> TyCon -> [Type] -> IsRecDataConResult
1373 go_tc_app fuel rec_tc tc tc_args
1374 --- | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False = undefined
1375
1376 | tc == dataConTyCon dc
1377 = DefinitelyRecursive -- loop found!
1378
1379 | isPrimTyCon tc
1380 = NonRecursiveOrUnsure
1381
1382 | not $ tcIsRuntimeTypeKind $ tyConResKind tc
1383 = NonRecursiveOrUnsure
1384
1385 | isAbstractTyCon tc -- When tc has no DataCons, from an hs-boot file
1386 = NonRecursiveOrUnsure -- See Note [Detecting recursive data constructors], point (7)
1387
1388 | isFamilyTyCon tc
1389 -- This is the only place where we look at tc_args
1390 -- See Note [Detecting recursive data constructors], point (5)
1391 = case topReduceTyFamApp_maybe fam_envs tc tc_args of
1392 Just (HetReduction (Reduction _ rhs) _) -> go_arg_ty fuel rec_tc rhs
1393 Nothing -> DefinitelyRecursive -- we hit this case for 'Any'
1394
1395 | otherwise
1396 = assertPpr (isAlgTyCon tc) (ppr tc <+> ppr dc) $
1397 case checkRecTc rec_tc tc of
1398 Nothing -> NonRecursiveOrUnsure
1399 -- we expanded this TyCon once already, no need to test it multiple times
1400
1401 Just rec_tc'
1402 | Just (_tvs, rhs, _co) <- unwrapNewTyCon_maybe tc
1403 -- See Note [Detecting recursive data constructors], points (2) and (3)
1404 -> go_arg_ty fuel rec_tc' rhs
1405
1406 | fuel < 0
1407 -> NonRecursiveOrUnsure -- that's why we track fuel!
1408
1409 | let dcs = expectJust "isRecDataCon:go_tc_app" $ tyConDataCons_maybe tc
1410 -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs)
1411 -- See Note [Detecting recursive data constructors], point (4)
1412
1413 -- | A specialised Bool for an argument to 'finaliseBoxity'.
1414 -- See Note [Do not unbox class dictionaries].
1415 data InsideInlineableFun
1416 = NotInsideInlineableFun -- ^ Not in an inlineable fun.
1417 | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't
1418 -- unbox dictionary args.
1419 deriving Eq
1420
1421 -- | This function makes sure that the demand only says 'Unboxed' where
1422 -- worker/wrapper should actually unbox and trims any boxity beyond that.
1423 -- Called for every demand annotation during DmdAnal.
1424 --
1425 -- > data T a = T !a
1426 -- > f :: (T (Int,Int), Int) -> ()
1427 -- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L))
1428 --
1429 -- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)).
1430 -- This is done when annotating lambdas and thunk bindings.
1431 -- See Note [Finalising boxity for demand signature]
1432 finaliseBoxity
1433 :: FamInstEnvs
1434 -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun'
1435 -> Type -- ^ Type of the argument
1436 -> Demand -- ^ How the arg was used
1437 -> Demand
1438 finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd
1439 where
1440 go mark ty dmd@(n :* _) =
1441 case wantToUnboxArg env ty dmd of
1442 DropAbsent -> dmd
1443 Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds
1444 -- See Note [No lazy, Unboxed demands in demand signature]
1445 -- See Note [Unboxing evaluated arguments]
1446 | isStrict n || isMarkedStrict mark
1447 -- See Note [Do not unbox class dictionaries]
1448 , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty)
1449 -- See Note [mkWWstr and unsafeCoerce]
1450 , ds `lengthIs` dataConRepArity dc
1451 , let arg_tys = dubiousDataConInstArgTys dc tc_args
1452 -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $
1453 n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds)
1454 -- See Note [No nested Unboxed inside Boxed in demand signature]
1455 _ -> trimBoxity dmd
1456
1457 -- See Note [Unboxing evaluated arguments]
1458 zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of
1459 Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds
1460 -- Shortcut when DataCon worker=wrapper
1461 Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds
1462
1463 {-
1464 ************************************************************************
1465 * *
1466 \subsection{Worker/wrapper for CPR}
1467 * *
1468 ************************************************************************
1469 See Note [Worker/wrapper for CPR] for an overview.
1470 -}
1471
1472 mkWWcpr_entry
1473 :: WwOpts
1474 -> Type -- function body
1475 -> Cpr -- CPR analysis results
1476 -> UniqSM (Bool, -- Is w/w'ing useful?
1477 CoreExpr -> CoreExpr, -- New wrapper. 'nop_fn' if not useful
1478 CoreExpr -> CoreExpr, -- New worker. 'nop_fn' if not useful
1479 Type) -- Type of worker's body.
1480 -- Just the input body_ty if not useful
1481 -- ^ Entrypoint to CPR W/W. See Note [Worker/wrapper for CPR] for an overview.
1482 mkWWcpr_entry opts body_ty body_cpr
1483 | not (wo_cpr_anal opts) = return (False, nop_fn, nop_fn, body_ty)
1484 | otherwise = do
1485 -- Part (1)
1486 res_bndr <- mk_res_bndr body_ty
1487 let bind_res_bndr body scope = mkDefaultCase body res_bndr scope
1488
1489 -- Part (2)
1490 (useful, fromOL -> transit_vars, rebuilt_result, work_unpack_res) <-
1491 mkWWcpr_one opts res_bndr body_cpr
1492
1493 -- Part (3)
1494 let (unbox_transit_tup, transit_tup) = move_transit_vars transit_vars
1495
1496 -- Stacking unboxer (work_fn) and builder (wrap_fn) together
1497 let wrap_fn = unbox_transit_tup rebuilt_result -- 3 2
1498 work_fn body = bind_res_bndr body (work_unpack_res transit_tup) -- 1 2 3
1499 work_body_ty = exprType transit_tup
1500 return $ if not useful
1501 then (False, nop_fn, nop_fn, body_ty)
1502 else (True, wrap_fn, work_fn, work_body_ty)
1503
1504 -- | Part (1) of Note [Worker/wrapper for CPR].
1505 mk_res_bndr :: Type -> UniqSM Id
1506 mk_res_bndr body_ty = do
1507 -- See Note [Linear types and CPR]
1508 bndr <- mkSysLocalOrCoVarM ww_prefix cprCaseBndrMult body_ty
1509 -- See Note [Record evaluated-ness in worker/wrapper]
1510 pure (setCaseBndrEvald MarkedStrict bndr)
1511
1512 -- | What part (2) of Note [Worker/wrapper for CPR] collects.
1513 --
1514 -- 1. A Bool capturing whether the transformation did anything useful.
1515 -- 2. The list of transit variables (see the Note).
1516 -- 3. The result builder expression for the wrapper. The original case binder if not useful.
1517 -- 4. The result unpacking expression for the worker. 'nop_fn' if not useful.
1518 type CprWwResultOne = (Bool, OrdList Var, CoreExpr , CoreExpr -> CoreExpr)
1519 type CprWwResultMany = (Bool, OrdList Var, [CoreExpr], CoreExpr -> CoreExpr)
1520
1521 mkWWcpr :: WwOpts -> [Id] -> [Cpr] -> UniqSM CprWwResultMany
1522 mkWWcpr _opts vars [] =
1523 -- special case: No CPRs means all top (for example from FlatConCpr),
1524 -- hence stop WW.
1525 return (False, toOL vars, map varToCoreExpr vars, nop_fn)
1526 mkWWcpr opts vars cprs = do
1527 -- No existentials in 'vars'. 'wantToUnboxResult' should have checked that.
1528 massertPpr (not (any isTyVar vars)) (ppr vars $$ ppr cprs)
1529 massertPpr (equalLength vars cprs) (ppr vars $$ ppr cprs)
1530 (usefuls, varss, rebuilt_results, work_unpack_ress) <-
1531 unzip4 <$> zipWithM (mkWWcpr_one opts) vars cprs
1532 return ( or usefuls
1533 , concatOL varss
1534 , rebuilt_results
1535 , foldl' (.) nop_fn work_unpack_ress )
1536
1537 mkWWcpr_one :: WwOpts -> Id -> Cpr -> UniqSM CprWwResultOne
1538 -- ^ See if we want to unbox the result and hand off to 'unbox_one_result'.
1539 mkWWcpr_one opts res_bndr cpr
1540 | assert (not (isTyVar res_bndr) ) True
1541 , Unbox dcpc arg_cprs <- wantToUnboxResult (wo_fam_envs opts) (idType res_bndr) cpr
1542 = unbox_one_result opts res_bndr arg_cprs dcpc
1543 | otherwise
1544 = return (False, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
1545
1546 unbox_one_result
1547 :: WwOpts -> Id -> [Cpr] -> DataConPatContext -> UniqSM CprWwResultOne
1548 -- ^ Implements the main bits of part (2) of Note [Worker/wrapper for CPR]
1549 unbox_one_result opts res_bndr arg_cprs
1550 DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
1551 , dcpc_co = co } = do
1552 -- unboxer (free in `res_bndr`): | builder (where <i> builds what was
1553 -- ( case res_bndr of (i, j) -> ) | bound to i)
1554 -- ( case i of I# a -> ) |
1555 -- ( case j of I# b -> ) | ( (<i>, <j>) )
1556 -- ( <hole> ) |
1557 pat_bndrs_uniqs <- getUniquesM
1558 let (_exs, arg_ids) =
1559 dataConRepFSInstPat (repeat ww_prefix) pat_bndrs_uniqs cprCaseBndrMult dc tc_args
1560 massert (null _exs) -- Should have been caught by wantToUnboxResult
1561
1562 (nested_useful, transit_vars, con_args, work_unbox_res) <-
1563 mkWWcpr opts arg_ids arg_cprs
1564
1565 let -- rebuilt_result = (C a b |> sym co)
1566 rebuilt_result = mkConApp dc (map Type tc_args ++ con_args) `mkCast` mkSymCo co
1567 -- this_work_unbox_res alt = (case res_bndr |> co of C a b -> <alt>[a,b])
1568 this_work_unbox_res = mkUnpackCase (Var res_bndr) co cprCaseBndrMult dc arg_ids
1569
1570 -- Don't try to WW an unboxed tuple return type when there's nothing inside
1571 -- to unbox further.
1572 return $ if isUnboxedTupleDataCon dc && not nested_useful
1573 then ( False, unitOL res_bndr, Var res_bndr, nop_fn )
1574 else ( True
1575 , transit_vars
1576 , rebuilt_result
1577 , this_work_unbox_res . work_unbox_res
1578 )
1579
1580 -- | Implements part (3) of Note [Worker/wrapper for CPR].
1581 --
1582 -- If `move_transit_vars [a,b] = (unbox, tup)` then
1583 -- * `a` and `b` are the *transit vars* to be returned from the worker
1584 -- to the wrapper
1585 -- * `unbox scrut alt = (case <scrut> of (# a, b #) -> <alt>)`
1586 -- * `tup = (# a, b #)`
1587 -- There is a special case for when there's 1 transit var,
1588 -- see Note [No unboxed tuple for single, unlifted transit var].
1589 move_transit_vars :: [Id] -> (CoreExpr -> CoreExpr -> CoreExpr, CoreExpr)
1590 move_transit_vars vars
1591 | [var] <- vars
1592 , let var_ty = idType var
1593 , isUnliftedType var_ty || exprIsHNF (Var var)
1594 -- See Note [No unboxed tuple for single, unlifted transit var]
1595 -- * Wrapper: `unbox scrut alt = (case <scrut> of a -> <alt>)`
1596 -- * Worker: `tup = a`
1597 = ( \build_res wkr_call -> mkDefaultCase wkr_call var build_res
1598 , varToCoreExpr var ) -- varToCoreExpr important here: var can be a coercion
1599 -- Lacking this caused #10658
1600 | otherwise
1601 -- The general case: Just return an unboxed tuple from the worker
1602 -- * Wrapper: `unbox scrut alt = (case <scrut> of (# a, b #) -> <alt>)`
1603 -- * Worker: `tup = (# a, b #)`
1604 = ( \build_res wkr_call -> mkSingleAltCase wkr_call case_bndr
1605 (DataAlt tup_con) vars build_res
1606 , ubx_tup_app )
1607 where
1608 ubx_tup_app = mkCoreUbxTup (map idType vars) (map varToCoreExpr vars)
1609 tup_con = tupleDataCon Unboxed (length vars)
1610 -- See also Note [Linear types and CPR]
1611 case_bndr = mkWildValBinder cprCaseBndrMult (exprType ubx_tup_app)
1612
1613
1614 {- Note [Worker/wrapper for CPR]
1615 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1616 'mkWWcpr_entry' is the entry-point to the worker/wrapper transformation that
1617 exploits CPR info. Here's an example:
1618 ```
1619 f :: ... -> (Int, Int)
1620 f ... = <body>
1621 ```
1622 Let's assume the CPR info `body_cpr` for the body of `f` says
1623 "unbox the pair and its components" and `body_ty` is the type of the function
1624 body `body` (i.e., `(Int, Int)`). Then `mkWWcpr_entry body_ty body_cpr` returns
1625
1626 * A result-unpacking expression for the worker, with a hole for the fun body:
1627 ```
1628 unpack body = ( case <body> of r __DEFAULT -> ) -- (1)
1629 ( case r of (i, j) -> ) -- (2)
1630 ( case i of I# a -> ) -- (2)
1631 ( case j of I# b -> ) -- (2)
1632 ( (# a, b #) ) -- (3)
1633 ```
1634 * A result-building expression for the wrapper, with a hole for the worker call:
1635 ```
1636 build wkr_call = ( case <wkr_call> of (# a, b #) -> ) -- (3)
1637 ( (I# a, I# b) ) -- (2)
1638 ```
1639 * The result type of the worker, e.g., `(# Int#, Int# #)` above.
1640
1641 To achieve said transformation, 'mkWWcpr_entry'
1642
1643 1. First allocates a fresh result binder `r`, giving a name to the `body`
1644 expression and contributing part (1) of the unpacker and builder.
1645 2. Then it delegates to 'mkWWcpr_one', which recurses into all result fields
1646 to unbox, contributing the parts marked with (2). Crucially, it knows
1647 what belongs in the case scrutinee of the unpacker through the communicated
1648 Id `r`: The unpacking expression will be free in that variable.
1649 (This is a similar contract as that of 'mkWWstr_one' for strict args.)
1650 3. 'mkWWstr_one' produces a bunch of *transit vars*: Those result variables
1651 that have to be transferred from the worker to the wrapper, where the
1652 constructed result can be rebuilt, `a` and `b` above. Part (3) is
1653 responsible for tupling them up in the worker and taking the tuple apart
1654 in the wrapper. This is implemented in 'move_transit_vars'.
1655
1656 Note [No unboxed tuple for single, unlifted transit var]
1657 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1658 When there's only a single, unlifted transit var (Note [Worker/wrapper for CPR]),
1659 we don't wrap an unboxed singleton tuple around it (which otherwise would be
1660 needed to suspend evaluation) and return the unlifted thing directly. E.g.
1661 ```
1662 f :: Int -> Int
1663 f x = x+1
1664 ```
1665 We certainly want `$wf :: Int# -> Int#`, not `$wf :: Int# -> (# Int# #)`.
1666 This is OK as long as we know that evaluation of the returned thing terminates
1667 quickly, as is the case for fields of unlifted type like `Int#`.
1668
1669 But more generally, this should also be true for *lifted* types that terminate
1670 quickly! Consider from `T18109`:
1671 ```
1672 data F = F (Int -> Int)
1673 f :: Int -> F
1674 f n = F (+n)
1675
1676 data T = T (Int, Int)
1677 g :: T -> T
1678 g t@(T p) = p `seq` t
1679
1680 data U = U ![Int]
1681 h :: Int -> U
1682 h n = U [0..n]
1683 ```
1684 All of the nested fields are actually ok-for-speculation and thus OK to
1685 return unboxed instead of in an unboxed singleton tuple:
1686
1687 1. The field of `F` is a HNF.
1688 We want `$wf :: Int -> Int -> Int`.
1689 We get `$wf :: Int -> (# Int -> Int #)`.
1690 2. The field of `T` is `seq`'d in `g`.
1691 We want `$wg :: (Int, Int) -> (Int, Int)`.
1692 We get `$wg :: (Int, Int) -> (# (Int, Int) #)`.
1693 3. The field of `U` is strict and thus always evaluated.
1694 We want `$wh :: Int# -> [Int]`.
1695 We'd get `$wh :: Int# -> (# [Int] #)`.
1696
1697 By considering vars as unlifted that satsify 'exprIsHNF', we catch (3).
1698 Why not check for 'exprOkForSpeculation'? Quite perplexingly, evaluated vars
1699 are not ok-for-spec, see Note [exprOkForSpeculation and evaluated variables].
1700 For (1) and (2) we would have to look at the term. WW only looks at the
1701 type and the CPR signature, so the only way to fix (1) and (2) would be to
1702 have a nested termination signature, like in MR !1866.
1703
1704 Note [Linear types and CPR]
1705 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1706 Remark on linearity: in both the case of the wrapper and the worker,
1707 we build a linear case to unpack constructed products. All the
1708 multiplicity information is kept in the constructors (both C and (#, #)).
1709 In particular (#,#) is parametrised by the multiplicity of its fields.
1710 Specifically, in this instance, the multiplicity of the fields of (#,#)
1711 is chosen to be the same as those of C.
1712
1713
1714 ************************************************************************
1715 * *
1716 \subsection{Utilities}
1717 * *
1718 ************************************************************************
1719 -}
1720
1721 mkUnpackCase :: CoreExpr -> Coercion -> Mult -> DataCon -> [Id] -> CoreExpr -> CoreExpr
1722 -- (mkUnpackCase e co Con args body)
1723 -- returns
1724 -- case e |> co of _dead { Con args -> body }
1725 mkUnpackCase (Tick tickish e) co mult con args body -- See Note [Profiling and unpacking]
1726 = Tick tickish (mkUnpackCase e co mult con args body)
1727 mkUnpackCase scrut co mult boxing_con unpk_args body
1728 = mkSingleAltCase casted_scrut bndr
1729 (DataAlt boxing_con) unpk_args body
1730 where
1731 casted_scrut = scrut `mkCast` co
1732 bndr = mkWildValBinder mult (exprType casted_scrut)
1733
1734 -- | The multiplicity of a case binder unboxing a constructed result.
1735 -- See Note [Linear types and CPR]
1736 cprCaseBndrMult :: Mult
1737 cprCaseBndrMult = One
1738
1739 ww_prefix :: FastString
1740 ww_prefix = fsLit "ww"
1741
1742 {- Note [Profiling and unpacking]
1743 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1744 If the original function looked like
1745 f = \ x -> {-# SCC "foo" #-} E
1746
1747 then we want the CPR'd worker to look like
1748 \ x -> {-# SCC "foo" #-} (case E of I# x -> x)
1749 and definitely not
1750 \ x -> case ({-# SCC "foo" #-} E) of I# x -> x)
1751
1752 This transform doesn't move work or allocation
1753 from one cost centre to another.
1754
1755 Later [SDM]: presumably this is because we want the simplifier to
1756 eliminate the case, and the scc would get in the way? I'm ok with
1757 including the case itself in the cost centre, since it is morally
1758 part of the function (post transformation) anyway.
1759 -}