never executed always true always false
1
2 {-# LANGUAGE BangPatterns #-}
3
4 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
5
6 {-
7 (c) The University of Glasgow, 1994-2006
8
9
10 Core pass to saturate constructors and PrimOps
11 -}
12
13 module GHC.CoreToStg.Prep
14 ( corePrepPgm
15 , corePrepExpr
16 , mkConvertNumLiteral
17 )
18 where
19
20 import GHC.Prelude
21
22 import GHC.Platform
23
24 import GHC.Driver.Session
25 import GHC.Driver.Env
26 import GHC.Driver.Ppr
27
28 import GHC.Tc.Utils.Env
29 import GHC.Unit
30
31 import GHC.Builtin.Names
32 import GHC.Builtin.PrimOps
33 import GHC.Builtin.Types
34 import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
35
36 import GHC.Core.Utils
37 import GHC.Core.Opt.Arity
38 import GHC.Core.FVs
39 import GHC.Core.Opt.Monad ( CoreToDo(..) )
40 import GHC.Core.Lint ( endPassIO )
41 import GHC.Core
42 import GHC.Core.Make hiding( FloatBind(..) ) -- We use our own FloatBind here
43 import GHC.Core.Type
44 import GHC.Core.Coercion
45 import GHC.Core.TyCon
46 import GHC.Core.DataCon
47 import GHC.Core.Opt.OccurAnal
48 import GHC.Core.TyCo.Rep( UnivCoProvenance(..) )
49
50 import GHC.Data.Maybe
51 import GHC.Data.OrdList
52 import GHC.Data.FastString
53 import GHC.Data.Pair
54
55 import GHC.Utils.Error
56 import GHC.Utils.Misc
57 import GHC.Utils.Panic
58 import GHC.Utils.Panic.Plain
59 import GHC.Utils.Outputable
60 import GHC.Utils.Monad ( mapAccumLM )
61 import GHC.Utils.Logger
62 import GHC.Utils.Trace
63
64 import GHC.Types.Demand
65 import GHC.Types.Var
66 import GHC.Types.Var.Set
67 import GHC.Types.Var.Env
68 import GHC.Types.Id
69 import GHC.Types.Id.Info
70 import GHC.Types.Id.Make ( realWorldPrimId, mkPrimOpId )
71 import GHC.Types.Basic
72 import GHC.Types.Name ( NamedThing(..), nameSrcSpan, isInternalName )
73 import GHC.Types.SrcLoc ( SrcSpan(..), realSrcLocSpan, mkRealSrcLoc )
74 import GHC.Types.Literal
75 import GHC.Types.Tickish
76 import GHC.Types.TyThing
77 import GHC.Types.Unique.Supply
78
79 import Data.List ( unfoldr )
80 import Data.Functor.Identity
81 import Control.Monad
82
83 {-
84 -- ---------------------------------------------------------------------------
85 -- Note [CorePrep Overview]
86 -- ---------------------------------------------------------------------------
87
88 The goal of this pass is to prepare for code generation.
89
90 1. Saturate constructor and primop applications.
91
92 2. Convert to A-normal form; that is, function arguments
93 are always variables.
94
95 * Use case for strict arguments:
96 f E ==> case E of x -> f x
97 (where f is strict)
98
99 * Use let for non-trivial lazy arguments
100 f E ==> let x = E in f x
101 (were f is lazy and x is non-trivial)
102
103 3. Similarly, convert any unboxed lets into cases.
104 [I'm experimenting with leaving 'ok-for-speculation'
105 rhss in let-form right up to this point.]
106
107 4. Ensure that *value* lambdas only occur as the RHS of a binding
108 (The code generator can't deal with anything else.)
109 Type lambdas are ok, however, because the code gen discards them.
110
111 5. [Not any more; nuked Jun 2002] Do the seq/par munging.
112
113 6. Clone all local Ids.
114 This means that all such Ids are unique, rather than the
115 weaker guarantee of no clashes which the simplifier provides.
116 And that is what the code generator needs.
117
118 We don't clone TyVars or CoVars. The code gen doesn't need that,
119 and doing so would be tiresome because then we'd need
120 to substitute in types and coercions.
121
122 7. Give each dynamic CCall occurrence a fresh unique; this is
123 rather like the cloning step above.
124
125 8. Inject bindings for the "implicit" Ids:
126 * Constructor wrappers
127 * Constructor workers
128 We want curried definitions for all of these in case they
129 aren't inlined by some caller.
130
131 9. Replace (lazy e) by e. See Note [lazyId magic] in GHC.Types.Id.Make
132 Also replace (noinline e) by e.
133
134 10. Convert bignum literals into their core representation.
135
136 11. Uphold tick consistency while doing this: We move ticks out of
137 (non-type) applications where we can, and make sure that we
138 annotate according to scoping rules when floating.
139
140 12. Collect cost centres (including cost centres in unfoldings) if we're in
141 profiling mode. We have to do this here beucase we won't have unfoldings
142 after this pass (see `zapUnfolding` and Note [Drop unfoldings and rules].
143
144 13. Eliminate case clutter in favour of unsafe coercions.
145 See Note [Unsafe coercions]
146
147 14. Eliminate some magic Ids, specifically
148 runRW# (\s. e) ==> e[readWorldId/s]
149 lazy e ==> e
150 noinline e ==> e
151 ToDo: keepAlive# ...
152 This is done in cpeApp
153
154 This is all done modulo type applications and abstractions, so that
155 when type erasure is done for conversion to STG, we don't end up with
156 any trivial or useless bindings.
157
158 Note [Unsafe coercions]
159 ~~~~~~~~~~~~~~~~~~~~~~~
160 CorePrep does these two transformations:
161
162 1. Convert empty case to cast with an unsafe coercion
163 (case e of {}) ===> e |> unsafe-co
164 See Note [Empty case alternatives] in GHC.Core: if the case
165 alternatives are empty, the scrutinee must diverge or raise an
166 exception, so we can just dive into it.
167
168 Of course, if the scrutinee *does* return, we may get a seg-fault.
169 A belt-and-braces approach would be to persist empty-alternative
170 cases to code generator, and put a return point anyway that calls a
171 runtime system error function.
172
173 Notice that eliminating empty case can lead to an ill-kinded coercion
174 case error @Int "foo" of {} :: Int#
175 ===> error @Int "foo" |> unsafe-co
176 where unsafe-co :: Int ~ Int#
177 But that's fine because the expression diverges anyway. And it's
178 no different to what happened before.
179
180 2. Eliminate unsafeEqualityProof in favour of an unsafe coercion
181 case unsafeEqualityProof of UnsafeRefl g -> e
182 ===> e[unsafe-co/g]
183 See (U2) in Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
184
185 Note that this requires us to substitute 'unsafe-co' for 'g', and
186 that is the main (current) reason for cpe_tyco_env in CorePrepEnv.
187 Tiresome, but not difficult.
188
189 These transformations get rid of "case clutter", leaving only casts.
190 We are doing no further significant tranformations, so the reasons
191 for the case forms have disappeared. And it is extremely helpful for
192 the ANF-ery, CoreToStg, and backends, if trivial expressions really do
193 look trivial. #19700 was an example.
194
195 In both cases, the "unsafe-co" is just (UnivCo ty1 ty2 (CorePrepProv b)),
196 The boolean 'b' says whether the unsafe coercion is supposed to be
197 kind-homogeneous (yes for (2), no for (1). This information is used
198 /only/ by Lint.
199
200 Note [CorePrep invariants]
201 ~~~~~~~~~~~~~~~~~~~~~~~~~~
202 Here is the syntax of the Core produced by CorePrep:
203
204 Trivial expressions
205 arg ::= lit | var
206 | arg ty | /\a. arg
207 | truv co | /\c. arg | arg |> co
208
209 Applications
210 app ::= lit | var | app arg | app ty | app co | app |> co
211
212 Expressions
213 body ::= app
214 | let(rec) x = rhs in body -- Boxed only
215 | case body of pat -> body
216 | /\a. body | /\c. body
217 | body |> co
218
219 Right hand sides (only place where value lambdas can occur)
220 rhs ::= /\a.rhs | \x.rhs | body
221
222 We define a synonym for each of these non-terminals. Functions
223 with the corresponding name produce a result in that syntax.
224 -}
225
226 type CpeArg = CoreExpr -- Non-terminal 'arg'
227 type CpeApp = CoreExpr -- Non-terminal 'app'
228 type CpeBody = CoreExpr -- Non-terminal 'body'
229 type CpeRhs = CoreExpr -- Non-terminal 'rhs'
230
231 {-
232 ************************************************************************
233 * *
234 Top level stuff
235 * *
236 ************************************************************************
237 -}
238
239 corePrepPgm :: HscEnv -> Module -> ModLocation -> CoreProgram -> [TyCon]
240 -> IO CoreProgram
241 corePrepPgm hsc_env this_mod mod_loc binds data_tycons =
242 withTiming logger
243 (text "CorePrep"<+>brackets (ppr this_mod))
244 (\a -> a `seqList` ()) $ do
245 us <- mkSplitUniqSupply 's'
246 initialCorePrepEnv <- mkInitialCorePrepEnv hsc_env
247
248 let
249 implicit_binds = mkDataConWorkers dflags mod_loc data_tycons
250 -- NB: we must feed mkImplicitBinds through corePrep too
251 -- so that they are suitably cloned and eta-expanded
252
253 binds_out = initUs_ us $ do
254 floats1 <- corePrepTopBinds initialCorePrepEnv binds
255 floats2 <- corePrepTopBinds initialCorePrepEnv implicit_binds
256 return (deFloatTop (floats1 `appendFloats` floats2))
257
258 endPassIO hsc_env alwaysQualify CorePrep binds_out []
259 return binds_out
260 where
261 dflags = hsc_dflags hsc_env
262 logger = hsc_logger hsc_env
263
264 corePrepExpr :: HscEnv -> CoreExpr -> IO CoreExpr
265 corePrepExpr hsc_env expr = do
266 let logger = hsc_logger hsc_env
267 withTiming logger (text "CorePrep [expr]") (\e -> e `seq` ()) $ do
268 us <- mkSplitUniqSupply 's'
269 initialCorePrepEnv <- mkInitialCorePrepEnv hsc_env
270 let new_expr = initUs_ us (cpeBodyNF initialCorePrepEnv expr)
271 putDumpFileMaybe logger Opt_D_dump_prep "CorePrep" FormatCore (ppr new_expr)
272 return new_expr
273
274 corePrepTopBinds :: CorePrepEnv -> [CoreBind] -> UniqSM Floats
275 -- Note [Floating out of top level bindings]
276 corePrepTopBinds initialCorePrepEnv binds
277 = go initialCorePrepEnv binds
278 where
279 go _ [] = return emptyFloats
280 go env (bind : binds) = do (env', floats, maybe_new_bind)
281 <- cpeBind TopLevel env bind
282 massert (isNothing maybe_new_bind)
283 -- Only join points get returned this way by
284 -- cpeBind, and no join point may float to top
285 floatss <- go env' binds
286 return (floats `appendFloats` floatss)
287
288 mkDataConWorkers :: DynFlags -> ModLocation -> [TyCon] -> [CoreBind]
289 -- See Note [Data constructor workers]
290 -- c.f. Note [Injecting implicit bindings] in GHC.Iface.Tidy
291 mkDataConWorkers dflags mod_loc data_tycons
292 = [ NonRec id (tick_it (getName data_con) (Var id))
293 -- The ice is thin here, but it works
294 | tycon <- data_tycons, -- CorePrep will eta-expand it
295 data_con <- tyConDataCons tycon,
296 let id = dataConWorkId data_con
297 ]
298 where
299 -- If we want to generate debug info, we put a source note on the
300 -- worker. This is useful, especially for heap profiling.
301 tick_it name
302 | debugLevel dflags == 0 = id
303 | RealSrcSpan span _ <- nameSrcSpan name = tick span
304 | Just file <- ml_hs_file mod_loc = tick (span1 file)
305 | otherwise = tick (span1 "???")
306 where tick span = Tick (SourceNote span $ showSDoc dflags (ppr name))
307 span1 file = realSrcLocSpan $ mkRealSrcLoc (mkFastString file) 1 1
308
309 {-
310 Note [Floating out of top level bindings]
311 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
312 NB: we do need to float out of top-level bindings
313 Consider x = length [True,False]
314 We want to get
315 s1 = False : []
316 s2 = True : s1
317 x = length s2
318
319 We return a *list* of bindings, because we may start with
320 x* = f (g y)
321 where x is demanded, in which case we want to finish with
322 a = g y
323 x* = f a
324 And then x will actually end up case-bound
325
326 Note [Join points and floating]
327 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
328 Join points can float out of other join points but not out of value bindings:
329
330 let z =
331 let w = ... in -- can float
332 join k = ... in -- can't float
333 ... jump k ...
334 join j x1 ... xn =
335 let y = ... in -- can float (but don't want to)
336 join h = ... in -- can float (but not much point)
337 ... jump h ...
338 in ...
339
340 Here, the jump to h remains valid if h is floated outward, but the jump to k
341 does not.
342
343 We don't float *out* of join points. It would only be safe to float out of
344 nullary join points (or ones where the arguments are all either type arguments
345 or dead binders). Nullary join points aren't ever recursive, so they're always
346 effectively one-shot functions, which we don't float out of. We *could* float
347 join points from nullary join points, but there's no clear benefit at this
348 stage.
349
350 Note [Data constructor workers]
351 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
352 Create any necessary "implicit" bindings for data con workers. We
353 create the rather strange (non-recursive!) binding
354
355 $wC = \x y -> $wC x y
356
357 i.e. a curried constructor that allocates. This means that we can
358 treat the worker for a constructor like any other function in the rest
359 of the compiler. The point here is that CoreToStg will generate a
360 StgConApp for the RHS, rather than a call to the worker (which would
361 give a loop). As Lennart says: the ice is thin here, but it works.
362
363 Hmm. Should we create bindings for dictionary constructors? They are
364 always fully applied, and the bindings are just there to support
365 partial applications. But it's easier to let them through.
366
367
368 Note [Dead code in CorePrep]
369 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
370 Imagine that we got an input program like this (see #4962):
371
372 f :: Show b => Int -> (Int, b -> Maybe Int -> Int)
373 f x = (g True (Just x) + g () (Just x), g)
374 where
375 g :: Show a => a -> Maybe Int -> Int
376 g _ Nothing = x
377 g y (Just z) = if z > 100 then g y (Just (z + length (show y))) else g y unknown
378
379 After specialisation and SpecConstr, we would get something like this:
380
381 f :: Show b => Int -> (Int, b -> Maybe Int -> Int)
382 f x = (g$Bool_True_Just x + g$Unit_Unit_Just x, g)
383 where
384 {-# RULES g $dBool = g$Bool
385 g $dUnit = g$Unit #-}
386 g = ...
387 {-# RULES forall x. g$Bool True (Just x) = g$Bool_True_Just x #-}
388 g$Bool = ...
389 {-# RULES forall x. g$Unit () (Just x) = g$Unit_Unit_Just x #-}
390 g$Unit = ...
391 g$Bool_True_Just = ...
392 g$Unit_Unit_Just = ...
393
394 Note that the g$Bool and g$Unit functions are actually dead code: they
395 are only kept alive by the occurrence analyser because they are
396 referred to by the rules of g, which is being kept alive by the fact
397 that it is used (unspecialised) in the returned pair.
398
399 However, at the CorePrep stage there is no way that the rules for g
400 will ever fire, and it really seems like a shame to produce an output
401 program that goes to the trouble of allocating a closure for the
402 unreachable g$Bool and g$Unit functions.
403
404 The way we fix this is to:
405 * In cloneBndr, drop all unfoldings/rules
406
407 * In deFloatTop, run a simple dead code analyser on each top-level
408 RHS to drop the dead local bindings.
409
410 The reason we don't just OccAnal the whole output of CorePrep is that
411 the tidier ensures that all top-level binders are GlobalIds, so they
412 don't show up in the free variables any longer. So if you run the
413 occurrence analyser on the output of CoreTidy (or later) you e.g. turn
414 this program:
415
416 Rec {
417 f = ... f ...
418 }
419
420 Into this one:
421
422 f = ... f ...
423
424 (Since f is not considered to be free in its own RHS.)
425
426
427 Note [keepAlive# magic]
428 ~~~~~~~~~~~~~~~~~~~~~~~
429 When interacting with foreign code, it is often necessary for the user to
430 extend the lifetime of a heap object beyond the lifetime that would be apparent
431 from the on-heap references alone. For instance, a program like:
432
433 foreign import safe "hello" hello :: ByteArray# -> IO ()
434
435 callForeign :: IO ()
436 callForeign = IO $ \s0 ->
437 case newByteArray# n# s0 of (# s1, barr #) ->
438 unIO hello barr s1
439
440 As-written this program is susceptible to memory-unsafety since there are
441 no references to `barr` visible to the garbage collector. Consequently, if a
442 garbage collection happens during the execution of the C function `hello`, it
443 may be that the array is freed while in use by the foreign function.
444
445 To address this, we introduced a new primop, keepAlive#, which "scopes over"
446 the computation needing the kept-alive value:
447
448 keepAlive# :: forall (ra :: RuntimeRep) (rb :: RuntimeRep) (a :: TYPE a) (b :: TYPE b).
449 a -> State# RealWorld -> (State# RealWorld -> b) -> b
450
451 When entered, an application (keepAlive# x s k) will apply `k` to the state
452 token, evaluating it to WHNF. However, during the course of this evaluation
453 will *guarantee* that `x` is considered to be alive.
454
455 There are a few things to note here:
456
457 - we are RuntimeRep-polymorphic in the value to be kept-alive. This is
458 necessary since we will often (but not always) be keeping alive something
459 unlifted (like a ByteArray#)
460
461 - we are RuntimeRep-polymorphic in the result value since the result may take
462 many forms (e.g. a boxed value, a raw state token, or a (# State s, result #).
463
464 We implement this operation by desugaring to touch# during CorePrep (see
465 GHC.CoreToStg.Prep.cpeApp). Specifically,
466
467 keepAlive# x s0 k
468
469 is transformed to:
470
471 case k s0 of r ->
472 case touch# x realWorld# of s1 ->
473 r
474
475 Operationally, `keepAlive# x s k` is equivalent to pushing a stack frame with a
476 pointer to `x` and entering `k s0`. This compilation strategy is safe
477 because we do no optimization on STG that would drop or re-order the
478 continuation containing the `touch#`. However, if we were to become more
479 aggressive in our STG pipeline then we would need to revisit this.
480
481 Beyond this CorePrep transformation, there is very little special about
482 keepAlive#. However, we did explore (and eventually gave up on)
483 an optimisation which would allow unboxing of constructed product results,
484 which we describe below.
485
486
487 Lost optimisation: CPR unboxing
488 --------------------------------
489 One unfortunate property of this approach is that the simplifier is unable to
490 unbox the result of a keepAlive# expression. For instance, consider the program:
491
492 case keepAlive# arr s0 (
493 \s1 -> case peekInt arr s1 of
494 (# s2, r #) -> I# r
495 ) of
496 I# x -> ...
497
498 This is a surprisingly common pattern, previously used, e.g., in
499 GHC.IO.Buffer.readWord8Buf. While exploring ideas, we briefly played around
500 with optimising this away by pushing strict contexts (like the
501 `case [] of I# x -> ...` above) into keepAlive#'s continuation. While this can
502 recover unboxing, it can also unfortunately in general change the asymptotic
503 memory (namely stack) behavior of the program. For instance, consider
504
505 writeN =
506 ...
507 case keepAlive# x s0 (\s1 -> something s1) of
508 (# s2, x #) ->
509 writeN ...
510
511 As it is tail-recursive, this program will run in constant space. However, if
512 we push outer case into the continuation we get:
513
514 writeN =
515
516 case keepAlive# x s0 (\s1 ->
517 case something s1 of
518 (# s2, x #) ->
519 writeN ...
520 ) of
521 ...
522
523 Which ends up building a stack which is linear in the recursion depth. For this
524 reason, we ended up giving up on this optimisation.
525
526
527 Historical note: touch# and its inadequacy
528 ------------------------------------------
529 Prior to the introduction of `keepAlive#` we instead addressed the need for
530 lifetime extension with the `touch#` primop:
531
532 touch# :: a -> State# s -> State# s
533
534 This operation would ensure that the `a` value passed as the first argument was
535 considered "alive" at the time the primop application is entered.
536
537 For instance, the user might modify `callForeign` as:
538
539 callForeign :: IO ()
540 callForeign s0 = IO $ \s0 ->
541 case newByteArray# n# s0 of (# s1, barr #) ->
542 case unIO hello barr s1 of (# s2, () #) ->
543 case touch# barr s2 of s3 ->
544 (# s3, () #)
545
546 However, in #14346 we discovered that this primop is insufficient in the
547 presence of simplification. For instance, consider a program like:
548
549 callForeign :: IO ()
550 callForeign s0 = IO $ \s0 ->
551 case newByteArray# n# s0 of (# s1, barr #) ->
552 case unIO (forever $ hello barr) s1 of (# s2, () #) ->
553 case touch# barr s2 of s3 ->
554 (# s3, () #)
555
556 In this case the Simplifier may realize that (forever $ hello barr)
557 will never return and consequently that the `touch#` that follows is dead code.
558 As such, it will be dropped, resulting in memory unsoundness.
559 This unsoundness lead to the introduction of keepAlive#.
560
561
562
563 Other related tickets:
564
565 - #15544
566 - #17760
567 - #14375
568 - #15260
569 - #18061
570
571 ************************************************************************
572 * *
573 The main code
574 * *
575 ************************************************************************
576 -}
577
578 cpeBind :: TopLevelFlag -> CorePrepEnv -> CoreBind
579 -> UniqSM (CorePrepEnv,
580 Floats, -- Floating value bindings
581 Maybe CoreBind) -- Just bind' <=> returned new bind; no float
582 -- Nothing <=> added bind' to floats instead
583 cpeBind top_lvl env (NonRec bndr rhs)
584 | not (isJoinId bndr)
585 = do { (env1, bndr1) <- cpCloneBndr env bndr
586 ; let dmd = idDemandInfo bndr
587 is_unlifted = isUnliftedType (idType bndr)
588 ; (floats, rhs1) <- cpePair top_lvl NonRecursive
589 dmd is_unlifted
590 env bndr1 rhs
591 -- See Note [Inlining in CorePrep]
592 ; let triv_rhs = exprIsTrivial rhs1
593 env2 | triv_rhs = extendCorePrepEnvExpr env1 bndr rhs1
594 | otherwise = env1
595 floats1 | triv_rhs, isInternalName (idName bndr)
596 = floats
597 | otherwise
598 = addFloat floats new_float
599
600 new_float = mkFloat dmd is_unlifted bndr1 rhs1
601
602 ; return (env2, floats1, Nothing) }
603
604 | otherwise -- A join point; see Note [Join points and floating]
605 = assert (not (isTopLevel top_lvl)) $ -- can't have top-level join point
606 do { (_, bndr1) <- cpCloneBndr env bndr
607 ; (bndr2, rhs1) <- cpeJoinPair env bndr1 rhs
608 ; return (extendCorePrepEnv env bndr bndr2,
609 emptyFloats,
610 Just (NonRec bndr2 rhs1)) }
611
612 cpeBind top_lvl env (Rec pairs)
613 | not (isJoinId (head bndrs))
614 = do { (env', bndrs1) <- cpCloneBndrs env bndrs
615 ; stuff <- zipWithM (cpePair top_lvl Recursive topDmd False env')
616 bndrs1 rhss
617
618 ; let (floats_s, rhss1) = unzip stuff
619 all_pairs = foldrOL add_float (bndrs1 `zip` rhss1)
620 (concatFloats floats_s)
621
622 ; return (extendCorePrepEnvList env (bndrs `zip` bndrs1),
623 unitFloat (FloatLet (Rec all_pairs)),
624 Nothing) }
625
626 | otherwise -- See Note [Join points and floating]
627 = do { (env', bndrs1) <- cpCloneBndrs env bndrs
628 ; pairs1 <- zipWithM (cpeJoinPair env') bndrs1 rhss
629
630 ; let bndrs2 = map fst pairs1
631 ; return (extendCorePrepEnvList env' (bndrs `zip` bndrs2),
632 emptyFloats,
633 Just (Rec pairs1)) }
634 where
635 (bndrs, rhss) = unzip pairs
636
637 -- Flatten all the floats, and the current
638 -- group into a single giant Rec
639 add_float (FloatLet (NonRec b r)) prs2 = (b,r) : prs2
640 add_float (FloatLet (Rec prs1)) prs2 = prs1 ++ prs2
641 add_float b _ = pprPanic "cpeBind" (ppr b)
642
643 ---------------
644 cpePair :: TopLevelFlag -> RecFlag -> Demand -> Bool
645 -> CorePrepEnv -> OutId -> CoreExpr
646 -> UniqSM (Floats, CpeRhs)
647 -- Used for all bindings
648 -- The binder is already cloned, hence an OutId
649 cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
650 = assert (not (isJoinId bndr)) $ -- those should use cpeJoinPair
651 do { (floats1, rhs1) <- cpeRhsE env rhs
652
653 -- See if we are allowed to float this stuff out of the RHS
654 ; (floats2, rhs2) <- float_from_rhs floats1 rhs1
655
656 -- Make the arity match up
657 ; (floats3, rhs3)
658 <- if manifestArity rhs1 <= arity
659 then return (floats2, cpeEtaExpand arity rhs2)
660 else warnPprTrace True (text "CorePrep: silly extra arguments:" <+> ppr bndr) $
661 -- Note [Silly extra arguments]
662 (do { v <- newVar (idType bndr)
663 ; let float = mkFloat topDmd False v rhs2
664 ; return ( addFloat floats2 float
665 , cpeEtaExpand arity (Var v)) })
666
667 -- Wrap floating ticks
668 ; let (floats4, rhs4) = wrapTicks floats3 rhs3
669
670 ; return (floats4, rhs4) }
671 where
672 arity = idArity bndr -- We must match this arity
673
674 ---------------------
675 float_from_rhs floats rhs
676 | isEmptyFloats floats = return (emptyFloats, rhs)
677 | isTopLevel top_lvl = float_top floats rhs
678 | otherwise = float_nested floats rhs
679
680 ---------------------
681 float_nested floats rhs
682 | wantFloatNested is_rec dmd is_unlifted floats rhs
683 = return (floats, rhs)
684 | otherwise = dontFloat floats rhs
685
686 ---------------------
687 float_top floats rhs
688 | allLazyTop floats
689 = return (floats, rhs)
690
691 | Just floats <- canFloat floats rhs
692 = return floats
693
694 | otherwise
695 = dontFloat floats rhs
696
697 dontFloat :: Floats -> CpeRhs -> UniqSM (Floats, CpeBody)
698 -- Non-empty floats, but do not want to float from rhs
699 -- So wrap the rhs in the floats
700 -- But: rhs1 might have lambdas, and we can't
701 -- put them inside a wrapBinds
702 dontFloat floats1 rhs
703 = do { (floats2, body) <- rhsToBody rhs
704 ; return (emptyFloats, wrapBinds floats1 $
705 wrapBinds floats2 body) }
706
707 {- Note [Silly extra arguments]
708 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
709 Suppose we had this
710 f{arity=1} = \x\y. e
711 We *must* match the arity on the Id, so we have to generate
712 f' = \x\y. e
713 f = \x. f' x
714
715 It's a bizarre case: why is the arity on the Id wrong? Reason
716 (in the days of __inline_me__):
717 f{arity=0} = __inline_me__ (let v = expensive in \xy. e)
718 When InlineMe notes go away this won't happen any more. But
719 it seems good for CorePrep to be robust.
720 -}
721
722 ---------------
723 cpeJoinPair :: CorePrepEnv -> JoinId -> CoreExpr
724 -> UniqSM (JoinId, CpeRhs)
725 -- Used for all join bindings
726 -- No eta-expansion: see Note [Do not eta-expand join points] in GHC.Core.Opt.Simplify.Utils
727 cpeJoinPair env bndr rhs
728 = assert (isJoinId bndr) $
729 do { let Just join_arity = isJoinId_maybe bndr
730 (bndrs, body) = collectNBinders join_arity rhs
731
732 ; (env', bndrs') <- cpCloneBndrs env bndrs
733
734 ; body' <- cpeBodyNF env' body -- Will let-bind the body if it starts
735 -- with a lambda
736
737 ; let rhs' = mkCoreLams bndrs' body'
738 bndr' = bndr `setIdUnfolding` evaldUnfolding
739 `setIdArity` count isId bndrs
740 -- See Note [Arity and join points]
741
742 ; return (bndr', rhs') }
743
744 {-
745 Note [Arity and join points]
746 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
747 Up to now, we've allowed a join point to have an arity greater than its join
748 arity (minus type arguments), since this is what's useful for eta expansion.
749 However, for code gen purposes, its arity must be exactly the number of value
750 arguments it will be called with, and it must have exactly that many value
751 lambdas. Hence if there are extra lambdas we must let-bind the body of the RHS:
752
753 join j x y z = \w -> ... in ...
754 =>
755 join j x y z = (let f = \w -> ... in f) in ...
756
757 This is also what happens with Note [Silly extra arguments]. Note that it's okay
758 for us to mess with the arity because a join point is never exported.
759 -}
760
761 -- ---------------------------------------------------------------------------
762 -- CpeRhs: produces a result satisfying CpeRhs
763 -- ---------------------------------------------------------------------------
764
765 cpeRhsE :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
766 -- If
767 -- e ===> (bs, e')
768 -- then
769 -- e = let bs in e' (semantically, that is!)
770 --
771 -- For example
772 -- f (g x) ===> ([v = g x], f v)
773
774 cpeRhsE env (Type ty)
775 = return (emptyFloats, Type (cpSubstTy env ty))
776 cpeRhsE env (Coercion co)
777 = return (emptyFloats, Coercion (cpSubstCo env co))
778 cpeRhsE env expr@(Lit (LitNumber nt i))
779 = case cpe_convertNumLit env nt i of
780 Nothing -> return (emptyFloats, expr)
781 Just e -> cpeRhsE env e
782 cpeRhsE _env expr@(Lit {}) = return (emptyFloats, expr)
783 cpeRhsE env expr@(Var {}) = cpeApp env expr
784 cpeRhsE env expr@(App {}) = cpeApp env expr
785
786 cpeRhsE env (Let bind body)
787 = do { (env', bind_floats, maybe_bind') <- cpeBind NotTopLevel env bind
788 ; (body_floats, body') <- cpeRhsE env' body
789 ; let expr' = case maybe_bind' of Just bind' -> Let bind' body'
790 Nothing -> body'
791 ; return (bind_floats `appendFloats` body_floats, expr') }
792
793 cpeRhsE env (Tick tickish expr)
794 | tickishPlace tickish == PlaceNonLam && tickish `tickishScopesLike` SoftScope
795 = do { (floats, body) <- cpeRhsE env expr
796 -- See [Floating Ticks in CorePrep]
797 ; return (unitFloat (FloatTick tickish) `appendFloats` floats, body) }
798 | otherwise
799 = do { body <- cpeBodyNF env expr
800 ; return (emptyFloats, mkTick tickish' body) }
801 where
802 tickish' | Breakpoint ext n fvs <- tickish
803 -- See also 'substTickish'
804 = Breakpoint ext n (map (getIdFromTrivialExpr . lookupCorePrepEnv env) fvs)
805 | otherwise
806 = tickish
807
808 cpeRhsE env (Cast expr co)
809 = do { (floats, expr') <- cpeRhsE env expr
810 ; return (floats, Cast expr' (cpSubstCo env co)) }
811
812 cpeRhsE env expr@(Lam {})
813 = do { let (bndrs,body) = collectBinders expr
814 ; (env', bndrs') <- cpCloneBndrs env bndrs
815 ; body' <- cpeBodyNF env' body
816 ; return (emptyFloats, mkLams bndrs' body') }
817
818 -- Eliminate empty case
819 -- See Note [Unsafe coercions]
820 cpeRhsE env (Case scrut _ ty [])
821 = do { (floats, scrut') <- cpeRhsE env scrut
822 ; let ty' = cpSubstTy env ty
823 scrut_ty' = exprType scrut'
824 co' = mkUnivCo prov Representational scrut_ty' ty'
825 prov = CorePrepProv False
826 -- False says that the kinds of two types may differ
827 -- E.g. we might cast Int to Int#. This is fine
828 -- because the scrutinee is guaranteed to diverge
829
830 ; return (floats, Cast scrut' co') }
831 -- This can give rise to
832 -- Warning: Unsafe coercion: between unboxed and boxed value
833 -- but it's fine because 'scrut' diverges
834
835 -- Eliminate unsafeEqualityProof
836 -- See Note [Unsafe coercions]
837 cpeRhsE env (Case scrut bndr _ alts)
838 | isUnsafeEqualityProof scrut
839 , isDeadBinder bndr -- We can only discard the case if the case-binder
840 -- is dead. It usually is, but see #18227
841 , [Alt _ [co_var] rhs] <- alts
842 , let Pair ty1 ty2 = coVarTypes co_var
843 the_co = mkUnivCo prov Nominal (cpSubstTy env ty1) (cpSubstTy env ty2)
844 prov = CorePrepProv True -- True <=> kind homogeneous
845 env' = extendCoVarEnv env co_var the_co
846 = cpeRhsE env' rhs
847
848 cpeRhsE env (Case scrut bndr ty alts)
849 = do { (floats, scrut') <- cpeBody env scrut
850 ; (env', bndr2) <- cpCloneBndr env bndr
851 ; let alts'
852 -- This flag is intended to aid in debugging strictness
853 -- analysis bugs. These are particularly nasty to chase down as
854 -- they may manifest as segmentation faults. When this flag is
855 -- enabled we instead produce an 'error' expression to catch
856 -- the case where a function we think should bottom
857 -- unexpectedly returns.
858 | gopt Opt_CatchBottoms (cpe_dynFlags env)
859 , not (altsAreExhaustive alts)
860 = addDefault alts (Just err)
861 | otherwise = alts
862 where err = mkRuntimeErrorApp rUNTIME_ERROR_ID ty
863 "Bottoming expression returned"
864 ; alts'' <- mapM (sat_alt env') alts'
865
866 ; return (floats, Case scrut' bndr2 ty alts'') }
867 where
868 sat_alt env (Alt con bs rhs)
869 = do { (env2, bs') <- cpCloneBndrs env bs
870 ; rhs' <- cpeBodyNF env2 rhs
871 ; return (Alt con bs' rhs') }
872
873 -- ---------------------------------------------------------------------------
874 -- CpeBody: produces a result satisfying CpeBody
875 -- ---------------------------------------------------------------------------
876
877 -- | Convert a 'CoreExpr' so it satisfies 'CpeBody', without
878 -- producing any floats (any generated floats are immediately
879 -- let-bound using 'wrapBinds'). Generally you want this, esp.
880 -- when you've reached a binding form (e.g., a lambda) and
881 -- floating any further would be incorrect.
882 cpeBodyNF :: CorePrepEnv -> CoreExpr -> UniqSM CpeBody
883 cpeBodyNF env expr
884 = do { (floats, body) <- cpeBody env expr
885 ; return (wrapBinds floats body) }
886
887 -- | Convert a 'CoreExpr' so it satisfies 'CpeBody'; also produce
888 -- a list of 'Floats' which are being propagated upwards. In
889 -- fact, this function is used in only two cases: to
890 -- implement 'cpeBodyNF' (which is what you usually want),
891 -- and in the case when a let-binding is in a case scrutinee--here,
892 -- we can always float out:
893 --
894 -- case (let x = y in z) of ...
895 -- ==> let x = y in case z of ...
896 --
897 cpeBody :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeBody)
898 cpeBody env expr
899 = do { (floats1, rhs) <- cpeRhsE env expr
900 ; (floats2, body) <- rhsToBody rhs
901 ; return (floats1 `appendFloats` floats2, body) }
902
903 --------
904 rhsToBody :: CpeRhs -> UniqSM (Floats, CpeBody)
905 -- Remove top level lambdas by let-binding
906
907 rhsToBody (Tick t expr)
908 | tickishScoped t == NoScope -- only float out of non-scoped annotations
909 = do { (floats, expr') <- rhsToBody expr
910 ; return (floats, mkTick t expr') }
911
912 rhsToBody (Cast e co)
913 -- You can get things like
914 -- case e of { p -> coerce t (\s -> ...) }
915 = do { (floats, e') <- rhsToBody e
916 ; return (floats, Cast e' co) }
917
918 rhsToBody expr@(Lam {})
919 | Just no_lam_result <- tryEtaReducePrep bndrs body
920 = return (emptyFloats, no_lam_result)
921 | all isTyVar bndrs -- Type lambdas are ok
922 = return (emptyFloats, expr)
923 | otherwise -- Some value lambdas
924 = do { let rhs = cpeEtaExpand (exprArity expr) expr
925 ; fn <- newVar (exprType rhs)
926 ; let float = FloatLet (NonRec fn rhs)
927 ; return (unitFloat float, Var fn) }
928 where
929 (bndrs,body) = collectBinders expr
930
931 rhsToBody expr = return (emptyFloats, expr)
932
933
934
935 -- ---------------------------------------------------------------------------
936 -- CpeApp: produces a result satisfying CpeApp
937 -- ---------------------------------------------------------------------------
938
939 data ArgInfo = CpeApp CoreArg
940 | CpeCast Coercion
941 | CpeTick CoreTickish
942
943 instance Outputable ArgInfo where
944 ppr (CpeApp arg) = text "app" <+> ppr arg
945 ppr (CpeCast co) = text "cast" <+> ppr co
946 ppr (CpeTick tick) = text "tick" <+> ppr tick
947
948 cpeApp :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
949 -- May return a CpeRhs because of saturating primops
950 cpeApp top_env expr
951 = do { let (terminal, args, depth) = collect_args expr
952 ; cpe_app top_env terminal args depth
953 }
954
955 where
956 -- We have a nested data structure of the form
957 -- e `App` a1 `App` a2 ... `App` an, convert it into
958 -- (e, [CpeApp a1, CpeApp a2, ..., CpeApp an], depth)
959 -- We use 'ArgInfo' because we may also need to
960 -- record casts and ticks. Depth counts the number
961 -- of arguments that would consume strictness information
962 -- (so, no type or coercion arguments.)
963 collect_args :: CoreExpr -> (CoreExpr, [ArgInfo], Int)
964 collect_args e = go e [] 0
965 where
966 go (App fun arg) as !depth
967 = go fun (CpeApp arg : as)
968 (if isTyCoArg arg then depth else depth + 1)
969 go (Cast fun co) as depth
970 = go fun (CpeCast co : as) depth
971 go (Tick tickish fun) as depth
972 | tickishPlace tickish == PlaceNonLam
973 && tickish `tickishScopesLike` SoftScope
974 = go fun (CpeTick tickish : as) depth
975 go terminal as depth = (terminal, as, depth)
976
977 cpe_app :: CorePrepEnv
978 -> CoreExpr
979 -> [ArgInfo]
980 -> Int
981 -> UniqSM (Floats, CpeRhs)
982 cpe_app env (Var f) (CpeApp Type{} : CpeApp arg : args) depth
983 | f `hasKey` lazyIdKey -- Replace (lazy a) with a, and
984 -- See Note [lazyId magic] in GHC.Types.Id.Make
985 || f `hasKey` noinlineIdKey -- Replace (noinline a) with a
986 -- See Note [noinlineId magic] in GHC.Types.Id.Make
987
988 -- Consider the code:
989 --
990 -- lazy (f x) y
991 --
992 -- We need to make sure that we need to recursively collect arguments on
993 -- "f x", otherwise we'll float "f x" out (it's not a variable) and
994 -- end up with this awful -ddump-prep:
995 --
996 -- case f x of f_x {
997 -- __DEFAULT -> f_x y
998 -- }
999 --
1000 -- rather than the far superior "f x y". Test case is par01.
1001 = let (terminal, args', depth') = collect_args arg
1002 in cpe_app env terminal (args' ++ args) (depth + depth' - 1)
1003
1004 -- See Note [keepAlive# magic].
1005 cpe_app env
1006 (Var f)
1007 args
1008 n
1009 | Just KeepAliveOp <- isPrimOpId_maybe f
1010 , CpeApp (Type arg_rep)
1011 : CpeApp (Type arg_ty)
1012 : CpeApp (Type _result_rep)
1013 : CpeApp (Type result_ty)
1014 : CpeApp arg
1015 : CpeApp s0
1016 : CpeApp k
1017 : rest <- args
1018 = do { y <- newVar (cpSubstTy env result_ty)
1019 ; s2 <- newVar realWorldStatePrimTy
1020 ; -- beta reduce if possible
1021 ; (floats, k') <- case k of
1022 Lam s body -> cpe_app (extendCorePrepEnvExpr env s s0) body rest (n-2)
1023 _ -> cpe_app env k (CpeApp s0 : rest) (n-1)
1024 ; let touchId = mkPrimOpId TouchOp
1025 expr = Case k' y result_ty [Alt DEFAULT [] rhs]
1026 rhs = let scrut = mkApps (Var touchId) [Type arg_rep, Type arg_ty, arg, Var realWorldPrimId]
1027 in Case scrut s2 result_ty [Alt DEFAULT [] (Var y)]
1028 ; (floats', expr') <- cpeBody env expr
1029 ; return (floats `appendFloats` floats', expr')
1030 }
1031 | Just KeepAliveOp <- isPrimOpId_maybe f
1032 = panic "invalid keepAlive# application"
1033
1034 cpe_app env (Var f) (CpeApp _runtimeRep@Type{} : CpeApp _type@Type{} : CpeApp arg : rest) n
1035 | f `hasKey` runRWKey
1036 -- N.B. While it may appear that n == 1 in the case of runRW#
1037 -- applications, keep in mind that we may have applications that return
1038 , n >= 1
1039 -- See Note [runRW magic]
1040 -- Replace (runRW# f) by (f realWorld#), beta reducing if possible (this
1041 -- is why we return a CorePrepEnv as well)
1042 = case arg of
1043 Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body rest (n-2)
1044 _ -> cpe_app env arg (CpeApp (Var realWorldPrimId) : rest) (n-1)
1045 -- TODO: What about casts?
1046
1047 cpe_app env (Var v) args depth
1048 = do { v1 <- fiddleCCall v
1049 ; let e2 = lookupCorePrepEnv env v1
1050 hd = getIdFromTrivialExpr_maybe e2
1051 -- NB: depth from collect_args is right, because e2 is a trivial expression
1052 -- and thus its embedded Id *must* be at the same depth as any
1053 -- Apps it is under are type applications only (c.f.
1054 -- exprIsTrivial). But note that we need the type of the
1055 -- expression, not the id.
1056 ; (app, floats) <- rebuild_app env args e2 emptyFloats stricts
1057 ; mb_saturate hd app floats depth }
1058 where
1059 stricts = case idDmdSig v of
1060 DmdSig (DmdType _ demands _)
1061 | listLengthCmp demands depth /= GT -> demands
1062 -- length demands <= depth
1063 | otherwise -> []
1064 -- If depth < length demands, then we have too few args to
1065 -- satisfy strictness info so we have to ignore all the
1066 -- strictness info, e.g. + (error "urk")
1067 -- Here, we can't evaluate the arg strictly, because this
1068 -- partial application might be seq'd
1069
1070 -- We inlined into something that's not a var and has no args.
1071 -- Bounce it back up to cpeRhsE.
1072 cpe_app env fun [] _ = cpeRhsE env fun
1073
1074 -- N-variable fun, better let-bind it
1075 cpe_app env fun args depth
1076 = do { (fun_floats, fun') <- cpeArg env evalDmd fun
1077 -- The evalDmd says that it's sure to be evaluated,
1078 -- so we'll end up case-binding it
1079 ; (app, floats) <- rebuild_app env args fun' fun_floats []
1080 ; mb_saturate Nothing app floats depth }
1081
1082 -- Saturate if necessary
1083 mb_saturate head app floats depth =
1084 case head of
1085 Just fn_id -> do { sat_app <- maybeSaturate fn_id app depth
1086 ; return (floats, sat_app) }
1087 _other -> return (floats, app)
1088
1089 -- Deconstruct and rebuild the application, floating any non-atomic
1090 -- arguments to the outside. We collect the type of the expression,
1091 -- the head of the application, and the number of actual value arguments,
1092 -- all of which are used to possibly saturate this application if it
1093 -- has a constructor or primop at the head.
1094 rebuild_app
1095 :: CorePrepEnv
1096 -> [ArgInfo] -- The arguments (inner to outer)
1097 -> CpeApp
1098 -> Floats
1099 -> [Demand]
1100 -> UniqSM (CpeApp, Floats)
1101 rebuild_app _ [] app floats ss
1102 = assert (null ss) -- make sure we used all the strictness info
1103 return (app, floats)
1104
1105 rebuild_app env (a : as) fun' floats ss = case a of
1106
1107 CpeApp (Type arg_ty)
1108 -> rebuild_app env as (App fun' (Type arg_ty')) floats ss
1109 where
1110 arg_ty' = cpSubstTy env arg_ty
1111
1112 CpeApp (Coercion co)
1113 -> rebuild_app env as (App fun' (Coercion co')) floats ss
1114 where
1115 co' = cpSubstCo env co
1116
1117 CpeApp arg -> do
1118 let (ss1, ss_rest) -- See Note [lazyId magic] in GHC.Types.Id.Make
1119 = case (ss, isLazyExpr arg) of
1120 (_ : ss_rest, True) -> (topDmd, ss_rest)
1121 (ss1 : ss_rest, False) -> (ss1, ss_rest)
1122 ([], _) -> (topDmd, [])
1123 (fs, arg') <- cpeArg top_env ss1 arg
1124 rebuild_app env as (App fun' arg') (fs `appendFloats` floats) ss_rest
1125
1126 CpeCast co
1127 -> rebuild_app env as (Cast fun' co') floats ss
1128 where
1129 co' = cpSubstCo env co
1130
1131 CpeTick tickish
1132 -- See [Floating Ticks in CorePrep]
1133 -> rebuild_app env as fun' (addFloat floats (FloatTick tickish)) ss
1134
1135 isLazyExpr :: CoreExpr -> Bool
1136 -- See Note [lazyId magic] in GHC.Types.Id.Make
1137 isLazyExpr (Cast e _) = isLazyExpr e
1138 isLazyExpr (Tick _ e) = isLazyExpr e
1139 isLazyExpr (Var f `App` _ `App` _) = f `hasKey` lazyIdKey
1140 isLazyExpr _ = False
1141
1142 {- Note [runRW magic]
1143 ~~~~~~~~~~~~~~~~~~~~~
1144 Some definitions, for instance @runST@, must have careful control over float out
1145 of the bindings in their body. Consider this use of @runST@,
1146
1147 f x = runST ( \ s -> let (a, s') = newArray# 100 [] s
1148 (_, s'') = fill_in_array_or_something a x s'
1149 in freezeArray# a s'' )
1150
1151 If we inline @runST@, we'll get:
1152
1153 f x = let (a, s') = newArray# 100 [] realWorld#{-NB-}
1154 (_, s'') = fill_in_array_or_something a x s'
1155 in freezeArray# a s''
1156
1157 And now if we allow the @newArray#@ binding to float out to become a CAF,
1158 we end up with a result that is totally and utterly wrong:
1159
1160 f = let (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
1161 in \ x ->
1162 let (_, s'') = fill_in_array_or_something a x s'
1163 in freezeArray# a s''
1164
1165 All calls to @f@ will share a {\em single} array! Clearly this is nonsense and
1166 must be prevented.
1167
1168 This is what @runRW#@ gives us: by being inlined extremely late in the
1169 optimization (right before lowering to STG, in CorePrep), we can ensure that
1170 no further floating will occur. This allows us to safely inline things like
1171 @runST@, which are otherwise needlessly expensive (see #10678 and #5916).
1172
1173 'runRW' has a variety of quirks:
1174
1175 * 'runRW' is known-key with a NOINLINE definition in
1176 GHC.Magic. This definition is used in cases where runRW is curried.
1177
1178 * In addition to its normal Haskell definition in GHC.Magic, we give it
1179 a special late inlining here in CorePrep and GHC.StgToByteCode, avoiding
1180 the incorrect sharing due to float-out noted above.
1181
1182 * It is levity-polymorphic:
1183
1184 runRW# :: forall (r1 :: RuntimeRep). (o :: TYPE r)
1185 => (State# RealWorld -> (# State# RealWorld, o #))
1186 -> (# State# RealWorld, o #)
1187
1188 * It has some special simplification logic to allow unboxing of results when
1189 runRW# appears in a strict context. See Note [Simplification of runRW#]
1190 below.
1191
1192 * Since its body is inlined, we allow runRW#'s argument to contain jumps to
1193 join points. That is, the following is allowed:
1194
1195 join j x = ...
1196 in runRW# @_ @_ (\s -> ... jump j 42 ...)
1197
1198 The Core Linter knows about this. See Note [Linting of runRW#] in
1199 GHC.Core.Lint for details.
1200
1201 The occurrence analyser and SetLevels also know about this, as described in
1202 Note [Simplification of runRW#].
1203
1204 Other relevant Notes:
1205
1206 * Note [Simplification of runRW#] below, describing a transformation of runRW
1207 applications in strict contexts performed by the simplifier.
1208 * Note [Linting of runRW#] in GHC.Core.Lint
1209 * Note [runRW arg] below, describing a non-obvious case where the
1210 late-inlining could go wrong.
1211
1212
1213 Note [runRW arg]
1214 ~~~~~~~~~~~~~~~~~~~
1215 Consider the Core program (from #11291),
1216
1217 runRW# (case bot of {})
1218
1219 The late inlining logic in cpe_app would transform this into:
1220
1221 (case bot of {}) realWorldPrimId#
1222
1223 Which would rise to a panic in CoreToStg.myCollectArgs, which expects only
1224 variables in function position.
1225
1226 However, as runRW#'s strictness signature captures the fact that it will call
1227 its argument this can't happen: the simplifier will transform the bottoming
1228 application into simply (case bot of {}).
1229
1230 Note that this reasoning does *not* apply to non-bottoming continuations like:
1231
1232 hello :: Bool -> Int
1233 hello n =
1234 runRW# (
1235 case n of
1236 True -> \s -> 23
1237 _ -> \s -> 10)
1238
1239 Why? The difference is that (case bot of {}) is considered by okCpeArg to be
1240 trivial, consequently cpeArg (which the catch-all case of cpe_app calls on both
1241 the function and the arguments) will forgo binding it to a variable. By
1242 contrast, in the non-bottoming case of `hello` above the function will be
1243 deemed non-trivial and consequently will be case-bound.
1244
1245
1246 Note [Simplification of runRW#]
1247 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1248 Consider the program,
1249
1250 case runRW# (\s -> I# 42#) of
1251 I# n# -> f n#
1252
1253 There is no reason why we should allocate an I# constructor given that we
1254 immediately destructure it.
1255
1256 To avoid this the simplifier has a special transformation rule, specific to
1257 runRW#, that pushes a strict context into runRW#'s continuation. See the
1258 `runRW#` guard in `GHC.Core.Opt.Simplify.rebuildCall`. That is, it transforms
1259
1260 K[ runRW# @r @ty cont ]
1261 ~>
1262 runRW# @r @ty (\s -> K[cont s])
1263
1264 This has a few interesting implications. Consider, for instance, this program:
1265
1266 join j = ...
1267 in case runRW# @r @ty cont of
1268 result -> jump j result
1269
1270 Performing the transform described above would result in:
1271
1272 join j x = ...
1273 in runRW# @r @ty (\s ->
1274 case cont of in
1275 result -> jump j result
1276 )
1277
1278 If runRW# were a "normal" function this call to join point j would not be
1279 allowed in its continuation argument. However, since runRW# is inlined (as
1280 described in Note [runRW magic] above), such join point occurrences are
1281 completely fine. Both occurrence analysis (see the runRW guard in occAnalApp)
1282 and Core Lint (see the App case of lintCoreExpr) have special treatment for
1283 runRW# applications. See Note [Linting of runRW#] for details on the latter.
1284
1285 Moreover, it's helpful to ensure that runRW's continuation isn't floated out
1286 For instance, if we have
1287
1288 runRW# (\s -> do_something)
1289
1290 where do_something contains only top-level free variables, we may be tempted to
1291 float the argument to the top-level. However, we must resist this urge as since
1292 doing so would then require that runRW# produce an allocation and call, e.g.:
1293
1294 let lvl = \s -> do_somethign
1295 in
1296 ....(runRW# lvl)....
1297
1298 whereas without floating the inlining of the definition of runRW would result
1299 in straight-line code. Consequently, GHC.Core.Opt.SetLevels.lvlApp has special
1300 treatment for runRW# applications, ensure the arguments are not floated as
1301 MFEs.
1302
1303 Now that we float evaluation context into runRW#, we also have to give runRW# a
1304 special higher-order CPR transformer lest we risk #19822. E.g.,
1305
1306 case runRW# (\s -> doThings) of x -> Data.Text.Text x something something'
1307 ~>
1308 runRW# (\s -> case doThings s of x -> Data.Text.Text x something something')
1309
1310 The former had the CPR property, and so should the latter.
1311
1312 Other considered designs
1313 ------------------------
1314
1315 One design that was rejected was to *require* that runRW#'s continuation be
1316 headed by a lambda. However, this proved to be quite fragile. For instance,
1317 SetLevels is very eager to float bottoming expressions. For instance given
1318 something of the form,
1319
1320 runRW# @r @ty (\s -> case expr of x -> undefined)
1321
1322 SetLevels will see that the body the lambda is bottoming and will consequently
1323 float it to the top-level (assuming expr has no free coercion variables which
1324 prevent this). We therefore end up with
1325
1326 runRW# @r @ty (\s -> lvl s)
1327
1328 Which the simplifier will beta reduce, leaving us with
1329
1330 runRW# @r @ty lvl
1331
1332 Breaking our desired invariant. Ultimately we decided to simply accept that
1333 the continuation may not be a manifest lambda.
1334
1335
1336 -- ---------------------------------------------------------------------------
1337 -- CpeArg: produces a result satisfying CpeArg
1338 -- ---------------------------------------------------------------------------
1339
1340 Note [ANF-ising literal string arguments]
1341 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1342
1343 Consider a program like,
1344
1345 data Foo = Foo Addr#
1346
1347 foo = Foo "turtle"#
1348
1349 When we go to ANFise this we might think that we want to float the string
1350 literal like we do any other non-trivial argument. This would look like,
1351
1352 foo = u\ [] case "turtle"# of s { __DEFAULT__ -> Foo s }
1353
1354 However, this 1) isn't necessary since strings are in a sense "trivial"; and 2)
1355 wreaks havoc on the CAF annotations that we produce here since we the result
1356 above is caffy since it is updateable. Ideally at some point in the future we
1357 would like to just float the literal to the top level as suggested in #11312,
1358
1359 s = "turtle"#
1360 foo = Foo s
1361
1362 However, until then we simply add a special case excluding literals from the
1363 floating done by cpeArg.
1364 -}
1365
1366 -- | Is an argument okay to CPE?
1367 okCpeArg :: CoreExpr -> Bool
1368 -- Don't float literals. See Note [ANF-ising literal string arguments].
1369 okCpeArg (Lit _) = False
1370 -- Do not eta expand a trivial argument
1371 okCpeArg expr = not (exprIsTrivial expr)
1372
1373 -- This is where we arrange that a non-trivial argument is let-bound
1374 cpeArg :: CorePrepEnv -> Demand
1375 -> CoreArg -> UniqSM (Floats, CpeArg)
1376 cpeArg env dmd arg
1377 = do { (floats1, arg1) <- cpeRhsE env arg -- arg1 can be a lambda
1378 ; let arg_ty = exprType arg1
1379 is_unlifted = isUnliftedType arg_ty
1380 want_float = wantFloatNested NonRecursive dmd is_unlifted
1381 ; (floats2, arg2) <- if want_float floats1 arg1
1382 then return (floats1, arg1)
1383 else dontFloat floats1 arg1
1384 -- Else case: arg1 might have lambdas, and we can't
1385 -- put them inside a wrapBinds
1386
1387 ; if okCpeArg arg2
1388 then do { v <- newVar arg_ty
1389 ; let arg3 = cpeEtaExpand (exprArity arg2) arg2
1390 arg_float = mkFloat dmd is_unlifted v arg3
1391 ; return (addFloat floats2 arg_float, varToCoreExpr v) }
1392 else return (floats2, arg2)
1393 }
1394
1395 {-
1396 Note [Floating unlifted arguments]
1397 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1398 Consider C (let v* = expensive in v)
1399
1400 where the "*" indicates "will be demanded". Usually v will have been
1401 inlined by now, but let's suppose it hasn't (see #2756). Then we
1402 do *not* want to get
1403
1404 let v* = expensive in C v
1405
1406 because that has different strictness. Hence the use of 'allLazy'.
1407 (NB: the let v* turns into a FloatCase, in mkLocalNonRec.)
1408
1409
1410 ------------------------------------------------------------------------------
1411 -- Building the saturated syntax
1412 -- ---------------------------------------------------------------------------
1413
1414 Note [Eta expansion of hasNoBinding things in CorePrep]
1415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1416 maybeSaturate deals with eta expanding to saturate things that can't deal with
1417 unsaturated applications (identified by 'hasNoBinding', currently just
1418 foreign calls and unboxed tuple/sum constructors).
1419
1420 Historical Note: Note that eta expansion in CorePrep used to be very fragile
1421 due to the "prediction" of CAFfyness that we used to make during tidying.
1422 We previously saturated primop
1423 applications here as well but due to this fragility (see #16846) we now deal
1424 with this another way, as described in Note [Primop wrappers] in GHC.Builtin.PrimOps.
1425 -}
1426
1427 maybeSaturate :: Id -> CpeApp -> Int -> UniqSM CpeRhs
1428 maybeSaturate fn expr n_args
1429 | hasNoBinding fn -- There's no binding
1430 = return sat_expr
1431
1432 | otherwise
1433 = return expr
1434 where
1435 fn_arity = idArity fn
1436 excess_arity = fn_arity - n_args
1437 sat_expr = cpeEtaExpand excess_arity expr
1438
1439 {-
1440 ************************************************************************
1441 * *
1442 Simple GHC.Core operations
1443 * *
1444 ************************************************************************
1445 -}
1446
1447 {-
1448 -- -----------------------------------------------------------------------------
1449 -- Eta reduction
1450 -- -----------------------------------------------------------------------------
1451
1452 Note [Eta expansion]
1453 ~~~~~~~~~~~~~~~~~~~~~
1454 Eta expand to match the arity claimed by the binder Remember,
1455 CorePrep must not change arity
1456
1457 Eta expansion might not have happened already, because it is done by
1458 the simplifier only when there at least one lambda already.
1459
1460 NB1:we could refrain when the RHS is trivial (which can happen
1461 for exported things). This would reduce the amount of code
1462 generated (a little) and make things a little words for
1463 code compiled without -O. The case in point is data constructor
1464 wrappers.
1465
1466 NB2: we have to be careful that the result of etaExpand doesn't
1467 invalidate any of the assumptions that CorePrep is attempting
1468 to establish. One possible cause is eta expanding inside of
1469 an SCC note - we're now careful in etaExpand to make sure the
1470 SCC is pushed inside any new lambdas that are generated.
1471
1472 Note [Eta expansion and the CorePrep invariants]
1473 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1474 It turns out to be much much easier to do eta expansion
1475 *after* the main CorePrep stuff. But that places constraints
1476 on the eta expander: given a CpeRhs, it must return a CpeRhs.
1477
1478 For example here is what we do not want:
1479 f = /\a -> g (h 3) -- h has arity 2
1480 After ANFing we get
1481 f = /\a -> let s = h 3 in g s
1482 and now we do NOT want eta expansion to give
1483 f = /\a -> \ y -> (let s = h 3 in g s) y
1484
1485 Instead GHC.Core.Opt.Arity.etaExpand gives
1486 f = /\a -> \y -> let s = h 3 in g s y
1487
1488 -}
1489
1490 cpeEtaExpand :: Arity -> CpeRhs -> CpeRhs
1491 cpeEtaExpand arity expr
1492 | arity == 0 = expr
1493 | otherwise = etaExpand arity expr
1494
1495 {-
1496 -- -----------------------------------------------------------------------------
1497 -- Eta reduction
1498 -- -----------------------------------------------------------------------------
1499
1500 Why try eta reduction? Hasn't the simplifier already done eta?
1501 But the simplifier only eta reduces if that leaves something
1502 trivial (like f, or f Int). But for deLam it would be enough to
1503 get to a partial application:
1504 case x of { p -> \xs. map f xs }
1505 ==> case x of { p -> map f }
1506 -}
1507
1508 -- When updating this function, make sure it lines up with
1509 -- GHC.Core.Utils.tryEtaReduce!
1510 tryEtaReducePrep :: [CoreBndr] -> CoreExpr -> Maybe CoreExpr
1511 tryEtaReducePrep bndrs expr@(App _ _)
1512 | ok_to_eta_reduce f
1513 , n_remaining >= 0
1514 , and (zipWith ok bndrs last_args)
1515 , not (any (`elemVarSet` fvs_remaining) bndrs)
1516 , exprIsHNF remaining_expr -- Don't turn value into a non-value
1517 -- else the behaviour with 'seq' changes
1518 = Just remaining_expr
1519 where
1520 (f, args) = collectArgs expr
1521 remaining_expr = mkApps f remaining_args
1522 fvs_remaining = exprFreeVars remaining_expr
1523 (remaining_args, last_args) = splitAt n_remaining args
1524 n_remaining = length args - length bndrs
1525
1526 ok bndr (Var arg) = bndr == arg
1527 ok _ _ = False
1528
1529 -- We can't eta reduce something which must be saturated.
1530 ok_to_eta_reduce (Var f) = not (hasNoBinding f) && not (isLinearType (idType f))
1531 ok_to_eta_reduce _ = False -- Safe. ToDo: generalise
1532
1533
1534 tryEtaReducePrep bndrs (Tick tickish e)
1535 | tickishFloatable tickish
1536 = fmap (mkTick tickish) $ tryEtaReducePrep bndrs e
1537
1538 tryEtaReducePrep _ _ = Nothing
1539
1540 {-
1541 ************************************************************************
1542 * *
1543 Floats
1544 * *
1545 ************************************************************************
1546
1547 Note [Pin demand info on floats]
1548 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1549 We pin demand info on floated lets, so that we can see the one-shot thunks.
1550
1551 Note [Speculative evaluation]
1552 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1553 Since call-by-value is much cheaper than call-by-need, we case-bind arguments
1554 that are either
1555
1556 1. Strictly evaluated anyway, according to the DmdSig of the callee, or
1557 2. ok-for-spec, according to 'exprOkForSpeculation'
1558
1559 While (1) is a no-brainer and always beneficial, (2) is a bit
1560 more subtle, as the careful haddock for 'exprOkForSpeculation'
1561 points out. Still, by case-binding the argument we don't need
1562 to allocate a thunk for it, whose closure must be retained as
1563 long as the callee might evaluate it. And if it is evaluated on
1564 most code paths anyway, we get to turn the unknown eval in the
1565 callee into a known call at the call site.
1566 -}
1567
1568 data FloatingBind
1569 = FloatLet CoreBind -- Rhs of bindings are CpeRhss
1570 -- They are always of lifted type;
1571 -- unlifted ones are done with FloatCase
1572
1573 | FloatCase
1574 CpeBody -- Always ok-for-speculation
1575 Id -- Case binder
1576 AltCon [Var] -- Single alternative
1577 Bool -- Ok-for-speculation; False of a strict,
1578 -- but lifted binding
1579
1580 -- | See Note [Floating Ticks in CorePrep]
1581 | FloatTick CoreTickish
1582
1583 data Floats = Floats OkToSpec (OrdList FloatingBind)
1584
1585 instance Outputable FloatingBind where
1586 ppr (FloatLet b) = ppr b
1587 ppr (FloatCase r b k bs ok) = text "case" <> braces (ppr ok) <+> ppr r
1588 <+> text "of"<+> ppr b <> text "@"
1589 <> case bs of
1590 [] -> ppr k
1591 _ -> parens (ppr k <+> ppr bs)
1592 ppr (FloatTick t) = ppr t
1593
1594 instance Outputable Floats where
1595 ppr (Floats flag fs) = text "Floats" <> brackets (ppr flag) <+>
1596 braces (vcat (map ppr (fromOL fs)))
1597
1598 instance Outputable OkToSpec where
1599 ppr OkToSpec = text "OkToSpec"
1600 ppr IfUnboxedOk = text "IfUnboxedOk"
1601 ppr NotOkToSpec = text "NotOkToSpec"
1602
1603 -- Can we float these binds out of the rhs of a let? We cache this decision
1604 -- to avoid having to recompute it in a non-linear way when there are
1605 -- deeply nested lets.
1606 data OkToSpec
1607 = OkToSpec -- Lazy bindings of lifted type
1608 | IfUnboxedOk -- A mixture of lazy lifted bindings and n
1609 -- ok-to-speculate unlifted bindings
1610 | NotOkToSpec -- Some not-ok-to-speculate unlifted bindings
1611
1612 mkFloat :: Demand -> Bool -> Id -> CpeRhs -> FloatingBind
1613 mkFloat dmd is_unlifted bndr rhs
1614 | is_strict || ok_for_spec -- See Note [Speculative evaluation]
1615 , not is_hnf = FloatCase rhs bndr DEFAULT [] ok_for_spec
1616 -- Don't make a case for a HNF binding, even if it's strict
1617 -- Otherwise we get case (\x -> e) of ...!
1618
1619 | is_unlifted = FloatCase rhs bndr DEFAULT [] True
1620 -- we used to assertPpr ok_for_spec (ppr rhs) here, but it is now disabled
1621 -- because exprOkForSpeculation isn't stable under ANF-ing. See for
1622 -- example #19489 where the following unlifted expression:
1623 --
1624 -- GHC.Prim.(#|_#) @LiftedRep @LiftedRep @[a_ax0] @[a_ax0]
1625 -- (GHC.Types.: @a_ax0 a2_agq a3_agl)
1626 --
1627 -- is ok-for-spec but is ANF-ised into:
1628 --
1629 -- let sat = GHC.Types.: @a_ax0 a2_agq a3_agl
1630 -- in GHC.Prim.(#|_#) @LiftedRep @LiftedRep @[a_ax0] @[a_ax0] sat
1631 --
1632 -- which isn't ok-for-spec because of the let-expression.
1633
1634 | is_hnf = FloatLet (NonRec bndr rhs)
1635 | otherwise = FloatLet (NonRec (setIdDemandInfo bndr dmd) rhs)
1636 -- See Note [Pin demand info on floats]
1637 where
1638 is_hnf = exprIsHNF rhs
1639 is_strict = isStrUsedDmd dmd
1640 ok_for_spec = exprOkForSpeculation rhs
1641
1642 emptyFloats :: Floats
1643 emptyFloats = Floats OkToSpec nilOL
1644
1645 isEmptyFloats :: Floats -> Bool
1646 isEmptyFloats (Floats _ bs) = isNilOL bs
1647
1648 wrapBinds :: Floats -> CpeBody -> CpeBody
1649 wrapBinds (Floats _ binds) body
1650 = foldrOL mk_bind body binds
1651 where
1652 mk_bind (FloatCase rhs bndr con bs _) body = Case rhs bndr (exprType body) [Alt con bs body]
1653 mk_bind (FloatLet bind) body = Let bind body
1654 mk_bind (FloatTick tickish) body = mkTick tickish body
1655
1656 addFloat :: Floats -> FloatingBind -> Floats
1657 addFloat (Floats ok_to_spec floats) new_float
1658 = Floats (combine ok_to_spec (check new_float)) (floats `snocOL` new_float)
1659 where
1660 check (FloatLet {}) = OkToSpec
1661 check (FloatCase _ _ _ _ ok_for_spec)
1662 | ok_for_spec = IfUnboxedOk
1663 | otherwise = NotOkToSpec
1664 check FloatTick{} = OkToSpec
1665 -- The ok-for-speculation flag says that it's safe to
1666 -- float this Case out of a let, and thereby do it more eagerly
1667 -- We need the top-level flag because it's never ok to float
1668 -- an unboxed binding to the top level
1669
1670 unitFloat :: FloatingBind -> Floats
1671 unitFloat = addFloat emptyFloats
1672
1673 appendFloats :: Floats -> Floats -> Floats
1674 appendFloats (Floats spec1 floats1) (Floats spec2 floats2)
1675 = Floats (combine spec1 spec2) (floats1 `appOL` floats2)
1676
1677 concatFloats :: [Floats] -> OrdList FloatingBind
1678 concatFloats = foldr (\ (Floats _ bs1) bs2 -> appOL bs1 bs2) nilOL
1679
1680 combine :: OkToSpec -> OkToSpec -> OkToSpec
1681 combine NotOkToSpec _ = NotOkToSpec
1682 combine _ NotOkToSpec = NotOkToSpec
1683 combine IfUnboxedOk _ = IfUnboxedOk
1684 combine _ IfUnboxedOk = IfUnboxedOk
1685 combine _ _ = OkToSpec
1686
1687 deFloatTop :: Floats -> [CoreBind]
1688 -- For top level only; we don't expect any FloatCases
1689 deFloatTop (Floats _ floats)
1690 = foldrOL get [] floats
1691 where
1692 get (FloatLet b) bs = get_bind b : bs
1693 get (FloatCase body var _ _ _) bs = get_bind (NonRec var body) : bs
1694 get b _ = pprPanic "corePrepPgm" (ppr b)
1695
1696 -- See Note [Dead code in CorePrep]
1697 get_bind (NonRec x e) = NonRec x (occurAnalyseExpr e)
1698 get_bind (Rec xes) = Rec [(x, occurAnalyseExpr e) | (x, e) <- xes]
1699
1700 ---------------------------------------------------------------------------
1701
1702 canFloat :: Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
1703 canFloat (Floats ok_to_spec fs) rhs
1704 | OkToSpec <- ok_to_spec -- Worth trying
1705 , Just fs' <- go nilOL (fromOL fs)
1706 = Just (Floats OkToSpec fs', rhs)
1707 | otherwise
1708 = Nothing
1709 where
1710 go :: OrdList FloatingBind -> [FloatingBind]
1711 -> Maybe (OrdList FloatingBind)
1712
1713 go (fbs_out) [] = Just fbs_out
1714
1715 go fbs_out (fb@(FloatLet _) : fbs_in)
1716 = go (fbs_out `snocOL` fb) fbs_in
1717
1718 go fbs_out (ft@FloatTick{} : fbs_in)
1719 = go (fbs_out `snocOL` ft) fbs_in
1720
1721 go _ (FloatCase{} : _) = Nothing
1722
1723
1724 wantFloatNested :: RecFlag -> Demand -> Bool -> Floats -> CpeRhs -> Bool
1725 wantFloatNested is_rec dmd is_unlifted floats rhs
1726 = isEmptyFloats floats
1727 || isStrUsedDmd dmd
1728 || is_unlifted
1729 || (allLazyNested is_rec floats && exprIsHNF rhs)
1730 -- Why the test for allLazyNested?
1731 -- v = f (x `divInt#` y)
1732 -- we don't want to float the case, even if f has arity 2,
1733 -- because floating the case would make it evaluated too early
1734
1735 allLazyTop :: Floats -> Bool
1736 allLazyTop (Floats OkToSpec _) = True
1737 allLazyTop _ = False
1738
1739 allLazyNested :: RecFlag -> Floats -> Bool
1740 allLazyNested _ (Floats OkToSpec _) = True
1741 allLazyNested _ (Floats NotOkToSpec _) = False
1742 allLazyNested is_rec (Floats IfUnboxedOk _) = isNonRec is_rec
1743
1744 {-
1745 ************************************************************************
1746 * *
1747 Cloning
1748 * *
1749 ************************************************************************
1750 -}
1751
1752 -- ---------------------------------------------------------------------------
1753 -- The environment
1754 -- ---------------------------------------------------------------------------
1755
1756 {- Note [Inlining in CorePrep]
1757 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1758 There is a subtle but important invariant that must be upheld in the output
1759 of CorePrep: there are no "trivial" updatable thunks. Thus, this Core
1760 is impermissible:
1761
1762 let x :: ()
1763 x = y
1764
1765 (where y is a reference to a GLOBAL variable). Thunks like this are silly:
1766 they can always be profitably replaced by inlining x with y. Consequently,
1767 the code generator/runtime does not bother implementing this properly
1768 (specifically, there is no implementation of stg_ap_0_upd_info, which is the
1769 stack frame that would be used to update this thunk. The "0" means it has
1770 zero free variables.)
1771
1772 In general, the inliner is good at eliminating these let-bindings. However,
1773 there is one case where these trivial updatable thunks can arise: when
1774 we are optimizing away 'lazy' (see Note [lazyId magic], and also
1775 'cpeRhsE'.) Then, we could have started with:
1776
1777 let x :: ()
1778 x = lazy @ () y
1779
1780 which is a perfectly fine, non-trivial thunk, but then CorePrep will
1781 drop 'lazy', giving us 'x = y' which is trivial and impermissible.
1782 The solution is CorePrep to have a miniature inlining pass which deals
1783 with cases like this. We can then drop the let-binding altogether.
1784
1785 Why does the removal of 'lazy' have to occur in CorePrep?
1786 The gory details are in Note [lazyId magic] in GHC.Types.Id.Make, but the
1787 main reason is that lazy must appear in unfoldings (optimizer
1788 output) and it must prevent call-by-value for catch# (which
1789 is implemented by CorePrep.)
1790
1791 An alternate strategy for solving this problem is to have the
1792 inliner treat 'lazy e' as a trivial expression if 'e' is trivial.
1793 We decided not to adopt this solution to keep the definition
1794 of 'exprIsTrivial' simple.
1795
1796 There is ONE caveat however: for top-level bindings we have
1797 to preserve the binding so that we float the (hacky) non-recursive
1798 binding for data constructors; see Note [Data constructor workers].
1799
1800 Note [CorePrep inlines trivial CoreExpr not Id]
1801 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1802 Why does cpe_env need to be an IdEnv CoreExpr, as opposed to an
1803 IdEnv Id? Naively, we might conjecture that trivial updatable thunks
1804 as per Note [Inlining in CorePrep] always have the form
1805 'lazy @ SomeType gbl_id'. But this is not true: the following is
1806 perfectly reasonable Core:
1807
1808 let x :: ()
1809 x = lazy @ (forall a. a) y @ Bool
1810
1811 When we inline 'x' after eliminating 'lazy', we need to replace
1812 occurrences of 'x' with 'y @ bool', not just 'y'. Situations like
1813 this can easily arise with higher-rank types; thus, cpe_env must
1814 map to CoreExprs, not Ids.
1815
1816 -}
1817
1818 data CorePrepEnv
1819 = CPE { cpe_dynFlags :: DynFlags
1820 , cpe_env :: IdEnv CoreExpr -- Clone local Ids
1821 -- ^ This environment is used for three operations:
1822 --
1823 -- 1. To support cloning of local Ids so that they are
1824 -- all unique (see item (6) of CorePrep overview).
1825 --
1826 -- 2. To support beta-reduction of runRW, see
1827 -- Note [runRW magic] and Note [runRW arg].
1828 --
1829 -- 3. To let us inline trivial RHSs of non top-level let-bindings,
1830 -- see Note [lazyId magic], Note [Inlining in CorePrep]
1831 -- and Note [CorePrep inlines trivial CoreExpr not Id] (#12076)
1832
1833 , cpe_tyco_env :: Maybe CpeTyCoEnv -- See Note [CpeTyCoEnv]
1834
1835 , cpe_convertNumLit :: LitNumType -> Integer -> Maybe CoreExpr
1836 -- ^ Convert some numeric literals (Integer, Natural) into their
1837 -- final Core form
1838 }
1839
1840 mkInitialCorePrepEnv :: HscEnv -> IO CorePrepEnv
1841 mkInitialCorePrepEnv hsc_env = do
1842 convertNumLit <- mkConvertNumLiteral hsc_env
1843 return $ CPE
1844 { cpe_dynFlags = hsc_dflags hsc_env
1845 , cpe_env = emptyVarEnv
1846 , cpe_tyco_env = Nothing
1847 , cpe_convertNumLit = convertNumLit
1848 }
1849
1850 extendCorePrepEnv :: CorePrepEnv -> Id -> Id -> CorePrepEnv
1851 extendCorePrepEnv cpe id id'
1852 = cpe { cpe_env = extendVarEnv (cpe_env cpe) id (Var id') }
1853
1854 extendCorePrepEnvExpr :: CorePrepEnv -> Id -> CoreExpr -> CorePrepEnv
1855 extendCorePrepEnvExpr cpe id expr
1856 = cpe { cpe_env = extendVarEnv (cpe_env cpe) id expr }
1857
1858 extendCorePrepEnvList :: CorePrepEnv -> [(Id,Id)] -> CorePrepEnv
1859 extendCorePrepEnvList cpe prs
1860 = cpe { cpe_env = extendVarEnvList (cpe_env cpe)
1861 (map (\(id, id') -> (id, Var id')) prs) }
1862
1863 lookupCorePrepEnv :: CorePrepEnv -> Id -> CoreExpr
1864 lookupCorePrepEnv cpe id
1865 = case lookupVarEnv (cpe_env cpe) id of
1866 Nothing -> Var id
1867 Just exp -> exp
1868
1869 ------------------------------------------------------------------------------
1870 -- CpeTyCoEnv
1871 -- ---------------------------------------------------------------------------
1872
1873 {- Note [CpeTyCoEnv]
1874 ~~~~~~~~~~~~~~~~~~~~
1875 The cpe_tyco_env :: Maybe CpeTyCoEnv field carries a substitution
1876 for type and coercion varibles
1877
1878 * We need the coercion substitution to support the elimination of
1879 unsafeEqualityProof (see Note [Unsafe coercions])
1880
1881 * We need the type substitution in case one of those unsafe
1882 coercions occurs in the kind of tyvar binder (sigh)
1883
1884 We don't need an in-scope set because we don't clone any of these
1885 binders at all, so no new capture can take place.
1886
1887 The cpe_tyco_env is almost always empty -- it only gets populated
1888 when we get under an usafeEqualityProof. Hence the Maybe CpeTyCoEnv,
1889 which makes everything into a no-op in the common case.
1890 -}
1891
1892 data CpeTyCoEnv = TCE TvSubstEnv CvSubstEnv
1893
1894 emptyTCE :: CpeTyCoEnv
1895 emptyTCE = TCE emptyTvSubstEnv emptyCvSubstEnv
1896
1897 extend_tce_cv :: CpeTyCoEnv -> CoVar -> Coercion -> CpeTyCoEnv
1898 extend_tce_cv (TCE tv_env cv_env) cv co
1899 = TCE tv_env (extendVarEnv cv_env cv co)
1900
1901 extend_tce_tv :: CpeTyCoEnv -> TyVar -> Type -> CpeTyCoEnv
1902 extend_tce_tv (TCE tv_env cv_env) tv ty
1903 = TCE (extendVarEnv tv_env tv ty) cv_env
1904
1905 lookup_tce_cv :: CpeTyCoEnv -> CoVar -> Coercion
1906 lookup_tce_cv (TCE _ cv_env) cv
1907 = case lookupVarEnv cv_env cv of
1908 Just co -> co
1909 Nothing -> mkCoVarCo cv
1910
1911 lookup_tce_tv :: CpeTyCoEnv -> TyVar -> Type
1912 lookup_tce_tv (TCE tv_env _) tv
1913 = case lookupVarEnv tv_env tv of
1914 Just ty -> ty
1915 Nothing -> mkTyVarTy tv
1916
1917 extendCoVarEnv :: CorePrepEnv -> CoVar -> Coercion -> CorePrepEnv
1918 extendCoVarEnv cpe@(CPE { cpe_tyco_env = mb_tce }) cv co
1919 = cpe { cpe_tyco_env = Just (extend_tce_cv tce cv co) }
1920 where
1921 tce = mb_tce `orElse` emptyTCE
1922
1923
1924 cpSubstTy :: CorePrepEnv -> Type -> Type
1925 cpSubstTy (CPE { cpe_tyco_env = mb_env }) ty
1926 = case mb_env of
1927 Just env -> runIdentity (subst_ty env ty)
1928 Nothing -> ty
1929
1930 cpSubstCo :: CorePrepEnv -> Coercion -> Coercion
1931 cpSubstCo (CPE { cpe_tyco_env = mb_env }) co
1932 = case mb_env of
1933 Just tce -> runIdentity (subst_co tce co)
1934 Nothing -> co
1935
1936 subst_tyco_mapper :: TyCoMapper CpeTyCoEnv Identity
1937 subst_tyco_mapper = TyCoMapper
1938 { tcm_tyvar = \env tv -> return (lookup_tce_tv env tv)
1939 , tcm_covar = \env cv -> return (lookup_tce_cv env cv)
1940 , tcm_hole = \_ hole -> pprPanic "subst_co_mapper:hole" (ppr hole)
1941 , tcm_tycobinder = \env tcv _vis -> if isTyVar tcv
1942 then return (subst_tv_bndr env tcv)
1943 else return (subst_cv_bndr env tcv)
1944 , tcm_tycon = \tc -> return tc }
1945
1946 subst_ty :: CpeTyCoEnv -> Type -> Identity Type
1947 subst_co :: CpeTyCoEnv -> Coercion -> Identity Coercion
1948 (subst_ty, _, subst_co, _) = mapTyCoX subst_tyco_mapper
1949
1950 cpSubstTyVarBndr :: CorePrepEnv -> TyVar -> (CorePrepEnv, TyVar)
1951 cpSubstTyVarBndr env@(CPE { cpe_tyco_env = mb_env }) tv
1952 = case mb_env of
1953 Nothing -> (env, tv)
1954 Just tce -> (env { cpe_tyco_env = Just tce' }, tv')
1955 where
1956 (tce', tv') = subst_tv_bndr tce tv
1957
1958 subst_tv_bndr :: CpeTyCoEnv -> TyVar -> (CpeTyCoEnv, TyVar)
1959 subst_tv_bndr tce tv
1960 = (extend_tce_tv tce tv (mkTyVarTy tv'), tv')
1961 where
1962 tv' = mkTyVar (tyVarName tv) kind'
1963 kind' = runIdentity $ subst_ty tce $ tyVarKind tv
1964
1965 cpSubstCoVarBndr :: CorePrepEnv -> CoVar -> (CorePrepEnv, CoVar)
1966 cpSubstCoVarBndr env@(CPE { cpe_tyco_env = mb_env }) cv
1967 = case mb_env of
1968 Nothing -> (env, cv)
1969 Just tce -> (env { cpe_tyco_env = Just tce' }, cv')
1970 where
1971 (tce', cv') = subst_cv_bndr tce cv
1972
1973 subst_cv_bndr :: CpeTyCoEnv -> CoVar -> (CpeTyCoEnv, CoVar)
1974 subst_cv_bndr tce cv
1975 = (extend_tce_cv tce cv (mkCoVarCo cv'), cv')
1976 where
1977 cv' = mkCoVar (varName cv) ty'
1978 ty' = runIdentity (subst_ty tce $ varType cv)
1979
1980 ------------------------------------------------------------------------------
1981 -- Cloning binders
1982 -- ---------------------------------------------------------------------------
1983
1984 cpCloneBndrs :: CorePrepEnv -> [InVar] -> UniqSM (CorePrepEnv, [OutVar])
1985 cpCloneBndrs env bs = mapAccumLM cpCloneBndr env bs
1986
1987 cpCloneBndr :: CorePrepEnv -> InVar -> UniqSM (CorePrepEnv, OutVar)
1988 cpCloneBndr env bndr
1989 | isTyVar bndr
1990 = return (cpSubstTyVarBndr env bndr)
1991
1992 | isCoVar bndr
1993 = return (cpSubstCoVarBndr env bndr)
1994
1995 | otherwise
1996 = do { bndr' <- clone_it bndr
1997
1998 -- Drop (now-useless) rules/unfoldings
1999 -- See Note [Drop unfoldings and rules]
2000 -- and Note [Preserve evaluatedness] in GHC.Core.Tidy
2001 ; let unfolding' = zapUnfolding (realIdUnfolding bndr)
2002 -- Simplifier will set the Id's unfolding
2003
2004 bndr'' = bndr' `setIdUnfolding` unfolding'
2005 `setIdSpecialisation` emptyRuleInfo
2006
2007 ; return (extendCorePrepEnv env bndr bndr'', bndr'') }
2008 where
2009 clone_it bndr
2010 | isLocalId bndr
2011 = do { uniq <- getUniqueM
2012 ; let ty' = cpSubstTy env (idType bndr)
2013 ; return (setVarUnique (setIdType bndr ty') uniq) }
2014
2015 | otherwise -- Top level things, which we don't want
2016 -- to clone, have become GlobalIds by now
2017 = return bndr
2018
2019 {- Note [Drop unfoldings and rules]
2020 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2021 We want to drop the unfolding/rules on every Id:
2022
2023 - We are now past interface-file generation, and in the
2024 codegen pipeline, so we really don't need full unfoldings/rules
2025
2026 - The unfolding/rule may be keeping stuff alive that we'd like
2027 to discard. See Note [Dead code in CorePrep]
2028
2029 - Getting rid of unnecessary unfoldings reduces heap usage
2030
2031 - We are changing uniques, so if we didn't discard unfoldings/rules
2032 we'd have to substitute in them
2033
2034 HOWEVER, we want to preserve evaluated-ness;
2035 see Note [Preserve evaluatedness] in GHC.Core.Tidy.
2036 -}
2037
2038 ------------------------------------------------------------------------------
2039 -- Cloning ccall Ids; each must have a unique name,
2040 -- to give the code generator a handle to hang it on
2041 -- ---------------------------------------------------------------------------
2042
2043 fiddleCCall :: Id -> UniqSM Id
2044 fiddleCCall id
2045 | isFCallId id = (id `setVarUnique`) <$> getUniqueM
2046 | otherwise = return id
2047
2048 ------------------------------------------------------------------------------
2049 -- Generating new binders
2050 -- ---------------------------------------------------------------------------
2051
2052 newVar :: Type -> UniqSM Id
2053 newVar ty
2054 = seqType ty `seq` do
2055 uniq <- getUniqueM
2056 return (mkSysLocalOrCoVar (fsLit "sat") uniq Many ty)
2057
2058
2059 ------------------------------------------------------------------------------
2060 -- Floating ticks
2061 -- ---------------------------------------------------------------------------
2062 --
2063 -- Note [Floating Ticks in CorePrep]
2064 --
2065 -- It might seem counter-intuitive to float ticks by default, given
2066 -- that we don't actually want to move them if we can help it. On the
2067 -- other hand, nothing gets very far in CorePrep anyway, and we want
2068 -- to preserve the order of let bindings and tick annotations in
2069 -- relation to each other. For example, if we just wrapped let floats
2070 -- when they pass through ticks, we might end up performing the
2071 -- following transformation:
2072 --
2073 -- src<...> let foo = bar in baz
2074 -- ==> let foo = src<...> bar in src<...> baz
2075 --
2076 -- Because the let-binding would float through the tick, and then
2077 -- immediately materialize, achieving nothing but decreasing tick
2078 -- accuracy. The only special case is the following scenario:
2079 --
2080 -- let foo = src<...> (let a = b in bar) in baz
2081 -- ==> let foo = src<...> bar; a = src<...> b in baz
2082 --
2083 -- Here we would not want the source tick to end up covering "baz" and
2084 -- therefore refrain from pushing ticks outside. Instead, we copy them
2085 -- into the floating binds (here "a") in cpePair. Note that where "b"
2086 -- or "bar" are (value) lambdas we have to push the annotations
2087 -- further inside in order to uphold our rules.
2088 --
2089 -- All of this is implemented below in @wrapTicks@.
2090
2091 -- | Like wrapFloats, but only wraps tick floats
2092 wrapTicks :: Floats -> CoreExpr -> (Floats, CoreExpr)
2093 wrapTicks (Floats flag floats0) expr =
2094 (Floats flag (toOL $ reverse floats1), foldr mkTick expr (reverse ticks1))
2095 where (floats1, ticks1) = foldlOL go ([], []) $ floats0
2096 -- Deeply nested constructors will produce long lists of
2097 -- redundant source note floats here. We need to eliminate
2098 -- those early, as relying on mkTick to spot it after the fact
2099 -- can yield O(n^3) complexity [#11095]
2100 go (floats, ticks) (FloatTick t)
2101 = assert (tickishPlace t == PlaceNonLam)
2102 (floats, if any (flip tickishContains t) ticks
2103 then ticks else t:ticks)
2104 go (floats, ticks) f
2105 = (foldr wrap f (reverse ticks):floats, ticks)
2106
2107 wrap t (FloatLet bind) = FloatLet (wrapBind t bind)
2108 wrap t (FloatCase r b con bs ok) = FloatCase (mkTick t r) b con bs ok
2109 wrap _ other = pprPanic "wrapTicks: unexpected float!"
2110 (ppr other)
2111 wrapBind t (NonRec binder rhs) = NonRec binder (mkTick t rhs)
2112 wrapBind t (Rec pairs) = Rec (mapSnd (mkTick t) pairs)
2113
2114
2115
2116 ------------------------------------------------------------------------------
2117 -- Numeric literals
2118 -- ---------------------------------------------------------------------------
2119
2120 -- | Create a function that converts Bignum literals into their final CoreExpr
2121 mkConvertNumLiteral
2122 :: HscEnv
2123 -> IO (LitNumType -> Integer -> Maybe CoreExpr)
2124 mkConvertNumLiteral hsc_env = do
2125 let
2126 dflags = hsc_dflags hsc_env
2127 platform = targetPlatform dflags
2128 home_unit = hsc_home_unit hsc_env
2129 guardBignum act
2130 | isHomeUnitInstanceOf home_unit primUnitId
2131 = return $ panic "Bignum literals are not supported in ghc-prim"
2132 | isHomeUnitInstanceOf home_unit bignumUnitId
2133 = return $ panic "Bignum literals are not supported in ghc-bignum"
2134 | otherwise = act
2135
2136 lookupBignumId n = guardBignum (tyThingId <$> lookupGlobal hsc_env n)
2137
2138 -- The lookup is done here but the failure (panic) is reported lazily when we
2139 -- try to access the `bigNatFromWordList` function.
2140 --
2141 -- If we ever get built-in ByteArray# literals, we could avoid the lookup by
2142 -- directly using the Integer/Natural wired-in constructors for big numbers.
2143
2144 bignatFromWordListId <- lookupBignumId bignatFromWordListName
2145
2146 let
2147 convertNumLit nt i = case nt of
2148 LitNumBigNat -> Just (convertBignatPrim i)
2149 _ -> Nothing
2150
2151 convertBignatPrim i =
2152 let
2153 target = targetPlatform dflags
2154
2155 -- ByteArray# literals aren't supported (yet). Were they supported,
2156 -- we would use them directly. We would need to handle
2157 -- wordSize/endianness conversion between host and target
2158 -- wordSize = platformWordSize platform
2159 -- byteOrder = platformByteOrder platform
2160
2161 -- For now we build a list of Words and we produce
2162 -- `bigNatFromWordList# list_of_words`
2163
2164 words = mkListExpr wordTy (reverse (unfoldr f i))
2165 where
2166 f 0 = Nothing
2167 f x = let low = x .&. mask
2168 high = x `shiftR` bits
2169 in Just (mkConApp wordDataCon [Lit (mkLitWord platform low)], high)
2170 bits = platformWordSizeInBits target
2171 mask = 2 ^ bits - 1
2172
2173 in mkApps (Var bignatFromWordListId) [words]
2174
2175
2176 return convertNumLit
2177