never executed always true always false
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module GHC.Core.Opt.Pipeline ( core2core, simplifyExpr ) where
10
11 import GHC.Prelude
12
13 import GHC.Driver.Session
14 import GHC.Driver.Plugins ( withPlugins, installCoreToDos )
15 import GHC.Driver.Env
16 import GHC.Platform.Ways ( hasWay, Way(WayProf) )
17
18 import GHC.Core
19 import GHC.Core.Opt.CSE ( cseProgram )
20 import GHC.Core.Rules ( mkRuleBase, unionRuleBase,
21 extendRuleBaseList, ruleCheckProgram, addRuleInfo,
22 getRules, initRuleOpts )
23 import GHC.Core.Ppr ( pprCoreBindings, pprCoreExpr )
24 import GHC.Core.Opt.OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
25 import GHC.Core.Stats ( coreBindsSize, coreBindsStats, exprSize )
26 import GHC.Core.Utils ( mkTicks, stripTicksTop, dumpIdInfoOfProgram )
27 import GHC.Core.Lint ( endPass, lintPassResult, dumpPassResult,
28 lintAnnots )
29 import GHC.Core.Opt.Simplify ( simplTopBinds, simplExpr, simplRules )
30 import GHC.Core.Opt.Simplify.Utils ( simplEnvForGHCi, activeRule, activeUnfolding )
31 import GHC.Core.Opt.Simplify.Env
32 import GHC.Core.Opt.Simplify.Monad
33 import GHC.Core.Opt.Monad
34 import GHC.Core.Opt.FloatIn ( floatInwards )
35 import GHC.Core.Opt.FloatOut ( floatOutwards )
36 import GHC.Core.Opt.LiberateCase ( liberateCase )
37 import GHC.Core.Opt.StaticArgs ( doStaticArgs )
38 import GHC.Core.Opt.Specialise ( specProgram)
39 import GHC.Core.Opt.SpecConstr ( specConstrProgram)
40 import GHC.Core.Opt.DmdAnal
41 import GHC.Core.Opt.CprAnal ( cprAnalProgram )
42 import GHC.Core.Opt.CallArity ( callArityAnalProgram )
43 import GHC.Core.Opt.Exitify ( exitifyProgram )
44 import GHC.Core.Opt.WorkWrap ( wwTopBinds )
45 import GHC.Core.Opt.CallerCC ( addCallerCostCentres )
46 import GHC.Core.Seq (seqBinds)
47 import GHC.Core.FamInstEnv
48
49 import GHC.Utils.Error ( withTiming )
50 import GHC.Utils.Logger as Logger
51 import GHC.Utils.Outputable
52 import GHC.Utils.Panic
53 import GHC.Utils.Constants (debugIsOn)
54 import GHC.Utils.Trace
55
56 import GHC.Unit.External
57 import GHC.Unit.Module.Env
58 import GHC.Unit.Module.ModGuts
59 import GHC.Unit.Module.Deps
60
61 import GHC.Runtime.Context
62
63 import GHC.Types.Id
64 import GHC.Types.Id.Info
65 import GHC.Types.Basic
66 import GHC.Types.Demand ( zapDmdEnvSig )
67 import GHC.Types.Var.Set
68 import GHC.Types.Var.Env
69 import GHC.Types.Tickish
70 import GHC.Types.Unique.FM
71 import GHC.Types.Name.Ppr
72
73 import Control.Monad
74 import qualified GHC.LanguageExtensions as LangExt
75 {-
76 ************************************************************************
77 * *
78 \subsection{The driver for the simplifier}
79 * *
80 ************************************************************************
81 -}
82
83 core2core :: HscEnv -> ModGuts -> IO ModGuts
84 core2core hsc_env guts@(ModGuts { mg_module = mod
85 , mg_loc = loc
86 , mg_deps = deps
87 , mg_rdr_env = rdr_env })
88 = do { let builtin_passes = getCoreToDo logger dflags
89 orph_mods = mkModuleSet (mod : dep_orphs deps)
90 uniq_mask = 's'
91 ;
92 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_mask mod
93 orph_mods print_unqual loc $
94 do { hsc_env' <- getHscEnv
95 ; all_passes <- withPlugins hsc_env'
96 installCoreToDos
97 builtin_passes
98 ; runCorePasses all_passes guts }
99
100 ; Logger.putDumpFileMaybe logger Opt_D_dump_simpl_stats
101 "Grand total simplifier statistics"
102 FormatText
103 (pprSimplCount stats)
104
105 ; return guts2 }
106 where
107 logger = hsc_logger hsc_env
108 dflags = hsc_dflags hsc_env
109 home_pkg_rules = hptRules hsc_env (dep_direct_mods deps)
110 hpt_rule_base = mkRuleBase home_pkg_rules
111 print_unqual = mkPrintUnqualified (hsc_unit_env hsc_env) rdr_env
112 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
113 -- This is very convienent for the users of the monad (e.g. plugins do not have to
114 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
115 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
116 -- would mean our cached value would go out of date.
117
118 {-
119 ************************************************************************
120 * *
121 Generating the main optimisation pipeline
122 * *
123 ************************************************************************
124 -}
125
126 getCoreToDo :: Logger -> DynFlags -> [CoreToDo]
127 getCoreToDo logger dflags
128 = flatten_todos core_todo
129 where
130 opt_level = optLevel dflags
131 phases = simplPhases dflags
132 max_iter = maxSimplIterations dflags
133 rule_check = ruleCheck dflags
134 call_arity = gopt Opt_CallArity dflags
135 exitification = gopt Opt_Exitification dflags
136 strictness = gopt Opt_Strictness dflags
137 full_laziness = gopt Opt_FullLaziness dflags
138 do_specialise = gopt Opt_Specialise dflags
139 do_float_in = gopt Opt_FloatIn dflags
140 cse = gopt Opt_CSE dflags
141 spec_constr = gopt Opt_SpecConstr dflags
142 liberate_case = gopt Opt_LiberateCase dflags
143 late_dmd_anal = gopt Opt_LateDmdAnal dflags
144 late_specialise = gopt Opt_LateSpecialise dflags
145 static_args = gopt Opt_StaticArgumentTransformation dflags
146 rules_on = gopt Opt_EnableRewriteRules dflags
147 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
148 pre_inline_on = gopt Opt_SimplPreInlining dflags
149 ww_on = gopt Opt_WorkerWrapper dflags
150 static_ptrs = xopt LangExt.StaticPointers dflags
151 profiling = ways dflags `hasWay` WayProf
152
153 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
154
155 maybe_strictness_before (Phase phase)
156 | phase `elem` strictnessBefore dflags = CoreDoDemand
157 maybe_strictness_before _
158 = CoreDoNothing
159
160 base_mode = SimplMode { sm_phase = panic "base_mode"
161 , sm_names = []
162 , sm_dflags = dflags
163 , sm_logger = logger
164 , sm_uf_opts = unfoldingOpts dflags
165 , sm_rules = rules_on
166 , sm_eta_expand = eta_expand_on
167 , sm_inline = True
168 , sm_case_case = True
169 , sm_pre_inline = pre_inline_on
170 }
171
172 simpl_phase phase name iter
173 = CoreDoPasses
174 $ [ maybe_strictness_before phase
175 , CoreDoSimplify iter
176 (base_mode { sm_phase = phase
177 , sm_names = [name] })
178
179 , maybe_rule_check phase ]
180
181 -- Run GHC's internal simplification phase, after all rules have run.
182 -- See Note [Compiler phases] in GHC.Types.Basic
183 simplify name = simpl_phase FinalPhase name max_iter
184
185 -- initial simplify: mk specialiser happy: minimum effort please
186 simpl_gently = CoreDoSimplify max_iter
187 (base_mode { sm_phase = InitialPhase
188 , sm_names = ["Gentle"]
189 , sm_rules = rules_on -- Note [RULEs enabled in InitialPhase]
190 , sm_inline = True
191 -- See Note [Inline in InitialPhase]
192 , sm_case_case = False })
193 -- Don't do case-of-case transformations.
194 -- This makes full laziness work better
195
196 dmd_cpr_ww = if ww_on then [CoreDoDemand,CoreDoCpr,CoreDoWorkerWrapper]
197 else [CoreDoDemand,CoreDoCpr]
198
199
200 demand_analyser = (CoreDoPasses (
201 dmd_cpr_ww ++
202 [simplify "post-worker-wrapper"]
203 ))
204
205 -- Static forms are moved to the top level with the FloatOut pass.
206 -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
207 static_ptrs_float_outwards =
208 runWhen static_ptrs $ CoreDoPasses
209 [ simpl_gently -- Float Out can't handle type lets (sometimes created
210 -- by simpleOptPgm via mkParallelBindings)
211 , CoreDoFloatOutwards FloatOutSwitches
212 { floatOutLambdas = Just 0
213 , floatOutConstants = True
214 , floatOutOverSatApps = False
215 , floatToTopLevelOnly = True
216 }
217 ]
218
219 add_caller_ccs =
220 runWhen (profiling && not (null $ callerCcFilters dflags)) CoreAddCallerCcs
221
222 core_todo =
223 if opt_level == 0 then
224 [ static_ptrs_float_outwards,
225 CoreDoSimplify max_iter
226 (base_mode { sm_phase = FinalPhase
227 , sm_names = ["Non-opt simplification"] })
228 , add_caller_ccs
229 ]
230
231 else {- opt_level >= 1 -} [
232
233 -- We want to do the static argument transform before full laziness as it
234 -- may expose extra opportunities to float things outwards. However, to fix
235 -- up the output of the transformation we need at do at least one simplify
236 -- after this before anything else
237 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
238
239 -- initial simplify: mk specialiser happy: minimum effort please
240 simpl_gently,
241
242 -- Specialisation is best done before full laziness
243 -- so that overloaded functions have all their dictionary lambdas manifest
244 runWhen do_specialise CoreDoSpecialising,
245
246 if full_laziness then
247 CoreDoFloatOutwards FloatOutSwitches {
248 floatOutLambdas = Just 0,
249 floatOutConstants = True,
250 floatOutOverSatApps = False,
251 floatToTopLevelOnly = False }
252 -- Was: gentleFloatOutSwitches
253 --
254 -- I have no idea why, but not floating constants to
255 -- top level is very bad in some cases.
256 --
257 -- Notably: p_ident in spectral/rewrite
258 -- Changing from "gentle" to "constantsOnly"
259 -- improved rewrite's allocation by 19%, and
260 -- made 0.0% difference to any other nofib
261 -- benchmark
262 --
263 -- Not doing floatOutOverSatApps yet, we'll do
264 -- that later on when we've had a chance to get more
265 -- accurate arity information. In fact it makes no
266 -- difference at all to performance if we do it here,
267 -- but maybe we save some unnecessary to-and-fro in
268 -- the simplifier.
269 else
270 -- Even with full laziness turned off, we still need to float static
271 -- forms to the top level. See Note [Grand plan for static forms] in
272 -- GHC.Iface.Tidy.StaticPtrTable.
273 static_ptrs_float_outwards,
274
275 -- Run the simplier phases 2,1,0 to allow rewrite rules to fire
276 CoreDoPasses [ simpl_phase (Phase phase) "main" max_iter
277 | phase <- [phases, phases-1 .. 1] ],
278 simpl_phase (Phase 0) "main" (max max_iter 3),
279 -- Phase 0: allow all Ids to be inlined now
280 -- This gets foldr inlined before strictness analysis
281
282 -- At least 3 iterations because otherwise we land up with
283 -- huge dead expressions because of an infelicity in the
284 -- simplifier.
285 -- let k = BIG in foldr k z xs
286 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
287 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
288 -- Don't stop now!
289
290 runWhen do_float_in CoreDoFloatInwards,
291 -- Run float-inwards immediately before the strictness analyser
292 -- Doing so pushes bindings nearer their use site and hence makes
293 -- them more likely to be strict. These bindings might only show
294 -- up after the inlining from simplification. Example in fulsom,
295 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
296
297 runWhen call_arity $ CoreDoPasses
298 [ CoreDoCallArity
299 , simplify "post-call-arity"
300 ],
301
302 -- Strictness analysis
303 runWhen strictness demand_analyser,
304
305 runWhen exitification CoreDoExitify,
306 -- See note [Placement of the exitification pass]
307
308 runWhen full_laziness $
309 CoreDoFloatOutwards FloatOutSwitches {
310 floatOutLambdas = floatLamArgs dflags,
311 floatOutConstants = True,
312 floatOutOverSatApps = True,
313 floatToTopLevelOnly = False },
314 -- nofib/spectral/hartel/wang doubles in speed if you
315 -- do full laziness late in the day. It only happens
316 -- after fusion and other stuff, so the early pass doesn't
317 -- catch it. For the record, the redex is
318 -- f_el22 (f_el21 r_midblock)
319
320
321 runWhen cse CoreCSE,
322 -- We want CSE to follow the final full-laziness pass, because it may
323 -- succeed in commoning up things floated out by full laziness.
324 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
325
326 runWhen do_float_in CoreDoFloatInwards,
327
328 simplify "final", -- Final tidy-up
329
330 maybe_rule_check FinalPhase,
331
332 -------- After this we have -O2 passes -----------------
333 -- None of them run with -O
334
335 -- Case-liberation for -O2. This should be after
336 -- strictness analysis and the simplification which follows it.
337 runWhen liberate_case $ CoreDoPasses
338 [ CoreLiberateCase, simplify "post-liberate-case" ],
339 -- Run the simplifier after LiberateCase to vastly
340 -- reduce the possibility of shadowing
341 -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
342
343 runWhen spec_constr $ CoreDoPasses
344 [ CoreDoSpecConstr, simplify "post-spec-constr"],
345 -- See Note [Simplify after SpecConstr]
346
347 maybe_rule_check FinalPhase,
348
349 runWhen late_specialise $ CoreDoPasses
350 [ CoreDoSpecialising, simplify "post-late-spec"],
351
352 -- LiberateCase can yield new CSE opportunities because it peels
353 -- off one layer of a recursive function (concretely, I saw this
354 -- in wheel-sieve1), and I'm guessing that SpecConstr can too
355 -- And CSE is a very cheap pass. So it seems worth doing here.
356 runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses
357 [ CoreCSE, simplify "post-final-cse" ],
358
359 --------- End of -O2 passes --------------
360
361 runWhen late_dmd_anal $ CoreDoPasses (
362 dmd_cpr_ww ++ [simplify "post-late-ww"]
363 ),
364
365 -- Final run of the demand_analyser, ensures that one-shot thunks are
366 -- really really one-shot thunks. Only needed if the demand analyser
367 -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal
368 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
369 -- can become /exponentially/ more expensive. See #11731, #12996.
370 runWhen (strictness || late_dmd_anal) CoreDoDemand,
371
372 maybe_rule_check FinalPhase,
373
374 add_caller_ccs
375 ]
376
377 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
378 flatten_todos [] = []
379 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
380 flatten_todos (CoreDoPasses passes : rest) =
381 flatten_todos passes ++ flatten_todos rest
382 flatten_todos (todo : rest) = todo : flatten_todos rest
383
384 {- Note [Inline in InitialPhase]
385 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
386 In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
387 confusing for users because when they say INLINE they expect the function to inline
388 right away.
389
390 So now we do inlining immediately, even in the InitialPhase, assuming that the
391 Id's Activation allows it.
392
393 This is a surprisingly big deal. Compiler performance improved a lot
394 when I made this change:
395
396 perf/compiler/T5837.run T5837 [stat too good] (normal)
397 perf/compiler/parsing001.run parsing001 [stat too good] (normal)
398 perf/compiler/T12234.run T12234 [stat too good] (optasm)
399 perf/compiler/T9020.run T9020 [stat too good] (optasm)
400 perf/compiler/T3064.run T3064 [stat too good] (normal)
401 perf/compiler/T9961.run T9961 [stat too good] (normal)
402 perf/compiler/T13056.run T13056 [stat too good] (optasm)
403 perf/compiler/T9872d.run T9872d [stat too good] (normal)
404 perf/compiler/T783.run T783 [stat too good] (normal)
405 perf/compiler/T12227.run T12227 [stat too good] (normal)
406 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
407 perf/compiler/T1969.run T1969 [stat too good] (normal)
408 perf/compiler/T9872a.run T9872a [stat too good] (normal)
409 perf/compiler/T9872c.run T9872c [stat too good] (normal)
410 perf/compiler/T9872b.run T9872b [stat too good] (normal)
411 perf/compiler/T9872d.run T9872d [stat too good] (normal)
412
413 Note [RULEs enabled in InitialPhase]
414 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
415 RULES are enabled when doing "gentle" simplification in InitialPhase,
416 or with -O0. Two reasons:
417
418 * We really want the class-op cancellation to happen:
419 op (df d1 d2) --> $cop3 d1 d2
420 because this breaks the mutual recursion between 'op' and 'df'
421
422 * I wanted the RULE
423 lift String ===> ...
424 to work in Template Haskell when simplifying
425 splices, so we get simpler code for literal strings
426
427 But watch out: list fusion can prevent floating. So use phase control
428 to switch off those rules until after floating.
429
430 Note [Simplify after SpecConstr]
431 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
432 We want to run the simplifier after SpecConstr, and before late-Specialise,
433 for two reasons, both shown up in test perf/compiler/T16473,
434 with -O2 -flate-specialise
435
436 1. I found that running late-Specialise after SpecConstr, with no
437 simplification in between meant that the carefullly constructed
438 SpecConstr rule never got to fire. (It was something like
439 lvl = f a -- Arity 1
440 ....g lvl....
441 SpecConstr specialised g for argument lvl; but Specialise then
442 specialised lvl = f a to lvl = $sf, and inlined. Or something like
443 that.)
444
445 2. Specialise relies on unfoldings being available for top-level dictionary
446 bindings; but SpecConstr kills them all! The Simplifer restores them.
447
448 This extra run of the simplifier has a cost, but this is only with -O2.
449
450
451 ************************************************************************
452 * *
453 The CoreToDo interpreter
454 * *
455 ************************************************************************
456 -}
457
458 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
459 runCorePasses passes guts
460 = foldM do_pass guts passes
461 where
462 do_pass guts CoreDoNothing = return guts
463 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
464 do_pass guts pass = do
465 logger <- getLogger
466 withTiming logger (ppr pass <+> brackets (ppr mod))
467 (const ()) $ do
468 guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
469 endPass pass (mg_binds guts') (mg_rules guts')
470 return guts'
471
472 mod = mg_module guts
473
474 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
475 doCorePass pass guts = do
476 logger <- getLogger
477 dflags <- getDynFlags
478 us <- getUniqueSupplyM
479 p_fam_env <- getPackageFamInstEnv
480 let platform = targetPlatform dflags
481 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
482 let updateBinds f = return $ guts { mg_binds = f (mg_binds guts) }
483 let updateBindsM f = f (mg_binds guts) >>= \b' -> return $ guts { mg_binds = b' }
484
485 case pass of
486 CoreDoSimplify {} -> {-# SCC "Simplify" #-}
487 simplifyPgm pass guts
488
489 CoreCSE -> {-# SCC "CommonSubExpr" #-}
490 updateBinds cseProgram
491
492 CoreLiberateCase -> {-# SCC "LiberateCase" #-}
493 updateBinds (liberateCase dflags)
494
495 CoreDoFloatInwards -> {-# SCC "FloatInwards" #-}
496 updateBinds (floatInwards platform)
497
498 CoreDoFloatOutwards f -> {-# SCC "FloatOutwards" #-}
499 updateBindsM (liftIO . floatOutwards logger f us)
500
501 CoreDoStaticArgs -> {-# SCC "StaticArgs" #-}
502 updateBinds (doStaticArgs us)
503
504 CoreDoCallArity -> {-# SCC "CallArity" #-}
505 updateBinds callArityAnalProgram
506
507 CoreDoExitify -> {-# SCC "Exitify" #-}
508 updateBinds exitifyProgram
509
510 CoreDoDemand -> {-# SCC "DmdAnal" #-}
511 updateBindsM (liftIO . dmdAnal logger dflags fam_envs (mg_rules guts))
512
513 CoreDoCpr -> {-# SCC "CprAnal" #-}
514 updateBindsM (liftIO . cprAnalProgram logger fam_envs)
515
516 CoreDoWorkerWrapper -> {-# SCC "WorkWrap" #-}
517 updateBinds (wwTopBinds (mg_module guts) dflags fam_envs us)
518
519 CoreDoSpecialising -> {-# SCC "Specialise" #-}
520 specProgram guts
521
522 CoreDoSpecConstr -> {-# SCC "SpecConstr" #-}
523 specConstrProgram guts
524
525 CoreAddCallerCcs -> {-# SCC "AddCallerCcs" #-}
526 addCallerCostCentres guts
527
528 CoreDoPrintCore -> {-# SCC "PrintCore" #-}
529 liftIO $ printCore logger (mg_binds guts) >> return guts
530
531 CoreDoRuleCheck phase pat -> {-# SCC "RuleCheck" #-}
532 ruleCheckPass phase pat guts
533 CoreDoNothing -> return guts
534 CoreDoPasses passes -> runCorePasses passes guts
535
536 CoreDoPluginPass _ p -> {-# SCC "Plugin" #-} p guts
537
538 CoreDesugar -> pprPanic "doCorePass" (ppr pass)
539 CoreDesugarOpt -> pprPanic "doCorePass" (ppr pass)
540 CoreTidy -> pprPanic "doCorePass" (ppr pass)
541 CorePrep -> pprPanic "doCorePass" (ppr pass)
542 CoreOccurAnal -> pprPanic "doCorePass" (ppr pass)
543
544 {-
545 ************************************************************************
546 * *
547 \subsection{Core pass combinators}
548 * *
549 ************************************************************************
550 -}
551
552 printCore :: Logger -> CoreProgram -> IO ()
553 printCore logger binds
554 = Logger.logDumpMsg logger "Print Core" (pprCoreBindings binds)
555
556 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
557 ruleCheckPass current_phase pat guts = do
558 dflags <- getDynFlags
559 logger <- getLogger
560 withTiming logger (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
561 (const ()) $ do
562 rb <- getRuleBase
563 vis_orphs <- getVisibleOrphanMods
564 let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn
565 ++ (mg_rules guts)
566 let ropts = initRuleOpts dflags
567 liftIO $ logDumpMsg logger "Rule check"
568 (ruleCheckProgram ropts current_phase pat
569 rule_fn (mg_binds guts))
570 return guts
571
572 {-
573 ************************************************************************
574 * *
575 Gentle simplification
576 * *
577 ************************************************************************
578 -}
579
580 simplifyExpr :: HscEnv -- includes spec of what core-to-core passes to do
581 -> CoreExpr
582 -> IO CoreExpr
583 -- simplifyExpr is called by the driver to simplify an
584 -- expression typed in at the interactive prompt
585 simplifyExpr hsc_env expr
586 = withTiming logger (text "Simplify [expr]") (const ()) $
587 do { eps <- hscEPS hsc_env ;
588 ; let rule_env = mkRuleEnv (eps_rule_base eps) []
589 fi_env = ( eps_fam_inst_env eps
590 , extendFamInstEnvList emptyFamInstEnv $
591 snd $ ic_instances $ hsc_IC hsc_env )
592 simpl_env = simplEnvForGHCi logger dflags
593
594 ; let sz = exprSize expr
595
596 ; (expr', counts) <- initSmpl logger dflags rule_env fi_env sz $
597 simplExprGently simpl_env expr
598
599 ; Logger.putDumpFileMaybe logger Opt_D_dump_simpl_stats
600 "Simplifier statistics" FormatText (pprSimplCount counts)
601
602 ; Logger.putDumpFileMaybe logger Opt_D_dump_simpl "Simplified expression"
603 FormatCore
604 (pprCoreExpr expr')
605
606 ; return expr'
607 }
608 where
609 dflags = hsc_dflags hsc_env
610 logger = hsc_logger hsc_env
611
612 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
613 -- Simplifies an expression
614 -- does occurrence analysis, then simplification
615 -- and repeats (twice currently) because one pass
616 -- alone leaves tons of crud.
617 -- Used (a) for user expressions typed in at the interactive prompt
618 -- (b) the LHS and RHS of a RULE
619 -- (c) Template Haskell splices
620 --
621 -- The name 'Gently' suggests that the SimplMode is InitialPhase,
622 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
623 -- enforce that; it just simplifies the expression twice
624
625 -- It's important that simplExprGently does eta reduction; see
626 -- Note [Simplifying the left-hand side of a RULE] above. The
627 -- simplifier does indeed do eta reduction (it's in GHC.Core.Opt.Simplify.completeLam)
628 -- but only if -O is on.
629
630 simplExprGently env expr = do
631 expr1 <- simplExpr env (occurAnalyseExpr expr)
632 simplExpr env (occurAnalyseExpr expr1)
633
634 {-
635 ************************************************************************
636 * *
637 \subsection{The driver for the simplifier}
638 * *
639 ************************************************************************
640 -}
641
642 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
643 simplifyPgm pass guts
644 = do { hsc_env <- getHscEnv
645 ; rb <- getRuleBase
646 ; liftIOWithCount $
647 simplifyPgmIO pass hsc_env rb guts }
648
649 simplifyPgmIO :: CoreToDo
650 -> HscEnv
651 -> RuleBase
652 -> ModGuts
653 -> IO (SimplCount, ModGuts) -- New bindings
654
655 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
656 hsc_env hpt_rule_base
657 guts@(ModGuts { mg_module = this_mod
658 , mg_rdr_env = rdr_env
659 , mg_deps = deps
660 , mg_binds = binds, mg_rules = rules
661 , mg_fam_inst_env = fam_inst_env })
662 = do { (termination_msg, it_count, counts_out, guts')
663 <- do_iteration 1 [] binds rules
664
665 ; when (logHasDumpFlag logger Opt_D_verbose_core2core
666 && logHasDumpFlag logger Opt_D_dump_simpl_stats) $
667 logDumpMsg logger
668 "Simplifier statistics for following pass"
669 (vcat [text termination_msg <+> text "after" <+> ppr it_count
670 <+> text "iterations",
671 blankLine,
672 pprSimplCount counts_out])
673
674 ; return (counts_out, guts')
675 }
676 where
677 dflags = hsc_dflags hsc_env
678 logger = hsc_logger hsc_env
679 print_unqual = mkPrintUnqualified (hsc_unit_env hsc_env) rdr_env
680 simpl_env = mkSimplEnv mode
681 active_rule = activeRule mode
682 active_unf = activeUnfolding mode
683
684 do_iteration :: Int --UniqSupply
685 -- -> Int -- Counts iterations
686 -> [SimplCount] -- Counts from earlier iterations, reversed
687 -> CoreProgram -- Bindings in
688 -> [CoreRule] -- and orphan rules
689 -> IO (String, Int, SimplCount, ModGuts)
690
691 do_iteration iteration_no counts_so_far binds rules
692 -- iteration_no is the number of the iteration we are
693 -- about to begin, with '1' for the first
694 | iteration_no > max_iterations -- Stop if we've run out of iterations
695 = warnPprTrace (debugIsOn && (max_iterations > 2))
696 ( hang (ppr this_mod <> colon <+> text "simplifier bailing out after"
697 <+> int max_iterations <+> text "iterations"
698 <+> (brackets $ hsep $ punctuate comma $
699 map (int . simplCountN) (reverse counts_so_far)))
700 2 (text "Size =" <+> ppr (coreBindsStats binds))) $
701
702 -- Subtract 1 from iteration_no to get the
703 -- number of iterations we actually completed
704 return ( "Simplifier baled out", iteration_no - 1
705 , totalise counts_so_far
706 , guts { mg_binds = binds, mg_rules = rules } )
707
708 -- Try and force thunks off the binds; significantly reduces
709 -- space usage, especially with -O. JRS, 000620.
710 | let sz = coreBindsSize binds
711 , () <- sz `seq` () -- Force it
712 = do {
713 -- Occurrence analysis
714 let { tagged_binds = {-# SCC "OccAnal" #-}
715 occurAnalysePgm this_mod active_unf active_rule rules
716 binds
717 } ;
718 Logger.putDumpFileMaybe logger Opt_D_dump_occur_anal "Occurrence analysis"
719 FormatCore
720 (pprCoreBindings tagged_binds);
721
722 -- Get any new rules, and extend the rule base
723 -- See Note [Overall plumbing for rules] in GHC.Core.Rules
724 -- We need to do this regularly, because simplification can
725 -- poke on IdInfo thunks, which in turn brings in new rules
726 -- behind the scenes. Otherwise there's a danger we'll simply
727 -- miss the rules for Ids hidden inside imported inlinings
728 eps <- hscEPS hsc_env ;
729 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
730 ; rule_base2 = extendRuleBaseList rule_base1 rules
731 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
732 ; vis_orphs = this_mod : dep_orphs deps } ;
733
734 -- Simplify the program
735 ((binds1, rules1), counts1) <-
736 initSmpl logger dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs sz $
737 do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
738 simplTopBinds simpl_env tagged_binds
739
740 -- Apply the substitution to rules defined in this module
741 -- for imported Ids. Eg RULE map my_f = blah
742 -- If we have a substitution my_f :-> other_f, we'd better
743 -- apply it to the rule to, or it'll never match
744 ; rules1 <- simplRules env1 Nothing rules Nothing
745
746 ; return (getTopFloatBinds floats, rules1) } ;
747
748 -- Stop if nothing happened; don't dump output
749 -- See Note [Which transformations are innocuous] in GHC.Core.Opt.Monad
750 if isZeroSimplCount counts1 then
751 return ( "Simplifier reached fixed point", iteration_no
752 , totalise (counts1 : counts_so_far) -- Include "free" ticks
753 , guts { mg_binds = binds1, mg_rules = rules1 } )
754 else do {
755 -- Short out indirections
756 -- We do this *after* at least one run of the simplifier
757 -- because indirection-shorting uses the export flag on *occurrences*
758 -- and that isn't guaranteed to be ok until after the first run propagates
759 -- stuff from the binding site to its occurrences
760 --
761 -- ToDo: alas, this means that indirection-shorting does not happen at all
762 -- if the simplifier does nothing (not common, I know, but unsavoury)
763 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
764
765 -- Dump the result of this iteration
766 let { dump_core_sizes = not (gopt Opt_SuppressCoreSizes dflags) } ;
767 dump_end_iteration logger dump_core_sizes print_unqual iteration_no counts1 binds2 rules1 ;
768 lintPassResult hsc_env pass binds2 ;
769
770 -- Loop
771 do_iteration (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
772 } }
773 #if __GLASGOW_HASKELL__ <= 810
774 | otherwise = panic "do_iteration"
775 #endif
776 where
777 -- Remember the counts_so_far are reversed
778 totalise :: [SimplCount] -> SimplCount
779 totalise = foldr (\c acc -> acc `plusSimplCount` c)
780 (zeroSimplCount dflags)
781
782 simplifyPgmIO _ _ _ _ = panic "simplifyPgmIO"
783
784 -------------------
785 dump_end_iteration :: Logger -> Bool -> PrintUnqualified -> Int
786 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
787 dump_end_iteration logger dump_core_sizes print_unqual iteration_no counts binds rules
788 = dumpPassResult logger dump_core_sizes print_unqual mb_flag hdr pp_counts binds rules
789 where
790 mb_flag | logHasDumpFlag logger Opt_D_dump_simpl_iterations = Just Opt_D_dump_simpl_iterations
791 | otherwise = Nothing
792 -- Show details if Opt_D_dump_simpl_iterations is on
793
794 hdr = "Simplifier iteration=" ++ show iteration_no
795 pp_counts = vcat [ text "---- Simplifier counts for" <+> text hdr
796 , pprSimplCount counts
797 , text "---- End of simplifier counts for" <+> text hdr ]
798
799 {-
800 ************************************************************************
801 * *
802 Shorting out indirections
803 * *
804 ************************************************************************
805
806 If we have this:
807
808 x_local = <expression>
809 ...bindings...
810 x_exported = x_local
811
812 where x_exported is exported, and x_local is not, then we replace it with this:
813
814 x_exported = <expression>
815 x_local = x_exported
816 ...bindings...
817
818 Without this we never get rid of the x_exported = x_local thing. This
819 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
820 makes strictness information propagate better. This used to happen in
821 the final phase, but it's tidier to do it here.
822
823 Note [Messing up the exported Id's RULES]
824 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
825 We must be careful about discarding (obviously) or even merging the
826 RULES on the exported Id. The example that went bad on me at one stage
827 was this one:
828
829 iterate :: (a -> a) -> a -> [a]
830 [Exported]
831 iterate = iterateList
832
833 iterateFB c f x = x `c` iterateFB c f (f x)
834 iterateList f x = x : iterateList f (f x)
835 [Not exported]
836
837 {-# RULES
838 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
839 "iterateFB" iterateFB (:) = iterateList
840 #-}
841
842 This got shorted out to:
843
844 iterateList :: (a -> a) -> a -> [a]
845 iterateList = iterate
846
847 iterateFB c f x = x `c` iterateFB c f (f x)
848 iterate f x = x : iterate f (f x)
849
850 {-# RULES
851 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
852 "iterateFB" iterateFB (:) = iterate
853 #-}
854
855 And now we get an infinite loop in the rule system
856 iterate f x -> build (\cn -> iterateFB c f x)
857 -> iterateFB (:) f x
858 -> iterate f x
859
860 Old "solution":
861 use rule switching-off pragmas to get rid
862 of iterateList in the first place
863
864 But in principle the user *might* want rules that only apply to the Id
865 they say. And inline pragmas are similar
866 {-# NOINLINE f #-}
867 f = local
868 local = <stuff>
869 Then we do not want to get rid of the NOINLINE.
870
871 Hence hasShortableIdinfo.
872
873
874 Note [Rules and indirection-zapping]
875 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
876 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
877 Then the things mentioned can be out of scope! Solution
878 a) Make sure that in this pass the usage-info from x_exported is
879 available for ...bindings...
880 b) If there are any such RULES, rec-ify the entire top-level.
881 It'll get sorted out next time round
882
883 Other remarks
884 ~~~~~~~~~~~~~
885 If more than one exported thing is equal to a local thing (i.e., the
886 local thing really is shared), then we do one only:
887 \begin{verbatim}
888 x_local = ....
889 x_exported1 = x_local
890 x_exported2 = x_local
891 ==>
892 x_exported1 = ....
893
894 x_exported2 = x_exported1
895 \end{verbatim}
896
897 We rely on prior eta reduction to simplify things like
898 \begin{verbatim}
899 x_exported = /\ tyvars -> x_local tyvars
900 ==>
901 x_exported = x_local
902 \end{verbatim}
903 Hence,there's a possibility of leaving unchanged something like this:
904 \begin{verbatim}
905 x_local = ....
906 x_exported1 = x_local Int
907 \end{verbatim}
908 By the time we've thrown away the types in STG land this
909 could be eliminated. But I don't think it's very common
910 and it's dangerous to do this fiddling in STG land
911 because we might eliminate a binding that's mentioned in the
912 unfolding for something.
913
914 Note [Indirection zapping and ticks]
915 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
916 Unfortunately this is another place where we need a special case for
917 ticks. The following happens quite regularly:
918
919 x_local = <expression>
920 x_exported = tick<x> x_local
921
922 Which we want to become:
923
924 x_exported = tick<x> <expression>
925
926 As it makes no sense to keep the tick and the expression on separate
927 bindings. Note however that this might increase the ticks scoping
928 over the execution of x_local, so we can only do this for floatable
929 ticks. More often than not, other references will be unfoldings of
930 x_exported, and therefore carry the tick anyway.
931 -}
932
933 type IndEnv = IdEnv (Id, [CoreTickish]) -- Maps local_id -> exported_id, ticks
934
935 shortOutIndirections :: CoreProgram -> CoreProgram
936 shortOutIndirections binds
937 | isEmptyVarEnv ind_env = binds
938 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
939 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
940 where
941 ind_env = makeIndEnv binds
942 -- These exported Ids are the subjects of the indirection-elimination
943 exp_ids = map fst $ nonDetEltsUFM ind_env
944 -- It's OK to use nonDetEltsUFM here because we forget the ordering
945 -- by immediately converting to a set or check if all the elements
946 -- satisfy a predicate.
947 exp_id_set = mkVarSet exp_ids
948 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
949 binds' = concatMap zap binds
950
951 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
952 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
953
954 zapPair (bndr, rhs)
955 | bndr `elemVarSet` exp_id_set
956 = [] -- Kill the exported-id binding
957
958 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
959 , (exp_id', lcl_id') <- transferIdInfo exp_id bndr
960 = -- Turn a local-id binding into two bindings
961 -- exp_id = rhs; lcl_id = exp_id
962 [ (exp_id', mkTicks ticks rhs),
963 (lcl_id', Var exp_id') ]
964
965 | otherwise
966 = [(bndr,rhs)]
967
968 makeIndEnv :: [CoreBind] -> IndEnv
969 makeIndEnv binds
970 = foldl' add_bind emptyVarEnv binds
971 where
972 add_bind :: IndEnv -> CoreBind -> IndEnv
973 add_bind env (NonRec exported_id rhs) = add_pair env (exported_id, rhs)
974 add_bind env (Rec pairs) = foldl' add_pair env pairs
975
976 add_pair :: IndEnv -> (Id,CoreExpr) -> IndEnv
977 add_pair env (exported_id, exported)
978 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
979 , shortMeOut env exported_id local_id
980 = extendVarEnv env local_id (exported_id, ticks)
981 add_pair env _ = env
982
983 -----------------
984 shortMeOut :: IndEnv -> Id -> Id -> Bool
985 shortMeOut ind_env exported_id local_id
986 -- The if-then-else stuff is just so I can get a pprTrace to see
987 -- how often I don't get shorting out because of IdInfo stuff
988 = if isExportedId exported_id && -- Only if this is exported
989
990 isLocalId local_id && -- Only if this one is defined in this
991 -- module, so that we *can* change its
992 -- binding to be the exported thing!
993
994 not (isExportedId local_id) && -- Only if this one is not itself exported,
995 -- since the transformation will nuke it
996
997 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
998 then
999 if hasShortableIdInfo exported_id
1000 then True -- See Note [Messing up the exported Id's IdInfo]
1001 else warnPprTrace True (text "Not shorting out:" <+> ppr exported_id) False
1002 else
1003 False
1004
1005 -----------------
1006 hasShortableIdInfo :: Id -> Bool
1007 -- True if there is no user-attached IdInfo on exported_id,
1008 -- so we can safely discard it
1009 -- See Note [Messing up the exported Id's IdInfo]
1010 hasShortableIdInfo id
1011 = isEmptyRuleInfo (ruleInfo info)
1012 && isDefaultInlinePragma (inlinePragInfo info)
1013 && not (isStableUnfolding (realUnfoldingInfo info))
1014 where
1015 info = idInfo id
1016
1017 -----------------
1018 {- Note [Transferring IdInfo]
1019 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1020 If we have
1021 lcl_id = e; exp_id = lcl_id
1022
1023 and lcl_id has useful IdInfo, we don't want to discard it by going
1024 gbl_id = e; lcl_id = gbl_id
1025
1026 Instead, transfer IdInfo from lcl_id to exp_id, specifically
1027 * (Stable) unfolding
1028 * Strictness
1029 * Rules
1030 * Inline pragma
1031
1032 Overwriting, rather than merging, seems to work ok.
1033
1034 For the lcl_id we
1035
1036 * Zap the InlinePragma. It might originally have had a NOINLINE, which
1037 we have now transferred; and we really want the lcl_id to inline now
1038 that its RHS is trivial!
1039
1040 * Zap any Stable unfolding. agian, we want lcl_id = gbl_id to inline,
1041 replacing lcl_id by gbl_id. That won't happen if lcl_id has its original
1042 great big Stable unfolding
1043 -}
1044
1045 transferIdInfo :: Id -> Id -> (Id, Id)
1046 -- See Note [Transferring IdInfo]
1047 transferIdInfo exported_id local_id
1048 = ( modifyIdInfo transfer exported_id
1049 , modifyIdInfo zap_info local_id )
1050 where
1051 local_info = idInfo local_id
1052 transfer exp_info = exp_info `setDmdSigInfo` dmdSigInfo local_info
1053 `setCprSigInfo` cprSigInfo local_info
1054 `setUnfoldingInfo` realUnfoldingInfo local_info
1055 `setInlinePragInfo` inlinePragInfo local_info
1056 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1057 new_info = setRuleInfoHead (idName exported_id)
1058 (ruleInfo local_info)
1059 -- Remember to set the function-name field of the
1060 -- rules as we transfer them from one function to another
1061
1062 zap_info lcl_info = lcl_info `setInlinePragInfo` defaultInlinePragma
1063 `setUnfoldingInfo` noUnfolding
1064
1065
1066 dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO CoreProgram
1067 dmdAnal logger dflags fam_envs rules binds = do
1068 let !opts = DmdAnalOpts
1069 { dmd_strict_dicts = gopt Opt_DictsStrict dflags
1070 , dmd_unbox_width = dmdUnboxWidth dflags
1071 }
1072 binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds
1073 Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $
1074 dumpIdInfoOfProgram (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds
1075 -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
1076 seqBinds binds_plus_dmds `seq` return binds_plus_dmds