never executed always true always false
    1 {-
    2 ToDo [Oct 2013]
    3 ~~~~~~~~~~~~~~~
    4 1. Nuke ForceSpecConstr for good (it is subsumed by GHC.Types.SPEC in ghc-prim)
    5 2. Nuke NoSpecConstr
    6 
    7 
    8 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
    9 
   10 \section[SpecConstr]{Specialise over constructors}
   11 -}
   12 
   13 
   14 
   15 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
   16 
   17 module GHC.Core.Opt.SpecConstr(
   18         specConstrProgram,
   19         SpecConstrAnnotation(..)
   20     ) where
   21 
   22 import GHC.Prelude
   23 
   24 import GHC.Driver.Session ( DynFlags(..), GeneralFlag( Opt_SpecConstrKeen )
   25                           , gopt, hasPprDebug )
   26 
   27 import GHC.Core
   28 import GHC.Core.Subst
   29 import GHC.Core.Utils
   30 import GHC.Core.Unfold
   31 import GHC.Core.FVs     ( exprsFreeVarsList )
   32 import GHC.Core.Opt.Monad
   33 import GHC.Core.Opt.WorkWrap.Utils ( isWorkerSmallEnough, mkWorkerArgs )
   34 import GHC.Core.DataCon
   35 import GHC.Core.Coercion hiding( substCo )
   36 import GHC.Core.Rules
   37 import GHC.Core.Type     hiding ( substTy )
   38 import GHC.Core.TyCon   (TyCon, tyConUnique, tyConName )
   39 import GHC.Core.Multiplicity
   40 import GHC.Core.Ppr     ( pprParendExpr )
   41 import GHC.Core.Make    ( mkImpossibleExpr )
   42 
   43 import GHC.Unit.Module
   44 import GHC.Unit.Module.ModGuts
   45 
   46 import GHC.Types.Literal ( litIsLifted )
   47 import GHC.Types.Id
   48 import GHC.Types.Var.Env
   49 import GHC.Types.Var.Set
   50 import GHC.Types.Name
   51 import GHC.Types.Tickish
   52 import GHC.Types.Basic
   53 import GHC.Types.Demand
   54 import GHC.Types.Cpr
   55 import GHC.Types.Unique.Supply
   56 import GHC.Types.Unique.FM
   57 
   58 import GHC.Data.Maybe     ( orElse, catMaybes, isJust, isNothing )
   59 import GHC.Data.Pair
   60 import GHC.Data.FastString
   61 
   62 import GHC.Utils.Misc
   63 import GHC.Utils.Outputable
   64 import GHC.Utils.Panic.Plain
   65 import GHC.Utils.Constants (debugIsOn)
   66 import GHC.Utils.Monad
   67 import GHC.Utils.Trace
   68 
   69 import GHC.Builtin.Names ( specTyConKey )
   70 
   71 import GHC.Exts( SpecConstrAnnotation(..) )
   72 import GHC.Serialized   ( deserializeWithData )
   73 
   74 import Control.Monad    ( zipWithM )
   75 import Data.List (nubBy, sortBy, partition, dropWhileEnd, mapAccumL )
   76 import Data.Ord( comparing )
   77 
   78 {-
   79 -----------------------------------------------------
   80                         Game plan
   81 -----------------------------------------------------
   82 
   83 Consider
   84         drop n []     = []
   85         drop 0 xs     = []
   86         drop n (x:xs) = drop (n-1) xs
   87 
   88 After the first time round, we could pass n unboxed.  This happens in
   89 numerical code too.  Here's what it looks like in Core:
   90 
   91         drop n xs = case xs of
   92                       []     -> []
   93                       (y:ys) -> case n of
   94                                   I# n# -> case n# of
   95                                              0 -> []
   96                                              _ -> drop (I# (n# -# 1#)) xs
   97 
   98 Notice that the recursive call has an explicit constructor as argument.
   99 Noticing this, we can make a specialised version of drop
  100 
  101         RULE: drop (I# n#) xs ==> drop' n# xs
  102 
  103         drop' n# xs = let n = I# n# in ...orig RHS...
  104 
  105 Now the simplifier will apply the specialisation in the rhs of drop', giving
  106 
  107         drop' n# xs = case xs of
  108                       []     -> []
  109                       (y:ys) -> case n# of
  110                                   0 -> []
  111                                   _ -> drop' (n# -# 1#) xs
  112 
  113 Much better!
  114 
  115 We'd also like to catch cases where a parameter is carried along unchanged,
  116 but evaluated each time round the loop:
  117 
  118         f i n = if i>0 || i>n then i else f (i*2) n
  119 
  120 Here f isn't strict in n, but we'd like to avoid evaluating it each iteration.
  121 In Core, by the time we've w/wd (f is strict in i) we get
  122 
  123         f i# n = case i# ># 0 of
  124                    False -> I# i#
  125                    True  -> case n of { I# n# ->
  126                             case i# ># n# of
  127                                 False -> I# i#
  128                                 True  -> f (i# *# 2#) n
  129 
  130 At the call to f, we see that the argument, n is known to be (I# n#),
  131 and n is evaluated elsewhere in the body of f, so we can play the same
  132 trick as above.
  133 
  134 
  135 Note [Reboxing]
  136 ~~~~~~~~~~~~~~~
  137 We must be careful not to allocate the same constructor twice.  Consider
  138         f p = (...(case p of (a,b) -> e)...p...,
  139                ...let t = (r,s) in ...t...(f t)...)
  140 At the recursive call to f, we can see that t is a pair.  But we do NOT want
  141 to make a specialised copy:
  142         f' a b = let p = (a,b) in (..., ...)
  143 because now t is allocated by the caller, then r and s are passed to the
  144 recursive call, which allocates the (r,s) pair again.
  145 
  146 This happens if
  147   (a) the argument p is used in other than a case-scrutinisation way.
  148   (b) the argument to the call is not a 'fresh' tuple; you have to
  149         look into its unfolding to see that it's a tuple
  150 
  151 Hence the "OR" part of Note [Good arguments] below.
  152 
  153 ALTERNATIVE 2: pass both boxed and unboxed versions.  This no longer saves
  154 allocation, but does perhaps save evals. In the RULE we'd have
  155 something like
  156 
  157   f (I# x#) = f' (I# x#) x#
  158 
  159 If at the call site the (I# x) was an unfolding, then we'd have to
  160 rely on CSE to eliminate the duplicate allocation.... This alternative
  161 doesn't look attractive enough to pursue.
  162 
  163 ALTERNATIVE 3: ignore the reboxing problem.  The trouble is that
  164 the conservative reboxing story prevents many useful functions from being
  165 specialised.  Example:
  166         foo :: Maybe Int -> Int -> Int
  167         foo   (Just m) 0 = 0
  168         foo x@(Just m) n = foo x (n-m)
  169 Here the use of 'x' will clearly not require boxing in the specialised function.
  170 
  171 The strictness analyser has the same problem, in fact.  Example:
  172         f p@(a,b) = ...
  173 If we pass just 'a' and 'b' to the worker, it might need to rebox the
  174 pair to create (a,b).  A more sophisticated analysis might figure out
  175 precisely the cases in which this could happen, but the strictness
  176 analyser does no such analysis; it just passes 'a' and 'b', and hopes
  177 for the best.
  178 
  179 So my current choice is to make SpecConstr similarly aggressive, and
  180 ignore the bad potential of reboxing.
  181 
  182 
  183 Note [Good arguments]
  184 ~~~~~~~~~~~~~~~~~~~~~
  185 So we look for
  186 
  187 * A self-recursive function.  Ignore mutual recursion for now,
  188   because it's less common, and the code is simpler for self-recursion.
  189 
  190 * EITHER
  191 
  192    a) At a recursive call, one or more parameters is an explicit
  193       constructor application
  194         AND
  195       That same parameter is scrutinised by a case somewhere in
  196       the RHS of the function
  197 
  198   OR
  199 
  200     b) At a recursive call, one or more parameters has an unfolding
  201        that is an explicit constructor application
  202         AND
  203       That same parameter is scrutinised by a case somewhere in
  204       the RHS of the function
  205         AND
  206       Those are the only uses of the parameter (see Note [Reboxing])
  207 
  208 
  209 What to abstract over
  210 ~~~~~~~~~~~~~~~~~~~~~
  211 There's a bit of a complication with type arguments.  If the call
  212 site looks like
  213 
  214         f p = ...f ((:) [a] x xs)...
  215 
  216 then our specialised function look like
  217 
  218         f_spec x xs = let p = (:) [a] x xs in ....as before....
  219 
  220 This only makes sense if either
  221   a) the type variable 'a' is in scope at the top of f, or
  222   b) the type variable 'a' is an argument to f (and hence fs)
  223 
  224 Actually, (a) may hold for value arguments too, in which case
  225 we may not want to pass them.  Suppose 'x' is in scope at f's
  226 defn, but xs is not.  Then we'd like
  227 
  228         f_spec xs = let p = (:) [a] x xs in ....as before....
  229 
  230 Similarly (b) may hold too.  If x is already an argument at the
  231 call, no need to pass it again.
  232 
  233 Finally, if 'a' is not in scope at the call site, we could abstract
  234 it as we do the term variables:
  235 
  236         f_spec a x xs = let p = (:) [a] x xs in ...as before...
  237 
  238 So the grand plan is:
  239 
  240         * abstract the call site to a constructor-only pattern
  241           e.g.  C x (D (f p) (g q))  ==>  C s1 (D s2 s3)
  242 
  243         * Find the free variables of the abstracted pattern
  244 
  245         * Pass these variables, less any that are in scope at
  246           the fn defn.  But see Note [Shadowing] below.
  247 
  248 
  249 NOTICE that we only abstract over variables that are not in scope,
  250 so we're in no danger of shadowing variables used in "higher up"
  251 in f_spec's RHS.
  252 
  253 
  254 Note [Shadowing]
  255 ~~~~~~~~~~~~~~~~
  256 In this pass we gather up usage information that may mention variables
  257 that are bound between the usage site and the definition site; or (more
  258 seriously) may be bound to something different at the definition site.
  259 For example:
  260 
  261         f x = letrec g y v = let x = ...
  262                              in ...(g (a,b) x)...
  263 
  264 Since 'x' is in scope at the call site, we may make a rewrite rule that
  265 looks like
  266         RULE forall a,b. g (a,b) x = ...
  267 But this rule will never match, because it's really a different 'x' at
  268 the call site -- and that difference will be manifest by the time the
  269 simplifier gets to it.  [A worry: the simplifier doesn't *guarantee*
  270 no-shadowing, so perhaps it may not be distinct?]
  271 
  272 Anyway, the rule isn't actually wrong, it's just not useful.  One possibility
  273 is to run deShadowBinds before running SpecConstr, but instead we run the
  274 simplifier.  That gives the simplest possible program for SpecConstr to
  275 chew on; and it virtually guarantees no shadowing.
  276 
  277 Note [Specialising for constant parameters]
  278 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  279 This one is about specialising on a *constant* (but not necessarily
  280 constructor) argument
  281 
  282     foo :: Int -> (Int -> Int) -> Int
  283     foo 0 f = 0
  284     foo m f = foo (f m) (+1)
  285 
  286 It produces
  287 
  288     lvl_rmV :: GHC.Base.Int -> GHC.Base.Int
  289     lvl_rmV =
  290       \ (ds_dlk :: GHC.Base.Int) ->
  291         case ds_dlk of wild_alH { GHC.Base.I# x_alG ->
  292         GHC.Base.I# (GHC.Prim.+# x_alG 1)
  293 
  294     T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) ->
  295     GHC.Prim.Int#
  296     T.$wfoo =
  297       \ (ww_sme :: GHC.Prim.Int#) (w_smg :: GHC.Base.Int -> GHC.Base.Int) ->
  298         case ww_sme of ds_Xlw {
  299           __DEFAULT ->
  300         case w_smg (GHC.Base.I# ds_Xlw) of w1_Xmo { GHC.Base.I# ww1_Xmz ->
  301         T.$wfoo ww1_Xmz lvl_rmV
  302         };
  303           0 -> 0
  304         }
  305 
  306 The recursive call has lvl_rmV as its argument, so we could create a specialised copy
  307 with that argument baked in; that is, not passed at all.   Now it can perhaps be inlined.
  308 
  309 When is this worth it?  Call the constant 'lvl'
  310 - If 'lvl' has an unfolding that is a constructor, see if the corresponding
  311   parameter is scrutinised anywhere in the body.
  312 
  313 - If 'lvl' has an unfolding that is a inlinable function, see if the corresponding
  314   parameter is applied (...to enough arguments...?)
  315 
  316   Also do this is if the function has RULES?
  317 
  318 Also
  319 
  320 Note [Specialising for lambda parameters]
  321 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  322     foo :: Int -> (Int -> Int) -> Int
  323     foo 0 f = 0
  324     foo m f = foo (f m) (\n -> n-m)
  325 
  326 This is subtly different from the previous one in that we get an
  327 explicit lambda as the argument:
  328 
  329     T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) ->
  330     GHC.Prim.Int#
  331     T.$wfoo =
  332       \ (ww_sm8 :: GHC.Prim.Int#) (w_sma :: GHC.Base.Int -> GHC.Base.Int) ->
  333         case ww_sm8 of ds_Xlr {
  334           __DEFAULT ->
  335         case w_sma (GHC.Base.I# ds_Xlr) of w1_Xmf { GHC.Base.I# ww1_Xmq ->
  336         T.$wfoo
  337           ww1_Xmq
  338           (\ (n_ad3 :: GHC.Base.Int) ->
  339              case n_ad3 of wild_alB { GHC.Base.I# x_alA ->
  340              GHC.Base.I# (GHC.Prim.-# x_alA ds_Xlr)
  341              })
  342         };
  343           0 -> 0
  344         }
  345 
  346 I wonder if SpecConstr couldn't be extended to handle this? After all,
  347 lambda is a sort of constructor for functions and perhaps it already
  348 has most of the necessary machinery?
  349 
  350 Furthermore, there's an immediate win, because you don't need to allocate the lambda
  351 at the call site; and if perchance it's called in the recursive call, then you
  352 may avoid allocating it altogether.  Just like for constructors.
  353 
  354 Looks cool, but probably rare...but it might be easy to implement.
  355 
  356 
  357 Note [SpecConstr for casts]
  358 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  359 Consider
  360     data family T a :: *
  361     data instance T Int = T Int
  362 
  363     foo n = ...
  364        where
  365          go (T 0) = 0
  366          go (T n) = go (T (n-1))
  367 
  368 The recursive call ends up looking like
  369         go (T (I# ...) `cast` g)
  370 So we want to spot the constructor application inside the cast.
  371 That's why we have the Cast case in argToPat
  372 
  373 Note [Local recursive groups]
  374 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  375 For a *local* recursive group, we can see all the calls to the
  376 function, so we seed the specialisation loop from the calls in the
  377 body, not from the calls in the RHS.  Consider:
  378 
  379   bar m n = foo n (n,n) (n,n) (n,n) (n,n)
  380    where
  381      foo n p q r s
  382        | n == 0    = m
  383        | n > 3000  = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s }
  384        | n > 2000  = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s }
  385        | n > 1000  = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s }
  386        | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) }
  387 
  388 If we start with the RHSs of 'foo', we get lots and lots of specialisations,
  389 most of which are not needed.  But if we start with the (single) call
  390 in the rhs of 'bar' we get exactly one fully-specialised copy, and all
  391 the recursive calls go to this fully-specialised copy. Indeed, the original
  392 function is later collected as dead code.  This is very important in
  393 specialising the loops arising from stream fusion, for example in NDP where
  394 we were getting literally hundreds of (mostly unused) specialisations of
  395 a local function.
  396 
  397 In a case like the above we end up never calling the original un-specialised
  398 function.  (Although we still leave its code around just in case.)
  399 
  400 However, if we find any boring calls in the body, including *unsaturated*
  401 ones, such as
  402       letrec foo x y = ....foo...
  403       in map foo xs
  404 then we will end up calling the un-specialised function, so then we *should*
  405 use the calls in the un-specialised RHS as seeds.  We call these
  406 "boring call patterns", and callsToPats reports if it finds any of these.
  407 
  408 Note [Seeding top-level recursive groups]
  409 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  410 This seeding is done in the binding for seed_calls in specRec.
  411 
  412 1. If all the bindings in a top-level recursive group are local (not
  413    exported), then all the calls are in the rest of the top-level
  414    bindings.  This means we can specialise with those call patterns
  415    ONLY, and NOT with the RHSs of the recursive group (exactly like
  416    Note [Local recursive groups])
  417 
  418 2. But if any of the bindings are exported, the function may be called
  419    with any old arguments, so (for lack of anything better) we specialise
  420    based on
  421      (a) the call patterns in the RHS
  422      (b) the call patterns in the rest of the top-level bindings
  423    NB: before Apr 15 we used (a) only, but Dimitrios had an example
  424        where (b) was crucial, so I added that.
  425        Adding (b) also improved nofib allocation results:
  426                   multiplier: 4%   better
  427                   minimax:    2.8% better
  428 
  429 Actually in case (2), instead of using the calls from the RHS, it
  430 would be better to specialise in the importing module.  We'd need to
  431 add an INLINABLE pragma to the function, and then it can be
  432 specialised in the importing scope, just as is done for type classes
  433 in GHC.Core.Opt.Specialise.specImports. This remains to be done (#10346).
  434 
  435 Note [Top-level recursive groups]
  436 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  437 To get the call usage information from "the rest of the top level
  438 bindings" (c.f. Note [Seeding top-level recursive groups]), we work
  439 backwards through the top-level bindings so we see the usage before we
  440 get to the binding of the function.  Before we can collect the usage
  441 though, we go through all the bindings and add them to the
  442 environment. This is necessary because usage is only tracked for
  443 functions in the environment.  These two passes are called
  444    'go' and 'goEnv'
  445 in specConstrProgram.  (Looks a bit revolting to me.)
  446 
  447 Note [Do not specialise diverging functions]
  448 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  449 Specialising a function that just diverges is a waste of code.
  450 Furthermore, it broke GHC (simpl014) thus:
  451    {-# STR Sb #-}
  452    f = \x. case x of (a,b) -> f x
  453 If we specialise f we get
  454    f = \x. case x of (a,b) -> fspec a b
  455 But fspec doesn't have decent strictness info.  As it happened,
  456 (f x) :: IO t, so the state hack applied and we eta expanded fspec,
  457 and hence f.  But now f's strictness is less than its arity, which
  458 breaks an invariant.
  459 
  460 
  461 Note [Forcing specialisation]
  462 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  463 With stream fusion and in other similar cases, we want to fully
  464 specialise some (but not necessarily all!) loops regardless of their
  465 size and the number of specialisations.
  466 
  467 We allow a library to do this, in one of two ways (one which is
  468 deprecated):
  469 
  470   1) Add a parameter of type GHC.Types.SPEC (from ghc-prim) to the loop body.
  471 
  472   2) (Deprecated) Annotate a type with ForceSpecConstr from GHC.Exts,
  473      and then add *that* type as a parameter to the loop body
  474 
  475 The reason #2 is deprecated is because it requires GHCi, which isn't
  476 available for things like a cross compiler using stage1.
  477 
  478 Here's a (simplified) example from the `vector` package. You may bring
  479 the special 'force specialization' type into scope by saying:
  480 
  481   import GHC.Types (SPEC(..))
  482 
  483 or by defining your own type (again, deprecated):
  484 
  485   data SPEC = SPEC | SPEC2
  486   {-# ANN type SPEC ForceSpecConstr #-}
  487 
  488 (Note this is the exact same definition of GHC.Types.SPEC, just
  489 without the annotation.)
  490 
  491 After that, you say:
  492 
  493   foldl :: (a -> b -> a) -> a -> Stream b -> a
  494   {-# INLINE foldl #-}
  495   foldl f z (Stream step s _) = foldl_loop SPEC z s
  496     where
  497       foldl_loop !sPEC z s = case step s of
  498                               Yield x s' -> foldl_loop sPEC (f z x) s'
  499                               Skip       -> foldl_loop sPEC z s'
  500                               Done       -> z
  501 
  502 SpecConstr will spot the SPEC parameter and always fully specialise
  503 foldl_loop. Note that
  504 
  505   * We have to prevent the SPEC argument from being removed by
  506     w/w which is why (a) SPEC is a sum type, and (b) we have to seq on
  507     the SPEC argument.
  508 
  509   * And lastly, the SPEC argument is ultimately eliminated by
  510     SpecConstr itself so there is no runtime overhead.
  511 
  512 This is all quite ugly; we ought to come up with a better design.
  513 
  514 ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
  515 sc_force to True when calling specLoop. This flag does four things:
  516 
  517   * Ignore specConstrThreshold, to specialise functions of arbitrary size
  518         (see scTopBind)
  519   * Ignore specConstrCount, to make arbitrary numbers of specialisations
  520         (see specialise)
  521   * Specialise even for arguments that are not scrutinised in the loop
  522         (see argToPat; #4448)
  523   * Only specialise on recursive types a finite number of times
  524         (see is_too_recursive; #5550; Note [Limit recursive specialisation])
  525 
  526 The flag holds only for specialising a single binding group, and NOT
  527 for nested bindings.  (So really it should be passed around explicitly
  528 and not stored in ScEnv.)  #14379 turned out to be caused by
  529    f SPEC x = let g1 x = ...
  530               in ...
  531 We force-specialise f (because of the SPEC), but that generates a specialised
  532 copy of g1 (as well as the original).  Alas g1 has a nested binding g2; and
  533 in each copy of g1 we get an unspecialised and specialised copy of g2; and so
  534 on. Result, exponential.  So the force-spec flag now only applies to one
  535 level of bindings at a time.
  536 
  537 Mechanism for this one-level-only thing:
  538 
  539  - Switch it on at the call to specRec, in scExpr and scTopBinds
  540  - Switch it off when doing the RHSs;
  541    this can be done very conveniently in decreaseSpecCount
  542 
  543 What alternatives did I consider?
  544 
  545 * Annotating the loop itself doesn't work because (a) it is local and
  546   (b) it will be w/w'ed and having w/w propagating annotations somehow
  547   doesn't seem like a good idea. The types of the loop arguments
  548   really seem to be the most persistent thing.
  549 
  550 * Annotating the types that make up the loop state doesn't work,
  551   either, because (a) it would prevent us from using types like Either
  552   or tuples here, (b) we don't want to restrict the set of types that
  553   can be used in Stream states and (c) some types are fixed by the
  554   user (e.g., the accumulator here) but we still want to specialise as
  555   much as possible.
  556 
  557 Alternatives to ForceSpecConstr
  558 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  559 Instead of giving the loop an extra argument of type SPEC, we
  560 also considered *wrapping* arguments in SPEC, thus
  561   data SPEC a = SPEC a | SPEC2
  562 
  563   loop = \arg -> case arg of
  564                      SPEC state ->
  565                         case state of (x,y) -> ... loop (SPEC (x',y')) ...
  566                         S2 -> error ...
  567 The idea is that a SPEC argument says "specialise this argument
  568 regardless of whether the function case-analyses it".  But this
  569 doesn't work well:
  570   * SPEC must still be a sum type, else the strictness analyser
  571     eliminates it
  572   * But that means that 'loop' won't be strict in its real payload
  573 This loss of strictness in turn screws up specialisation, because
  574 we may end up with calls like
  575    loop (SPEC (case z of (p,q) -> (q,p)))
  576 Without the SPEC, if 'loop' were strict, the case would move out
  577 and we'd see loop applied to a pair. But if 'loop' isn't strict
  578 this doesn't look like a specialisable call.
  579 
  580 Note [Limit recursive specialisation]
  581 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  582 It is possible for ForceSpecConstr to cause an infinite loop of specialisation.
  583 Because there is no limit on the number of specialisations, a recursive call with
  584 a recursive constructor as an argument (for example, list cons) will generate
  585 a specialisation for that constructor. If the resulting specialisation also
  586 contains a recursive call with the constructor, this could proceed indefinitely.
  587 
  588 For example, if ForceSpecConstr is on:
  589   loop :: [Int] -> [Int] -> [Int]
  590   loop z []         = z
  591   loop z (x:xs)     = loop (x:z) xs
  592 this example will create a specialisation for the pattern
  593   loop (a:b) c      = loop' a b c
  594 
  595   loop' a b []      = (a:b)
  596   loop' a b (x:xs)  = loop (x:(a:b)) xs
  597 and a new pattern is found:
  598   loop (a:(b:c)) d  = loop'' a b c d
  599 which can continue indefinitely.
  600 
  601 Roman's suggestion to fix this was to stop after a couple of times on recursive types,
  602 but still specialising on non-recursive types as much as possible.
  603 
  604 To implement this, we count the number of times we have gone round the
  605 "specialise recursively" loop ('go' in 'specRec').  Once have gone round
  606 more than N times (controlled by -fspec-constr-recursive=N) we check
  607 
  608   - If sc_force is off, and sc_count is (Just max) then we don't
  609     need to do anything: trim_pats will limit the number of specs
  610 
  611   - Otherwise check if any function has now got more than (sc_count env)
  612     specialisations.  If sc_count is "no limit" then we arbitrarily
  613     choose 10 as the limit (ugh).
  614 
  615 See #5550.   Also #13623, where this test had become over-aggressive,
  616 and we lost a wonderful specialisation that we really wanted!
  617 
  618 Note [NoSpecConstr]
  619 ~~~~~~~~~~~~~~~~~~~
  620 The ignoreDataCon stuff allows you to say
  621     {-# ANN type T NoSpecConstr #-}
  622 to mean "don't specialise on arguments of this type".  It was added
  623 before we had ForceSpecConstr.  Lacking ForceSpecConstr we specialised
  624 regardless of size; and then we needed a way to turn that *off*.  Now
  625 that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
  626 (Used only for PArray, TODO: remove?)
  627 
  628 -----------------------------------------------------
  629                 Stuff not yet handled
  630 -----------------------------------------------------
  631 
  632 Here are notes arising from Roman's work that I don't want to lose.
  633 
  634 Example 1
  635 ~~~~~~~~~
  636     data T a = T !a
  637 
  638     foo :: Int -> T Int -> Int
  639     foo 0 t = 0
  640     foo x t | even x    = case t of { T n -> foo (x-n) t }
  641             | otherwise = foo (x-1) t
  642 
  643 SpecConstr does no specialisation, because the second recursive call
  644 looks like a boxed use of the argument.  A pity.
  645 
  646     $wfoo_sFw :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int#
  647     $wfoo_sFw =
  648       \ (ww_sFo [Just L] :: GHC.Prim.Int#) (w_sFq [Just L] :: T.T GHC.Base.Int) ->
  649          case ww_sFo of ds_Xw6 [Just L] {
  650            __DEFAULT ->
  651                 case GHC.Prim.remInt# ds_Xw6 2 of wild1_aEF [Dead Just A] {
  652                   __DEFAULT -> $wfoo_sFw (GHC.Prim.-# ds_Xw6 1) w_sFq;
  653                   0 ->
  654                     case w_sFq of wild_Xy [Just L] { T.T n_ad5 [Just U(L)] ->
  655                     case n_ad5 of wild1_aET [Just A] { GHC.Base.I# y_aES [Just L] ->
  656                     $wfoo_sFw (GHC.Prim.-# ds_Xw6 y_aES) wild_Xy
  657                     } } };
  658            0 -> 0
  659 
  660 Example 2
  661 ~~~~~~~~~
  662     data a :*: b = !a :*: !b
  663     data T a = T !a
  664 
  665     foo :: (Int :*: T Int) -> Int
  666     foo (0 :*: t) = 0
  667     foo (x :*: t) | even x    = case t of { T n -> foo ((x-n) :*: t) }
  668                   | otherwise = foo ((x-1) :*: t)
  669 
  670 Very similar to the previous one, except that the parameters are now in
  671 a strict tuple. Before SpecConstr, we have
  672 
  673     $wfoo_sG3 :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int#
  674     $wfoo_sG3 =
  675       \ (ww_sFU [Just L] :: GHC.Prim.Int#) (ww_sFW [Just L] :: T.T
  676     GHC.Base.Int) ->
  677         case ww_sFU of ds_Xws [Just L] {
  678           __DEFAULT ->
  679         case GHC.Prim.remInt# ds_Xws 2 of wild1_aEZ [Dead Just A] {
  680           __DEFAULT ->
  681             case ww_sFW of tpl_B2 [Just L] { T.T a_sFo [Just A] ->
  682             $wfoo_sG3 (GHC.Prim.-# ds_Xws 1) tpl_B2             -- $wfoo1
  683             };
  684           0 ->
  685             case ww_sFW of wild_XB [Just A] { T.T n_ad7 [Just S(L)] ->
  686             case n_ad7 of wild1_aFd [Just L] { GHC.Base.I# y_aFc [Just L] ->
  687             $wfoo_sG3 (GHC.Prim.-# ds_Xws y_aFc) wild_XB        -- $wfoo2
  688             } } };
  689           0 -> 0 }
  690 
  691 We get two specialisations:
  692 "SC:$wfoo1" [0] __forall {a_sFB :: GHC.Base.Int sc_sGC :: GHC.Prim.Int#}
  693                   Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int a_sFB)
  694                   = Foo.$s$wfoo1 a_sFB sc_sGC ;
  695 "SC:$wfoo2" [0] __forall {y_aFp :: GHC.Prim.Int# sc_sGC :: GHC.Prim.Int#}
  696                   Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int (GHC.Base.I# y_aFp))
  697                   = Foo.$s$wfoo y_aFp sc_sGC ;
  698 
  699 But perhaps the first one isn't good.  After all, we know that tpl_B2 is
  700 a T (I# x) really, because T is strict and Int has one constructor.  (We can't
  701 unbox the strict fields, because T is polymorphic!)
  702 
  703 ************************************************************************
  704 *                                                                      *
  705 \subsection{Top level wrapper stuff}
  706 *                                                                      *
  707 ************************************************************************
  708 -}
  709 
  710 specConstrProgram :: ModGuts -> CoreM ModGuts
  711 specConstrProgram guts
  712   = do
  713       dflags <- getDynFlags
  714       us     <- getUniqueSupplyM
  715       (_, annos) <- getFirstAnnotations deserializeWithData guts
  716       this_mod <- getModule
  717       let binds' = reverse $ fst $ initUs us $ do
  718                     -- Note [Top-level recursive groups]
  719                     (env, binds) <- goEnv (initScEnv dflags this_mod annos)
  720                                           (mg_binds guts)
  721                         -- binds is identical to (mg_binds guts), except that the
  722                         -- binders on the LHS have been replaced by extendBndr
  723                         --   (SPJ this seems like overkill; I don't think the binders
  724                         --    will change at all; and we don't substitute in the RHSs anyway!!)
  725                     go env nullUsage (reverse binds)
  726 
  727       return (guts { mg_binds = binds' })
  728   where
  729     -- See Note [Top-level recursive groups]
  730     goEnv env []            = return (env, [])
  731     goEnv env (bind:binds)  = do (env', bind')   <- scTopBindEnv env bind
  732                                  (env'', binds') <- goEnv env' binds
  733                                  return (env'', bind' : binds')
  734 
  735     -- Arg list of bindings is in reverse order
  736     go _   _   []           = return []
  737     go env usg (bind:binds) = do (usg', bind') <- scTopBind env usg bind
  738                                  binds' <- go env usg' binds
  739                                  return (bind' : binds')
  740 
  741 {-
  742 ************************************************************************
  743 *                                                                      *
  744 \subsection{Environment: goes downwards}
  745 *                                                                      *
  746 ************************************************************************
  747 
  748 Note [Work-free values only in environment]
  749 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  750 The sc_vals field keeps track of in-scope value bindings, so
  751 that if we come across (case x of Just y ->...) we can reduce the
  752 case from knowing that x is bound to a pair.
  753 
  754 But only *work-free* values are ok here. For example if the envt had
  755     x -> Just (expensive v)
  756 then we do NOT want to expand to
  757      let y = expensive v in ...
  758 because the x-binding still exists and we've now duplicated (expensive v).
  759 
  760 This seldom happens because let-bound constructor applications are
  761 ANF-ised, but it can happen as a result of on-the-fly transformations in
  762 SpecConstr itself.  Here is #7865:
  763 
  764         let {
  765           a'_shr =
  766             case xs_af8 of _ {
  767               [] -> acc_af6;
  768               : ds_dgt [Dmd=<L,A>] ds_dgu [Dmd=<L,A>] ->
  769                 (expensive x_af7, x_af7
  770             } } in
  771         let {
  772           ds_sht =
  773             case a'_shr of _ { (p'_afd, q'_afe) ->
  774             TSpecConstr_DoubleInline.recursive
  775               (GHC.Types.: @ GHC.Types.Int x_af7 wild_X6) (q'_afe, p'_afd)
  776             } } in
  777 
  778 When processed knowing that xs_af8 was bound to a cons, we simplify to
  779    a'_shr = (expensive x_af7, x_af7)
  780 and we do NOT want to inline that at the occurrence of a'_shr in ds_sht.
  781 (There are other occurrences of a'_shr.)  No no no.
  782 
  783 It would be possible to do some on-the-fly ANF-ising, so that a'_shr turned
  784 into a work-free value again, thus
  785    a1 = expensive x_af7
  786    a'_shr = (a1, x_af7)
  787 but that's more work, so until its shown to be important I'm going to
  788 leave it for now.
  789 
  790 Note [Making SpecConstr keener]
  791 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  792 Consider this, in (perf/should_run/T9339)
  793    last (filter odd [1..1000])
  794 
  795 After optimisation, including SpecConstr, we get:
  796    f :: Int# -> Int -> Int
  797    f x y = case remInt# x 2# of
  798              __DEFAULT -> case x of
  799                             __DEFAULT -> f (+# wild_Xp 1#) (I# x)
  800                             1000000# -> ...
  801              0# -> case x of
  802                      __DEFAULT -> f (+# wild_Xp 1#) y
  803                     1000000#   -> y
  804 
  805 Not good!  We build an (I# x) box every time around the loop.
  806 SpecConstr (as described in the paper) does not specialise f, despite
  807 the call (f ... (I# x)) because 'y' is not scrutinised in the body.
  808 But it is much better to specialise f for the case where the argument
  809 is of form (I# x); then we build the box only when returning y, which
  810 is on the cold path.
  811 
  812 Another example:
  813 
  814    f x = ...(g x)....
  815 
  816 Here 'x' is not scrutinised in f's body; but if we did specialise 'f'
  817 then the call (g x) might allow 'g' to be specialised in turn.
  818 
  819 So sc_keen controls whether or not we take account of whether argument is
  820 scrutinised in the body.  True <=> ignore that, and specialise whenever
  821 the function is applied to a data constructor.
  822 -}
  823 
  824 data ScEnv = SCE { sc_dflags    :: DynFlags,
  825                    sc_uf_opts   :: !UnfoldingOpts, -- ^ Unfolding options
  826                    sc_module    :: !Module,
  827                    sc_size      :: Maybe Int,   -- Size threshold
  828                                                 -- Nothing => no limit
  829 
  830                    sc_count     :: Maybe Int,   -- Max # of specialisations for any one fn
  831                                                 -- Nothing => no limit
  832                                                 -- See Note [Avoiding exponential blowup]
  833 
  834                    sc_recursive :: Int,         -- Max # of specialisations over recursive type.
  835                                                 -- Stops ForceSpecConstr from diverging.
  836 
  837                    sc_keen     :: Bool,         -- Specialise on arguments that are known
  838                                                 -- constructors, even if they are not
  839                                                 -- scrutinised in the body.  See
  840                                                 -- Note [Making SpecConstr keener]
  841 
  842                    sc_force     :: Bool,        -- Force specialisation?
  843                                                 -- See Note [Forcing specialisation]
  844 
  845                    sc_subst     :: Subst,       -- Current substitution
  846                                                 -- Maps InIds to OutExprs
  847 
  848                    sc_how_bound :: HowBoundEnv,
  849                         -- Binds interesting non-top-level variables
  850                         -- Domain is OutVars (*after* applying the substitution)
  851 
  852                    sc_vals      :: ValueEnv,
  853                         -- Domain is OutIds (*after* applying the substitution)
  854                         -- Used even for top-level bindings (but not imported ones)
  855                         -- The range of the ValueEnv is *work-free* values
  856                         -- such as (\x. blah), or (Just v)
  857                         -- but NOT (Just (expensive v))
  858                         -- See Note [Work-free values only in environment]
  859 
  860                    sc_annotations :: UniqFM Name SpecConstrAnnotation
  861              }
  862 
  863 ---------------------
  864 type HowBoundEnv = VarEnv HowBound      -- Domain is OutVars
  865 
  866 ---------------------
  867 type ValueEnv = IdEnv Value             -- Domain is OutIds
  868 data Value    = ConVal AltCon [CoreArg] -- _Saturated_ constructors
  869                                         --   The AltCon is never DEFAULT
  870               | LambdaVal               -- Inlinable lambdas or PAPs
  871 
  872 instance Outputable Value where
  873    ppr (ConVal con args) = ppr con <+> interpp'SP args
  874    ppr LambdaVal         = text "<Lambda>"
  875 
  876 ---------------------
  877 initScEnv :: DynFlags -> Module -> UniqFM Name SpecConstrAnnotation -> ScEnv
  878 initScEnv dflags this_mod anns
  879   = SCE { sc_dflags      = dflags,
  880           sc_uf_opts     = unfoldingOpts dflags,
  881           sc_module      = this_mod,
  882           sc_size        = specConstrThreshold dflags,
  883           sc_count       = specConstrCount     dflags,
  884           sc_recursive   = specConstrRecursive dflags,
  885           sc_keen        = gopt Opt_SpecConstrKeen dflags,
  886           sc_force       = False,
  887           sc_subst       = emptySubst,
  888           sc_how_bound   = emptyVarEnv,
  889           sc_vals        = emptyVarEnv,
  890           sc_annotations = anns }
  891 
  892 data HowBound = RecFun  -- These are the recursive functions for which
  893                         -- we seek interesting call patterns
  894 
  895               | RecArg  -- These are those functions' arguments, or their sub-components;
  896                         -- we gather occurrence information for these
  897 
  898 instance Outputable HowBound where
  899   ppr RecFun = text "RecFun"
  900   ppr RecArg = text "RecArg"
  901 
  902 scForce :: ScEnv -> Bool -> ScEnv
  903 scForce env b = env { sc_force = b }
  904 
  905 lookupHowBound :: ScEnv -> Id -> Maybe HowBound
  906 lookupHowBound env id = lookupVarEnv (sc_how_bound env) id
  907 
  908 scSubstId :: ScEnv -> Id -> CoreExpr
  909 scSubstId env v = lookupIdSubst (sc_subst env) v
  910 
  911 scSubstTy :: ScEnv -> Type -> Type
  912 scSubstTy env ty = substTy (sc_subst env) ty
  913 
  914 scSubstCo :: ScEnv -> Coercion -> Coercion
  915 scSubstCo env co = substCo (sc_subst env) co
  916 
  917 zapScSubst :: ScEnv -> ScEnv
  918 zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) }
  919 
  920 extendScInScope :: ScEnv -> [Var] -> ScEnv
  921         -- Bring the quantified variables into scope
  922 extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars }
  923 
  924         -- Extend the substitution
  925 extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv
  926 extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr }
  927 
  928 extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv
  929 extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs }
  930 
  931 extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv
  932 extendHowBound env bndrs how_bound
  933   = env { sc_how_bound = extendVarEnvList (sc_how_bound env)
  934                             [(bndr,how_bound) | bndr <- bndrs] }
  935 
  936 extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var])
  937 extendBndrsWith how_bound env bndrs
  938   = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs')
  939   where
  940     (subst', bndrs') = substBndrs (sc_subst env) bndrs
  941     hb_env' = sc_how_bound env `extendVarEnvList`
  942                     [(bndr,how_bound) | bndr <- bndrs']
  943 
  944 extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var)
  945 extendBndrWith how_bound env bndr
  946   = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr')
  947   where
  948     (subst', bndr') = substBndr (sc_subst env) bndr
  949     hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound
  950 
  951 extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
  952 extendRecBndrs env bndrs  = (env { sc_subst = subst' }, bndrs')
  953                       where
  954                         (subst', bndrs') = substRecBndrs (sc_subst env) bndrs
  955 
  956 extendBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
  957 extendBndrs env bndrs = mapAccumL extendBndr env bndrs
  958 
  959 extendBndr :: ScEnv -> Var -> (ScEnv, Var)
  960 extendBndr env bndr  = (env { sc_subst = subst' }, bndr')
  961                      where
  962                        (subst', bndr') = substBndr (sc_subst env) bndr
  963 
  964 extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv
  965 extendValEnv env _  Nothing   = env
  966 extendValEnv env id (Just cv)
  967  | valueIsWorkFree cv      -- Don't duplicate work!!  #7865
  968  = env { sc_vals = extendVarEnv (sc_vals env) id cv }
  969 extendValEnv env _ _ = env
  970 
  971 extendCaseBndrs :: ScEnv -> OutExpr -> OutId -> AltCon -> [Var] -> (ScEnv, [Var])
  972 -- When we encounter
  973 --      case scrut of b
  974 --          C x y -> ...
  975 -- we want to bind b, to (C x y)
  976 -- NB1: Extends only the sc_vals part of the envt
  977 -- NB2: Kill the dead-ness info on the pattern binders x,y, since
  978 --      they are potentially made alive by the [b -> C x y] binding
  979 extendCaseBndrs env scrut case_bndr con alt_bndrs
  980    = (env2, alt_bndrs')
  981  where
  982    live_case_bndr = not (isDeadBinder case_bndr)
  983    env1 | Var v <- stripTicksTopE (const True) scrut
  984                          = extendValEnv env v cval
  985         | otherwise      = env  -- See Note [Add scrutinee to ValueEnv too]
  986    env2 | live_case_bndr = extendValEnv env1 case_bndr cval
  987         | otherwise      = env1
  988 
  989    alt_bndrs' | case scrut of { Var {} -> True; _ -> live_case_bndr }
  990               = map zap alt_bndrs
  991               | otherwise
  992               = alt_bndrs
  993 
  994    cval = case con of
  995                 DEFAULT    -> Nothing
  996                 LitAlt {}  -> Just (ConVal con [])
  997                 DataAlt {} -> Just (ConVal con vanilla_args)
  998                       where
  999                         vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++
 1000                                        varsToCoreExprs alt_bndrs
 1001 
 1002    zap v | isTyVar v = v                -- See NB2 above
 1003          | otherwise = zapIdOccInfo v
 1004 
 1005 
 1006 decreaseSpecCount :: ScEnv -> Int -> ScEnv
 1007 -- See Note [Avoiding exponential blowup]
 1008 decreaseSpecCount env n_specs
 1009   = env { sc_force = False   -- See Note [Forcing specialisation]
 1010         , sc_count = case sc_count env of
 1011                        Nothing -> Nothing
 1012                        Just n  -> Just (n `div` (n_specs + 1)) }
 1013         -- The "+1" takes account of the original function;
 1014         -- See Note [Avoiding exponential blowup]
 1015 
 1016 ---------------------------------------------------
 1017 -- See Note [Forcing specialisation]
 1018 ignoreType    :: ScEnv -> Type   -> Bool
 1019 ignoreDataCon  :: ScEnv -> DataCon -> Bool
 1020 forceSpecBndr :: ScEnv -> Var    -> Bool
 1021 
 1022 ignoreDataCon env dc = ignoreTyCon env (dataConTyCon dc)
 1023 
 1024 ignoreType env ty
 1025   = case tyConAppTyCon_maybe ty of
 1026       Just tycon -> ignoreTyCon env tycon
 1027       _          -> False
 1028 
 1029 ignoreTyCon :: ScEnv -> TyCon -> Bool
 1030 ignoreTyCon env tycon
 1031   = lookupUFM (sc_annotations env) (tyConName tycon) == Just NoSpecConstr
 1032 
 1033 forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTyCoVars . varType $ var
 1034 
 1035 forceSpecFunTy :: ScEnv -> Type -> Bool
 1036 forceSpecFunTy env = any (forceSpecArgTy env) . map scaledThing . fst . splitFunTys
 1037 
 1038 forceSpecArgTy :: ScEnv -> Type -> Bool
 1039 forceSpecArgTy env ty
 1040   | Just ty' <- coreView ty = forceSpecArgTy env ty'
 1041 
 1042 forceSpecArgTy env ty
 1043   | Just (tycon, tys) <- splitTyConApp_maybe ty
 1044   , tycon /= funTyCon
 1045       = tyConUnique tycon == specTyConKey
 1046         || lookupUFM (sc_annotations env) (tyConName tycon) == Just ForceSpecConstr
 1047         || any (forceSpecArgTy env) tys
 1048 
 1049 forceSpecArgTy _ _ = False
 1050 
 1051 {-
 1052 Note [Add scrutinee to ValueEnv too]
 1053 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1054 Consider this:
 1055    case x of y
 1056      (a,b) -> case b of c
 1057                 I# v -> ...(f y)...
 1058 By the time we get to the call (f y), the ValueEnv
 1059 will have a binding for y, and for c
 1060     y -> (a,b)
 1061     c -> I# v
 1062 BUT that's not enough!  Looking at the call (f y) we
 1063 see that y is pair (a,b), but we also need to know what 'b' is.
 1064 So in extendCaseBndrs we must *also* add the binding
 1065    b -> I# v
 1066 else we lose a useful specialisation for f.  This is necessary even
 1067 though the simplifier has systematically replaced uses of 'x' with 'y'
 1068 and 'b' with 'c' in the code.  The use of 'b' in the ValueEnv came
 1069 from outside the case.  See #4908 for the live example.
 1070 
 1071 Note [Avoiding exponential blowup]
 1072 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1073 The sc_count field of the ScEnv says how many times we are prepared to
 1074 duplicate a single function.  But we must take care with recursive
 1075 specialisations.  Consider
 1076 
 1077         let $j1 = let $j2 = let $j3 = ...
 1078                             in
 1079                             ...$j3...
 1080                   in
 1081                   ...$j2...
 1082         in
 1083         ...$j1...
 1084 
 1085 If we specialise $j1 then in each specialisation (as well as the original)
 1086 we can specialise $j2, and similarly $j3.  Even if we make just *one*
 1087 specialisation of each, because we also have the original we'll get 2^n
 1088 copies of $j3, which is not good.
 1089 
 1090 So when recursively specialising we divide the sc_count by the number of
 1091 copies we are making at this level, including the original.
 1092 
 1093 
 1094 ************************************************************************
 1095 *                                                                      *
 1096 \subsection{Usage information: flows upwards}
 1097 *                                                                      *
 1098 ************************************************************************
 1099 -}
 1100 
 1101 data ScUsage
 1102    = SCU {
 1103         scu_calls :: CallEnv,           -- Calls
 1104                                         -- The functions are a subset of the
 1105                                         --      RecFuns in the ScEnv
 1106 
 1107         scu_occs :: !(IdEnv ArgOcc)     -- Information on argument occurrences
 1108      }                                  -- The domain is OutIds
 1109 
 1110 type CallEnv = IdEnv [Call]
 1111 data Call = Call Id [CoreArg] ValueEnv
 1112         -- The arguments of the call, together with the
 1113         -- env giving the constructor bindings at the call site
 1114         -- We keep the function mainly for debug output
 1115         --
 1116         -- The call is not necessarily saturated; we just put
 1117         -- in however many args are visible at the call site
 1118 
 1119 instance Outputable ScUsage where
 1120   ppr (SCU { scu_calls = calls, scu_occs = occs })
 1121     = text "SCU" <+> braces (sep [ text "calls =" <+> ppr calls
 1122                                  , text "occs =" <+> ppr occs ])
 1123 
 1124 instance Outputable Call where
 1125   ppr (Call fn args _) = ppr fn <+> fsep (map pprParendExpr args)
 1126 
 1127 nullUsage :: ScUsage
 1128 nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv }
 1129 
 1130 combineCalls :: CallEnv -> CallEnv -> CallEnv
 1131 combineCalls = plusVarEnv_C (++)
 1132 
 1133 combineUsage :: ScUsage -> ScUsage -> ScUsage
 1134 combineUsage u1 u2 = SCU { scu_calls = combineCalls (scu_calls u1) (scu_calls u2),
 1135                            scu_occs  = plusVarEnv_C combineOcc (scu_occs u1) (scu_occs u2) }
 1136 
 1137 combineUsages :: [ScUsage] -> ScUsage
 1138 combineUsages [] = nullUsage
 1139 combineUsages us = foldr1 combineUsage us
 1140 
 1141 lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc])
 1142 lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs
 1143   = (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs},
 1144      [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs])
 1145 
 1146 data ArgOcc = NoOcc     -- Doesn't occur at all; or a type argument
 1147             | UnkOcc    -- Used in some unknown way
 1148 
 1149             | ScrutOcc  -- See Note [ScrutOcc]
 1150                  (DataConEnv [ArgOcc])   -- How the sub-components are used
 1151 
 1152 type DataConEnv a = UniqFM DataCon a     -- Keyed by DataCon
 1153 
 1154 {- Note  [ScrutOcc]
 1155 ~~~~~~~~~~~~~~~~~~~
 1156 An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing,
 1157 is *only* taken apart or applied.
 1158 
 1159   Functions, literal: ScrutOcc emptyUFM
 1160   Data constructors:  ScrutOcc subs,
 1161 
 1162 where (subs :: UniqFM [ArgOcc]) gives usage of the *pattern-bound* components,
 1163 The domain of the UniqFM is the Unique of the data constructor
 1164 
 1165 The [ArgOcc] is the occurrences of the *pattern-bound* components
 1166 of the data structure.  E.g.
 1167         data T a = forall b. MkT a b (b->a)
 1168 A pattern binds b, x::a, y::b, z::b->a, but not 'a'!
 1169 
 1170 -}
 1171 
 1172 instance Outputable ArgOcc where
 1173   ppr (ScrutOcc xs) = text "scrut-occ" <> ppr xs
 1174   ppr UnkOcc        = text "unk-occ"
 1175   ppr NoOcc         = text "no-occ"
 1176 
 1177 evalScrutOcc :: ArgOcc
 1178 evalScrutOcc = ScrutOcc emptyUFM
 1179 
 1180 -- Experimentally, this version of combineOcc makes ScrutOcc "win", so
 1181 -- that if the thing is scrutinised anywhere then we get to see that
 1182 -- in the overall result, even if it's also used in a boxed way
 1183 -- This might be too aggressive; see Note [Reboxing] Alternative 3
 1184 combineOcc :: ArgOcc -> ArgOcc -> ArgOcc
 1185 combineOcc NoOcc         occ           = occ
 1186 combineOcc occ           NoOcc         = occ
 1187 combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys)
 1188 combineOcc UnkOcc        (ScrutOcc ys) = ScrutOcc ys
 1189 combineOcc (ScrutOcc xs) UnkOcc        = ScrutOcc xs
 1190 combineOcc UnkOcc        UnkOcc        = UnkOcc
 1191 
 1192 combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc]
 1193 combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys
 1194 
 1195 setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage
 1196 -- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee
 1197 -- is a variable, and an interesting variable
 1198 setScrutOcc env usg (Cast e _) occ      = setScrutOcc env usg e occ
 1199 setScrutOcc env usg (Tick _ e) occ      = setScrutOcc env usg e occ
 1200 setScrutOcc env usg (Var v)    occ
 1201   | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ }
 1202   | otherwise                           = usg
 1203 setScrutOcc _env usg _other _occ        -- Catch-all
 1204   = usg
 1205 
 1206 {-
 1207 ************************************************************************
 1208 *                                                                      *
 1209 \subsection{The main recursive function}
 1210 *                                                                      *
 1211 ************************************************************************
 1212 
 1213 The main recursive function gathers up usage information, and
 1214 creates specialised versions of functions.
 1215 -}
 1216 
 1217 scExpr, scExpr' :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr)
 1218         -- The unique supply is needed when we invent
 1219         -- a new name for the specialised function and its args
 1220 
 1221 scExpr env e = scExpr' env e
 1222 
 1223 scExpr' env (Var v)      = case scSubstId env v of
 1224                             Var v' -> return (mkVarUsage env v' [], Var v')
 1225                             e'     -> scExpr (zapScSubst env) e'
 1226 
 1227 scExpr' env (Type t)     = return (nullUsage, Type (scSubstTy env t))
 1228 scExpr' env (Coercion c) = return (nullUsage, Coercion (scSubstCo env c))
 1229 scExpr' _   e@(Lit {})   = return (nullUsage, e)
 1230 scExpr' env (Tick t e)   = do (usg, e') <- scExpr env e
 1231                               return (usg, Tick t e')
 1232 scExpr' env (Cast e co)  = do (usg, e') <- scExpr env e
 1233                               return (usg, mkCast e' (scSubstCo env co))
 1234                               -- Important to use mkCast here
 1235                               -- See Note [SpecConstr call patterns]
 1236 scExpr' env e@(App _ _)  = scApp env (collectArgs e)
 1237 scExpr' env (Lam b e)    = do let (env', b') = extendBndr env b
 1238                               (usg, e') <- scExpr env' e
 1239                               return (usg, Lam b' e')
 1240 
 1241 scExpr' env (Case scrut b ty alts)
 1242   = do  { (scrut_usg, scrut') <- scExpr env scrut
 1243         ; case isValue (sc_vals env) scrut' of
 1244                 Just (ConVal con args) -> sc_con_app con args scrut'
 1245                 _other                 -> sc_vanilla scrut_usg scrut'
 1246         }
 1247   where
 1248     sc_con_app con args scrut'  -- Known constructor; simplify
 1249      = do { let Alt _ bs rhs = findAlt con alts
 1250                                   `orElse` Alt DEFAULT [] (mkImpossibleExpr ty)
 1251                 alt_env'     = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args)
 1252           ; scExpr alt_env' rhs }
 1253 
 1254     sc_vanilla scrut_usg scrut' -- Normal case
 1255      = do { let (alt_env,b') = extendBndrWith RecArg env b
 1256                         -- Record RecArg for the components
 1257 
 1258           ; (alt_usgs, alt_occs, alts')
 1259                 <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts
 1260 
 1261           ; let scrut_occ  = foldr combineOcc NoOcc alt_occs
 1262                 scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ
 1263                 -- The combined usage of the scrutinee is given
 1264                 -- by scrut_occ, which is passed to scScrut, which
 1265                 -- in turn treats a bare-variable scrutinee specially
 1266 
 1267           ; return (foldr combineUsage scrut_usg' alt_usgs,
 1268                     Case scrut' b' (scSubstTy env ty) alts') }
 1269 
 1270     sc_alt env scrut' b' (Alt con bs rhs)
 1271      = do { let (env1, bs1) = extendBndrsWith RecArg env bs
 1272                 (env2, bs2) = extendCaseBndrs env1 scrut' b' con bs1
 1273           ; (usg, rhs') <- scExpr env2 rhs
 1274           ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2)
 1275                 scrut_occ = case con of
 1276                                DataAlt dc -> ScrutOcc (unitUFM dc arg_occs)
 1277                                _          -> ScrutOcc emptyUFM
 1278           ; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') }
 1279 
 1280 scExpr' env (Let (NonRec bndr rhs) body)
 1281   | isTyVar bndr        -- Type-lets may be created by doBeta
 1282   = scExpr' (extendScSubst env bndr rhs) body
 1283 
 1284   | otherwise
 1285   = do  { let (body_env, bndr') = extendBndr env bndr
 1286         ; rhs_info  <- scRecRhs env (bndr',rhs)
 1287 
 1288         ; let body_env2 = extendHowBound body_env [bndr'] RecFun
 1289                            -- Note [Local let bindings]
 1290               rhs'      = ri_new_rhs rhs_info
 1291               body_env3 = extendValEnv body_env2 bndr' (isValue (sc_vals env) rhs')
 1292 
 1293         ; (body_usg, body') <- scExpr body_env3 body
 1294 
 1295           -- NB: For non-recursive bindings we inherit sc_force flag from
 1296           -- the parent function (see Note [Forcing specialisation])
 1297         ; (spec_usg, specs) <- specNonRec env body_usg rhs_info
 1298 
 1299         ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' }
 1300                     `combineUsage` spec_usg,  -- Note [spec_usg includes rhs_usg]
 1301                   mkLets [NonRec b r | (b,r) <- ruleInfoBinds rhs_info specs] body')
 1302         }
 1303 
 1304 
 1305 -- A *local* recursive group: see Note [Local recursive groups]
 1306 scExpr' env (Let (Rec prs) body)
 1307   = do  { let (bndrs,rhss)      = unzip prs
 1308               (rhs_env1,bndrs') = extendRecBndrs env bndrs
 1309               rhs_env2          = extendHowBound rhs_env1 bndrs' RecFun
 1310               force_spec        = any (forceSpecBndr env) bndrs'
 1311                 -- Note [Forcing specialisation]
 1312 
 1313         ; rhs_infos <- mapM (scRecRhs rhs_env2) (bndrs' `zip` rhss)
 1314         ; (body_usg, body')     <- scExpr rhs_env2 body
 1315 
 1316         -- NB: start specLoop from body_usg
 1317         ; (spec_usg, specs) <- specRec NotTopLevel (scForce rhs_env2 force_spec)
 1318                                        body_usg rhs_infos
 1319                 -- Do not unconditionally generate specialisations from rhs_usgs
 1320                 -- Instead use them only if we find an unspecialised call
 1321                 -- See Note [Local recursive groups]
 1322 
 1323         ; let all_usg = spec_usg `combineUsage` body_usg  -- Note [spec_usg includes rhs_usg]
 1324               bind'   = Rec (concat (zipWithEqual "scExpr'" ruleInfoBinds rhs_infos specs))
 1325                         -- zipWithEqual: length of returned [SpecInfo]
 1326                         -- should be the same as incoming [RhsInfo]
 1327 
 1328         ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' },
 1329                   Let bind' body') }
 1330 
 1331 {-
 1332 Note [Local let bindings]
 1333 ~~~~~~~~~~~~~~~~~~~~~~~~~
 1334 It is not uncommon to find this
 1335 
 1336    let $j = \x. <blah> in ...$j True...$j True...
 1337 
 1338 Here $j is an arbitrary let-bound function, but it often comes up for
 1339 join points.  We might like to specialise $j for its call patterns.
 1340 Notice the difference from a letrec, where we look for call patterns
 1341 in the *RHS* of the function.  Here we look for call patterns in the
 1342 *body* of the let.
 1343 
 1344 At one point I predicated this on the RHS mentioning the outer
 1345 recursive function, but that's not essential and might even be
 1346 harmful.  I'm not sure.
 1347 -}
 1348 
 1349 scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr)
 1350 
 1351 scApp env (Var fn, args)        -- Function is a variable
 1352   = assert (not (null args)) $
 1353     do  { args_w_usgs <- mapM (scExpr env) args
 1354         ; let (arg_usgs, args') = unzip args_w_usgs
 1355               arg_usg = combineUsages arg_usgs
 1356         ; case scSubstId env fn of
 1357             fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args')
 1358                         -- Do beta-reduction and try again
 1359 
 1360             Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args',
 1361                                mkApps (Var fn') args')
 1362 
 1363             other_fn' -> return (arg_usg, mkApps other_fn' args') }
 1364                 -- NB: doing this ignores any usage info from the substituted
 1365                 --     function, but I don't think that matters.  If it does
 1366                 --     we can fix it.
 1367   where
 1368     doBeta :: OutExpr -> [OutExpr] -> OutExpr
 1369     -- ToDo: adjust for System IF
 1370     doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args)
 1371     doBeta fn              args         = mkApps fn args
 1372 
 1373 -- The function is almost always a variable, but not always.
 1374 -- In particular, if this pass follows float-in,
 1375 -- which it may, we can get
 1376 --      (let f = ...f... in f) arg1 arg2
 1377 scApp env (other_fn, args)
 1378   = do  { (fn_usg,   fn')   <- scExpr env other_fn
 1379         ; (arg_usgs, args') <- mapAndUnzipM (scExpr env) args
 1380         ; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') }
 1381 
 1382 ----------------------
 1383 mkVarUsage :: ScEnv -> Id -> [CoreExpr] -> ScUsage
 1384 mkVarUsage env fn args
 1385   = case lookupHowBound env fn of
 1386         Just RecFun -> SCU { scu_calls = unitVarEnv fn [Call fn args (sc_vals env)]
 1387                            , scu_occs  = emptyVarEnv }
 1388         Just RecArg -> SCU { scu_calls = emptyVarEnv
 1389                            , scu_occs  = unitVarEnv fn arg_occ }
 1390         Nothing     -> nullUsage
 1391   where
 1392     -- I rather think we could use UnkOcc all the time
 1393     arg_occ | null args = UnkOcc
 1394             | otherwise = evalScrutOcc
 1395 
 1396 ----------------------
 1397 scTopBindEnv :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind)
 1398 scTopBindEnv env (Rec prs)
 1399   = do  { let (rhs_env1,bndrs') = extendRecBndrs env bndrs
 1400               rhs_env2          = extendHowBound rhs_env1 bndrs RecFun
 1401 
 1402               prs'              = zip bndrs' rhss
 1403         ; return (rhs_env2, Rec prs') }
 1404   where
 1405     (bndrs,rhss) = unzip prs
 1406 
 1407 scTopBindEnv env (NonRec bndr rhs)
 1408   = do  { let (env1, bndr') = extendBndr env bndr
 1409               env2          = extendValEnv env1 bndr' (isValue (sc_vals env) rhs)
 1410         ; return (env2, NonRec bndr' rhs) }
 1411 
 1412 ----------------------
 1413 scTopBind :: ScEnv -> ScUsage -> CoreBind -> UniqSM (ScUsage, CoreBind)
 1414 
 1415 scTopBind env body_usage (Rec prs)
 1416   | Just threshold <- sc_size env
 1417   , not force_spec
 1418   , not (all (couldBeSmallEnoughToInline (sc_uf_opts env) threshold) rhss)
 1419                 -- No specialisation
 1420   = -- pprTrace "scTopBind: nospec" (ppr bndrs) $
 1421     do  { (rhs_usgs, rhss')   <- mapAndUnzipM (scExpr env) rhss
 1422         ; return (body_usage `combineUsage` combineUsages rhs_usgs, Rec (bndrs `zip` rhss')) }
 1423 
 1424   | otherwise   -- Do specialisation
 1425   = do  { rhs_infos <- mapM (scRecRhs env) prs
 1426 
 1427         ; (spec_usage, specs) <- specRec TopLevel (scForce env force_spec)
 1428                                          body_usage rhs_infos
 1429 
 1430         ; return (body_usage `combineUsage` spec_usage,
 1431                   Rec (concat (zipWith ruleInfoBinds rhs_infos specs))) }
 1432   where
 1433     (bndrs,rhss) = unzip prs
 1434     force_spec   = any (forceSpecBndr env) bndrs
 1435       -- Note [Forcing specialisation]
 1436 
 1437 scTopBind env usage (NonRec bndr rhs)   -- Oddly, we don't seem to specialise top-level non-rec functions
 1438   = do  { (rhs_usg', rhs') <- scExpr env rhs
 1439         ; return (usage `combineUsage` rhs_usg', NonRec bndr rhs') }
 1440 
 1441 ----------------------
 1442 scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM RhsInfo
 1443 scRecRhs env (bndr,rhs)
 1444   = do  { let (arg_bndrs,body)       = collectBinders rhs
 1445               (body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs
 1446         ; (body_usg, body')         <- scExpr body_env body
 1447         ; let (rhs_usg, arg_occs)    = lookupOccs body_usg arg_bndrs'
 1448         ; return (RI { ri_rhs_usg = rhs_usg
 1449                      , ri_fn = bndr, ri_new_rhs = mkLams arg_bndrs' body'
 1450                      , ri_lam_bndrs = arg_bndrs, ri_lam_body = body
 1451                      , ri_arg_occs = arg_occs }) }
 1452                 -- The arg_occs says how the visible,
 1453                 -- lambda-bound binders of the RHS are used
 1454                 -- (including the TyVar binders)
 1455                 -- Two pats are the same if they match both ways
 1456 
 1457 ----------------------
 1458 ruleInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)]
 1459 ruleInfoBinds (RI { ri_fn = fn, ri_new_rhs = new_rhs })
 1460               (SI { si_specs = specs })
 1461   = [(id,rhs) | OS { os_id = id, os_rhs = rhs } <- specs] ++
 1462               -- First the specialised bindings
 1463 
 1464     [(fn `addIdSpecialisations` rules, new_rhs)]
 1465               -- And now the original binding
 1466   where
 1467     rules = [r | OS { os_rule = r } <- specs]
 1468 
 1469 {-
 1470 ************************************************************************
 1471 *                                                                      *
 1472                 The specialiser itself
 1473 *                                                                      *
 1474 ************************************************************************
 1475 -}
 1476 
 1477 data RhsInfo
 1478   = RI { ri_fn :: OutId                 -- The binder
 1479        , ri_new_rhs :: OutExpr          -- The specialised RHS (in current envt)
 1480        , ri_rhs_usg :: ScUsage          -- Usage info from specialising RHS
 1481 
 1482        , ri_lam_bndrs :: [InVar]       -- The *original* RHS (\xs.body)
 1483        , ri_lam_body  :: InExpr        --   Note [Specialise original body]
 1484        , ri_arg_occs  :: [ArgOcc]      -- Info on how the xs occur in body
 1485     }
 1486 
 1487 data SpecInfo       -- Info about specialisations for a particular Id
 1488   = SI { si_specs :: [OneSpec]          -- The specialisations we have generated
 1489 
 1490        , si_n_specs :: Int              -- Length of si_specs; used for numbering them
 1491 
 1492        , si_mb_unspec :: Maybe ScUsage  -- Just cs  => we have not yet used calls in the
 1493        }                                --             from calls in the *original* RHS as
 1494                                         --             seeds for new specialisations;
 1495                                         --             if you decide to do so, here is the
 1496                                         --             RHS usage (which has not yet been
 1497                                         --             unleashed)
 1498                                         -- Nothing => we have
 1499                                         -- See Note [Local recursive groups]
 1500                                         -- See Note [spec_usg includes rhs_usg]
 1501 
 1502         -- One specialisation: Rule plus definition
 1503 data OneSpec =
 1504   OS { os_pat  :: CallPat    -- Call pattern that generated this specialisation
 1505      , os_rule :: CoreRule   -- Rule connecting original id with the specialisation
 1506      , os_id   :: OutId      -- Spec id
 1507      , os_rhs  :: OutExpr }  -- Spec rhs
 1508 
 1509 noSpecInfo :: SpecInfo
 1510 noSpecInfo = SI { si_specs = [], si_n_specs = 0, si_mb_unspec = Nothing }
 1511 
 1512 ----------------------
 1513 specNonRec :: ScEnv
 1514            -> ScUsage         -- Body usage
 1515            -> RhsInfo         -- Structure info usage info for un-specialised RHS
 1516            -> UniqSM (ScUsage, SpecInfo)       -- Usage from RHSs (specialised and not)
 1517                                                --     plus details of specialisations
 1518 
 1519 specNonRec env body_usg rhs_info
 1520   = specialise env (scu_calls body_usg) rhs_info
 1521                (noSpecInfo { si_mb_unspec = Just (ri_rhs_usg rhs_info) })
 1522 
 1523 ----------------------
 1524 specRec :: TopLevelFlag -> ScEnv
 1525         -> ScUsage                         -- Body usage
 1526         -> [RhsInfo]                       -- Structure info and usage info for un-specialised RHSs
 1527         -> UniqSM (ScUsage, [SpecInfo])    -- Usage from all RHSs (specialised and not)
 1528                                            --     plus details of specialisations
 1529 
 1530 specRec top_lvl env body_usg rhs_infos
 1531   = go 1 seed_calls nullUsage init_spec_infos
 1532   where
 1533     (seed_calls, init_spec_infos)    -- Note [Seeding top-level recursive groups]
 1534        | isTopLevel top_lvl
 1535        , any (isExportedId . ri_fn) rhs_infos   -- Seed from body and RHSs
 1536        = (all_calls,     [noSpecInfo | _ <- rhs_infos])
 1537        | otherwise                              -- Seed from body only
 1538        = (calls_in_body, [noSpecInfo { si_mb_unspec = Just (ri_rhs_usg ri) }
 1539                          | ri <- rhs_infos])
 1540 
 1541     calls_in_body = scu_calls body_usg
 1542     calls_in_rhss = foldr (combineCalls . scu_calls . ri_rhs_usg) emptyVarEnv rhs_infos
 1543     all_calls = calls_in_rhss `combineCalls` calls_in_body
 1544 
 1545     -- Loop, specialising, until you get no new specialisations
 1546     go :: Int   -- Which iteration of the "until no new specialisations"
 1547                 -- loop we are on; first iteration is 1
 1548        -> CallEnv   -- Seed calls
 1549                     -- Two accumulating parameters:
 1550        -> ScUsage      -- Usage from earlier specialisations
 1551        -> [SpecInfo]   -- Details of specialisations so far
 1552        -> UniqSM (ScUsage, [SpecInfo])
 1553     go n_iter seed_calls usg_so_far spec_infos
 1554       | isEmptyVarEnv seed_calls
 1555       = -- pprTrace "specRec1" (vcat [ ppr (map ri_fn rhs_infos)
 1556         --                           , ppr seed_calls
 1557         --                           , ppr body_usg ]) $
 1558         return (usg_so_far, spec_infos)
 1559 
 1560       -- Limit recursive specialisation
 1561       -- See Note [Limit recursive specialisation]
 1562       | n_iter > sc_recursive env  -- Too many iterations of the 'go' loop
 1563       , sc_force env || isNothing (sc_count env)
 1564            -- If both of these are false, the sc_count
 1565            -- threshold will prevent non-termination
 1566       , any ((> the_limit) . si_n_specs) spec_infos
 1567       = -- pprTrace "specRec2" (ppr (map (map os_pat . si_specs) spec_infos)) $
 1568         return (usg_so_far, spec_infos)
 1569 
 1570       | otherwise
 1571       = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos)
 1572         --                           , text "iteration" <+> int n_iter
 1573         --                          , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos)
 1574         --                    ]) $
 1575         do  { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos
 1576             ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg
 1577                   extra_usg = combineUsages extra_usg_s
 1578                   all_usg   = usg_so_far `combineUsage` extra_usg
 1579             ; go (n_iter + 1) (scu_calls extra_usg) all_usg new_spec_infos }
 1580 
 1581     -- See Note [Limit recursive specialisation]
 1582     the_limit = case sc_count env of
 1583                   Nothing  -> 10    -- Ugh!
 1584                   Just max -> max
 1585 
 1586 
 1587 ----------------------
 1588 specialise
 1589    :: ScEnv
 1590    -> CallEnv                     -- Info on newly-discovered calls to this function
 1591    -> RhsInfo
 1592    -> SpecInfo                    -- Original RHS plus patterns dealt with
 1593    -> UniqSM (ScUsage, SpecInfo)  -- New specialised versions and their usage
 1594 
 1595 -- See Note [spec_usg includes rhs_usg]
 1596 
 1597 -- Note: this only generates *specialised* bindings
 1598 -- The original binding is added by ruleInfoBinds
 1599 --
 1600 -- Note: the rhs here is the optimised version of the original rhs
 1601 -- So when we make a specialised copy of the RHS, we're starting
 1602 -- from an RHS whose nested functions have been optimised already.
 1603 
 1604 specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
 1605                               , ri_lam_body = body, ri_arg_occs = arg_occs })
 1606                spec_info@(SI { si_specs = specs, si_n_specs = spec_count
 1607                              , si_mb_unspec = mb_unspec })
 1608   | isDeadEndId fn  -- Note [Do not specialise diverging functions]
 1609                     -- and do not generate specialisation seeds from its RHS
 1610   = -- pprTrace "specialise bot" (ppr fn) $
 1611     return (nullUsage, spec_info)
 1612 
 1613   | not (isNeverActive (idInlineActivation fn))  -- See Note [Transfer activation]
 1614   , not (null arg_bndrs)                         -- Only specialise functions
 1615   , Just all_calls <- lookupVarEnv bind_calls fn -- Some calls to it
 1616   = -- pprTrace "specialise entry {" (ppr fn <+> ppr all_calls) $
 1617     do  { (boring_call, new_pats) <- callsToNewPats env fn spec_info arg_occs all_calls
 1618 
 1619         ; let n_pats = length new_pats
 1620 --        ; if (not (null new_pats) || isJust mb_unspec) then
 1621 --            pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int n_pats <+> text "good patterns"
 1622 --                                        , text "mb_unspec" <+> ppr (isJust mb_unspec)
 1623 --                                        , text "arg_occs" <+> ppr arg_occs
 1624 --                                        , text "good pats" <+> ppr new_pats])  $
 1625 --               return ()
 1626 --          else return ()
 1627 
 1628         ; let spec_env = decreaseSpecCount env n_pats
 1629         ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body)
 1630                                                  (new_pats `zip` [spec_count..])
 1631                 -- See Note [Specialise original body]
 1632 
 1633         ; let spec_usg = combineUsages spec_usgs
 1634 
 1635               -- If there were any boring calls among the seeds (= all_calls), then those
 1636               -- calls will call the un-specialised function.  So we should use the seeds
 1637               -- from the _unspecialised_ function's RHS, which are in mb_unspec, by returning
 1638               -- then in new_usg.
 1639               (new_usg, mb_unspec')
 1640                   = case mb_unspec of
 1641                       Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing)
 1642                       _                          -> (spec_usg,                      mb_unspec)
 1643 
 1644 --        ; pprTrace "specialise return }"
 1645 --             (vcat [ ppr fn
 1646 --                   , text "boring_call:" <+> ppr boring_call
 1647 --                   , text "new calls:" <+> ppr (scu_calls new_usg)]) $
 1648 --          return ()
 1649 
 1650           ; return (new_usg, SI { si_specs = new_specs ++ specs
 1651                                 , si_n_specs = spec_count + n_pats
 1652                                 , si_mb_unspec = mb_unspec' }) }
 1653 
 1654   | otherwise  -- No calls, inactive, or not a function
 1655                -- Behave as if there was a single, boring call
 1656   = -- pprTrace "specialise inactive" (ppr fn $$ ppr mb_unspec) $
 1657     case mb_unspec of    -- Behave as if there was a single, boring call
 1658       Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing })
 1659                          -- See Note [spec_usg includes rhs_usg]
 1660       Nothing      -> return (nullUsage, spec_info)
 1661 
 1662 
 1663 ---------------------
 1664 spec_one :: ScEnv
 1665          -> OutId       -- Function
 1666          -> [InVar]     -- Lambda-binders of RHS; should match patterns
 1667          -> InExpr      -- Body of the original function
 1668          -> (CallPat, Int)
 1669          -> UniqSM (ScUsage, OneSpec)   -- Rule and binding
 1670 
 1671 -- spec_one creates a specialised copy of the function, together
 1672 -- with a rule for using it.  I'm very proud of how short this
 1673 -- function is, considering what it does :-).
 1674 
 1675 {-
 1676   Example
 1677 
 1678      In-scope: a, x::a
 1679      f = /\b \y::[(a,b)] -> ....f (b,c) ((:) (a,(b,c)) (x,v) (h w))...
 1680           [c::*, v::(b,c) are presumably bound by the (...) part]
 1681   ==>
 1682      f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] ->
 1683                   (...entire body of f...) [b -> (b,c),
 1684                                             y -> ((:) (a,(b,c)) (x,v) hw)]
 1685 
 1686      RULE:  forall b::* c::*,           -- Note, *not* forall a, x
 1687                    v::(b,c),
 1688                    hw::[(a,(b,c))] .
 1689 
 1690             f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
 1691 -}
 1692 
 1693 spec_one env fn arg_bndrs body (call_pat, rule_number)
 1694   | CP { cp_qvars = qvars, cp_args = pats } <- call_pat
 1695   = do  { spec_uniq <- getUniqueM
 1696         ; let env1 = extendScSubstList (extendScInScope env qvars)
 1697                                        (arg_bndrs `zip` pats)
 1698               (body_env, extra_bndrs) = extendBndrs env1 (dropList pats arg_bndrs)
 1699               -- Remember, there may be fewer pats than arg_bndrs
 1700               -- See Note [SpecConstr call patterns]
 1701 
 1702               fn_name  = idName fn
 1703               fn_loc   = nameSrcSpan fn_name
 1704               fn_occ   = nameOccName fn_name
 1705               spec_occ = mkSpecOcc fn_occ
 1706               -- We use fn_occ rather than fn in the rule_name string
 1707               -- as we don't want the uniq to end up in the rule, and
 1708               -- hence in the ABI, as that can cause spurious ABI
 1709               -- changes (#4012).
 1710               rule_name  = mkFastString ("SC:" ++ occNameString fn_occ ++ show rule_number)
 1711               spec_name  = mkInternalName spec_uniq spec_occ fn_loc
 1712 --      ; pprTrace "spec_one {" (vcat [ text "function:" <+> ppr fn <+> ppr (idUnique fn)
 1713 --                                    , text "sc_count:" <+> ppr (sc_count env)
 1714 --                                    , text "pats:" <+> ppr pats
 1715 --                                    , text "-->" <+> ppr spec_name
 1716 --                                    , text "bndrs" <+> ppr arg_bndrs
 1717 --                                    , text "body" <+> ppr body
 1718 --                                    , text "how_bound" <+> ppr (sc_how_bound env) ]) $
 1719 --        return ()
 1720 
 1721         -- Specialise the body
 1722         ; (spec_usg, spec_body) <- scExpr body_env body
 1723 
 1724 --      ; pprTrace "done spec_one }" (ppr fn $$ ppr (scu_calls spec_usg)) $
 1725 --        return ()
 1726 
 1727                 -- And build the results
 1728         ; let spec_body_ty   = exprType spec_body
 1729 
 1730               (spec_lam_args1, spec_sig, spec_arity, spec_join_arity)
 1731                   = calcSpecInfo fn call_pat extra_bndrs
 1732                   -- Annotate the variables with the strictness information from
 1733                   -- the function (see Note [Strictness information in worker binders])
 1734 
 1735               (spec_lam_args, spec_call_args) = mkWorkerArgs fn False
 1736                                                     spec_lam_args1 spec_body_ty
 1737                 -- mkWorkerArgs: usual w/w hack to avoid generating
 1738                 -- a spec_rhs of unlifted type and no args
 1739 
 1740               spec_id    = mkLocalId spec_name Many
 1741                                      (mkLamTypes spec_lam_args spec_body_ty)
 1742                              -- See Note [Transfer strictness]
 1743                              `setIdDmdSig`    spec_sig
 1744                              `setIdCprSig`    topCprSig
 1745                              `setIdArity`     spec_arity
 1746                              `asJoinId_maybe` spec_join_arity
 1747 
 1748                 -- Conditionally use result of new worker-wrapper transform
 1749               spec_rhs   = mkLams spec_lam_args spec_body
 1750               rule_rhs   = mkVarApps (Var spec_id) $
 1751                            dropTail (length extra_bndrs) spec_call_args
 1752               inline_act = idInlineActivation fn
 1753               this_mod   = sc_module env
 1754               rule       = mkRule this_mod True {- Auto -} True {- Local -}
 1755                                   rule_name inline_act fn_name qvars pats rule_rhs
 1756                            -- See Note [Transfer activation]
 1757         ; return (spec_usg, OS { os_pat = call_pat, os_rule = rule
 1758                                , os_id = spec_id
 1759                                , os_rhs = spec_rhs }) }
 1760 
 1761 
 1762 calcSpecInfo :: Id                     -- The original function
 1763              -> CallPat                -- Call pattern
 1764              -> [Var]                  -- Extra bndrs
 1765              -> ( [Var]                     -- Demand-decorated binders
 1766                 , DmdSig                    -- Strictness of specialised thing
 1767                 , Arity, Maybe JoinArity )  -- Arities of specialised thing
 1768 -- Calcuate bits of IdInfo for the specialised function
 1769 -- See Note [Transfer strictness]
 1770 -- See Note [Strictness information in worker binders]
 1771 calcSpecInfo fn (CP { cp_qvars = qvars, cp_args = pats }) extra_bndrs
 1772   | isJoinId fn    -- Join points have strictness and arity for LHS only
 1773   = ( bndrs_w_dmds
 1774     , mkClosedDmdSig qvar_dmds div
 1775     , count isId qvars
 1776     , Just (length qvars) )
 1777   | otherwise
 1778   = ( bndrs_w_dmds
 1779     , mkClosedDmdSig (qvar_dmds ++ extra_dmds) div
 1780     , count isId qvars + count isId extra_bndrs
 1781     , Nothing )
 1782   where
 1783     DmdSig (DmdType _ fn_dmds div) = idDmdSig fn
 1784 
 1785     val_pats   = filterOut isTypeArg pats
 1786     qvar_dmds  = [ lookupVarEnv dmd_env qv `orElse` topDmd | qv <- qvars, isId qv ]
 1787     extra_dmds = dropList val_pats fn_dmds
 1788 
 1789     bndrs_w_dmds =  set_dmds qvars       qvar_dmds
 1790                  ++ set_dmds extra_bndrs extra_dmds
 1791 
 1792     set_dmds :: [Var] -> [Demand] -> [Var]
 1793     set_dmds [] _   = []
 1794     set_dmds vs  [] = vs  -- Run out of demands
 1795     set_dmds (v:vs) ds@(d:ds') | isTyVar v = v                   : set_dmds vs ds
 1796                                | otherwise = setIdDemandInfo v d : set_dmds vs ds'
 1797 
 1798     dmd_env = go emptyVarEnv fn_dmds val_pats
 1799 
 1800     go :: DmdEnv -> [Demand] -> [CoreExpr] -> DmdEnv
 1801     -- We've filtered out all the type patterns already
 1802     go env (d:ds) (pat : pats)     = go (go_one env d pat) ds pats
 1803     go env _      _                = env
 1804 
 1805     go_one :: DmdEnv -> Demand -> CoreExpr -> DmdEnv
 1806     go_one env d          (Var v) = extendVarEnv_C plusDmd env v d
 1807     go_one env (_n :* cd) e -- NB: _n does not have to be strict
 1808       | (Var _, args) <- collectArgs e
 1809       , Just (_b, ds) <- viewProd (length args) cd -- TODO: We may want to look at boxity _b, though...
 1810       = go env ds args
 1811     go_one env _  _ = env
 1812 
 1813 
 1814 {-
 1815 Note [spec_usg includes rhs_usg]
 1816 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1817 In calls to 'specialise', the returned ScUsage must include the rhs_usg in
 1818 the passed-in SpecInfo, unless there are no calls at all to the function.
 1819 
 1820 The caller can, indeed must, assume this.  They should not combine in rhs_usg
 1821 themselves, or they'll get rhs_usg twice -- and that can lead to an exponential
 1822 blowup of duplicates in the CallEnv.  This is what gave rise to the massive
 1823 performance loss in #8852.
 1824 
 1825 Note [Specialise original body]
 1826 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1827 The RhsInfo for a binding keeps the *original* body of the binding.  We
 1828 must specialise that, *not* the result of applying specExpr to the RHS
 1829 (which is also kept in RhsInfo). Otherwise we end up specialising a
 1830 specialised RHS, and that can lead directly to exponential behaviour.
 1831 
 1832 Note [Transfer activation]
 1833 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 1834   This note is for SpecConstr, but exactly the same thing
 1835   happens in the overloading specialiser; see
 1836   Note [Auto-specialisation and RULES] in GHC.Core.Opt.Specialise.
 1837 
 1838 In which phase should the specialise-constructor rules be active?
 1839 Originally I made them always-active, but Manuel found that this
 1840 defeated some clever user-written rules.  Then I made them active only
 1841 in FinalPhase; after all, currently, the specConstr transformation is
 1842 only run after the simplifier has reached FinalPhase, but that meant
 1843 that specialisations didn't fire inside wrappers; see test
 1844 simplCore/should_compile/spec-inline.
 1845 
 1846 So now I just use the inline-activation of the parent Id, as the
 1847 activation for the specialisation RULE, just like the main specialiser;
 1848 
 1849 This in turn means there is no point in specialising NOINLINE things,
 1850 so we test for that.
 1851 
 1852 Note [Transfer strictness]
 1853 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 1854 We must transfer strictness information from the original function to
 1855 the specialised one.  Suppose, for example
 1856 
 1857   f has strictness     SSx
 1858         and a RULE     f (a:as) b = f_spec a as b
 1859 
 1860 Now we want f_spec to have strictness  LLSx, otherwise we'll use call-by-need
 1861 when calling f_spec instead of call-by-value.  And that can result in
 1862 unbounded worsening in space (cf the classic foldl vs foldl')
 1863 
 1864 See #3437 for a good example.
 1865 
 1866 The function calcSpecStrictness performs the calculation.
 1867 
 1868 Note [Strictness information in worker binders]
 1869 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1870 After having calculated the strictness annotation for the worker (see Note
 1871 [Transfer strictness] above), we also want to have this information attached to
 1872 the worker’s arguments, for the benefit of later passes. The function
 1873 handOutStrictnessInformation decomposes the strictness annotation calculated by
 1874 calcSpecStrictness and attaches them to the variables.
 1875 
 1876 
 1877 ************************************************************************
 1878 *                                                                      *
 1879 \subsection{Argument analysis}
 1880 *                                                                      *
 1881 ************************************************************************
 1882 
 1883 This code deals with analysing call-site arguments to see whether
 1884 they are constructor applications.
 1885 
 1886 Note [Free type variables of the qvar types]
 1887 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1888 In a call (f @a x True), that we want to specialise, what variables should
 1889 we quantify over.  Clearly over 'a' and 'x', but what about any type variables
 1890 free in x's type?  In fact we don't need to worry about them because (f @a)
 1891 can only be a well-typed application if its type is compatible with x, so any
 1892 variables free in x's type must be free in (f @a), and hence either be gathered
 1893 via 'a' itself, or be in scope at f's defn.  Hence we just take
 1894   (exprsFreeVars pats).
 1895 
 1896 BUT phantom type synonyms can mess this reasoning up,
 1897   eg   x::T b   with  type T b = Int
 1898 So we apply expandTypeSynonyms to the bound Ids.
 1899 See # 5458.  Yuk.
 1900 
 1901 Note [SpecConstr call patterns]
 1902 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1903 A "call patterns" that we collect is going to become the LHS of a RULE.
 1904 
 1905 Wrinkles:
 1906 
 1907 * The list of argument patterns, cp_args, is no longer than the
 1908   visible lambdas of the binding, ri_arg_occs.  This is done via
 1909   the zipWithM in callToPats.
 1910 
 1911 * The list of argument patterns can certainly be shorter than the
 1912   lambdas in the function definition (under-saturated).  For example
 1913       f x y = case x of { True -> e1; False -> e2 }
 1914       ....map (f True) e...
 1915   We want to specialise `f` for `f True`.
 1916 
 1917 * In fact we deliberately shrink the list of argument patterns,
 1918   cp_args, by trimming off all the boring ones at the end (see
 1919   `dropWhileEnd is_boring` in callToPats).  Since the RULE only
 1920   applies when it is saturated, this shrinking makes the RULE more
 1921   applicable.  But it does mean that the argument patterns do not
 1922   necessarily saturate the lambdas of the function.
 1923 
 1924 * It's important that the pattern arguments do not look like
 1925      e |> Refl
 1926   or
 1927     e |> g1 |> g2
 1928   because both of these will be optimised by Simplify.simplRule. In the
 1929   former case such optimisation benign, because the rule will match more
 1930   terms; but in the latter we may lose a binding of 'g1' or 'g2', and
 1931   end up with a rule LHS that doesn't bind the template variables
 1932   (#10602).
 1933 
 1934   The simplifier eliminates such things, but SpecConstr itself constructs
 1935   new terms by substituting.  So the 'mkCast' in the Cast case of scExpr
 1936   is very important!
 1937 
 1938 Note [Choosing patterns]
 1939 ~~~~~~~~~~~~~~~~~~~~~~~~
 1940 If we get lots of patterns we may not want to make a specialisation
 1941 for each of them (code bloat), so we choose as follows, implemented
 1942 by trim_pats.
 1943 
 1944 * The flag -fspec-constr-count-N sets the sc_count field
 1945   of the ScEnv to (Just n).  This limits the total number
 1946   of specialisations for a given function to N.
 1947 
 1948 * -fno-spec-constr-count sets the sc_count field to Nothing,
 1949   which switches of the limit.
 1950 
 1951 * The ghastly ForceSpecConstr trick also switches of the limit
 1952   for a particular function
 1953 
 1954 * Otherwise we sort the patterns to choose the most general
 1955   ones first; more general => more widely applicable.
 1956 
 1957 Note [SpecConstr and casts]
 1958 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1959 Consider (#14270) a call like
 1960 
 1961     let f = e
 1962     in ... f (K @(a |> co)) ...
 1963 
 1964 where 'co' is a coercion variable not in scope at f's definition site.
 1965 If we aren't caereful we'll get
 1966 
 1967     let $sf a co = e (K @(a |> co))
 1968         RULE "SC:f" forall a co.  f (K @(a |> co)) = $sf a co
 1969         f = e
 1970     in ...
 1971 
 1972 But alas, when we match the call we won't bind 'co', because type-matching
 1973 (for good reasons) discards casts).
 1974 
 1975 I don't know how to solve this, so for now I'm just discarding any
 1976 call patterns that
 1977   * Mentions a coercion variable in a type argument
 1978   * That is not in scope at the binding of the function
 1979 
 1980 I think this is very rare.
 1981 
 1982 It is important (e.g. #14936) that this /only/ applies to
 1983 coercions mentioned in casts.  We don't want to be discombobulated
 1984 by casts in terms!  For example, consider
 1985    f ((e1,e2) |> sym co)
 1986 where, say,
 1987    f  :: Foo -> blah
 1988    co :: Foo ~R (Int,Int)
 1989 
 1990 Here we definitely do want to specialise for that pair!  We do not
 1991 match on the structure of the coercion; instead we just match on a
 1992 coercion variable, so the RULE looks like
 1993 
 1994    forall (x::Int, y::Int, co :: (Int,Int) ~R Foo)
 1995      f ((x,y) |> co) = $sf x y co
 1996 
 1997 Often the body of f looks like
 1998    f arg = ...(case arg |> co' of
 1999                 (x,y) -> blah)...
 2000 
 2001 so that the specialised f will turn into
 2002    $sf x y co = let arg = (x,y) |> co
 2003                 in ...(case arg>| co' of
 2004                          (x,y) -> blah)....
 2005 
 2006 which will simplify to not use 'co' at all.  But we can't guarantee
 2007 that co will end up unused, so we still pass it.  Absence analysis
 2008 may remove it later.
 2009 
 2010 Note that this /also/ discards the call pattern if we have a cast in a
 2011 /term/, although in fact Rules.match does make a very flaky and
 2012 fragile attempt to match coercions.  e.g. a call like
 2013     f (Maybe Age) (Nothing |> co) blah
 2014     where co :: Maybe Int ~ Maybe Age
 2015 will be discarded.  It's extremely fragile to match on the form of a
 2016 coercion, so I think it's better just not to try.  A more complicated
 2017 alternative would be to discard calls that mention coercion variables
 2018 only in kind-casts, but I'm doing the simple thing for now.
 2019 -}
 2020 
 2021 data CallPat = CP { cp_qvars :: [Var]           -- Quantified variables
 2022                   , cp_args  :: [CoreExpr] }    -- Arguments
 2023      -- See Note [SpecConstr call patterns]
 2024 
 2025 instance Outputable CallPat where
 2026   ppr (CP { cp_qvars = qvars, cp_args = args })
 2027     = text "CP" <> braces (sep [ text "cp_qvars =" <+> ppr qvars <> comma
 2028                                , text "cp_args =" <+> ppr args ])
 2029 
 2030 callsToNewPats :: ScEnv -> Id
 2031                -> SpecInfo
 2032                -> [ArgOcc] -> [Call]
 2033                -> UniqSM (Bool, [CallPat])
 2034         -- Result has no duplicate patterns,
 2035         -- nor ones mentioned in done_pats
 2036         -- Bool indicates that there was at least one boring pattern
 2037 callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
 2038   = do  { mb_pats <- mapM (callToPats env bndr_occs) calls
 2039 
 2040         ; let have_boring_call = any isNothing mb_pats
 2041 
 2042               good_pats :: [CallPat]
 2043               good_pats = catMaybes mb_pats
 2044 
 2045               -- Remove patterns we have already done
 2046               new_pats = filterOut is_done good_pats
 2047               is_done p = any (samePat p . os_pat) done_specs
 2048 
 2049               -- Remove duplicates
 2050               non_dups = nubBy samePat new_pats
 2051 
 2052               -- Remove ones that have too many worker variables
 2053               small_pats = filterOut too_big non_dups
 2054               max_args = maxWorkerArgs (sc_dflags env)
 2055               too_big (CP { cp_qvars = vars, cp_args = args })
 2056                 = not (isWorkerSmallEnough max_args (valArgCount args) vars)
 2057                   -- We are about to construct w/w pair in 'spec_one'.
 2058                   -- Omit specialisation leading to high arity workers.
 2059                   -- See Note [Limit w/w arity] in GHC.Core.Opt.WorkWrap.Utils
 2060 
 2061                 -- Discard specialisations if there are too many of them
 2062               (pats_were_discarded, trimmed_pats) = trim_pats env fn spec_info small_pats
 2063 
 2064 --        ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
 2065 --                                       , text "done_specs:" <+> ppr (map os_pat done_specs)
 2066 --                                       , text "good_pats:" <+> ppr good_pats ]) $
 2067 --          return ()
 2068 
 2069         ; return (have_boring_call || pats_were_discarded, trimmed_pats) }
 2070           -- If any of the calls does not give rise to a specialisation, either
 2071           -- because it is boring, or because there are too many specialisations,
 2072           -- return a flag to say so, so that we know to keep the original function.
 2073 
 2074 
 2075 trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> (Bool, [CallPat])
 2076 -- True <=> some patterns were discarded
 2077 -- See Note [Choosing patterns]
 2078 trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
 2079   | sc_force env
 2080     || isNothing mb_scc
 2081     || n_remaining >= n_pats
 2082   = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
 2083     (False, pats)          -- No need to trim
 2084 
 2085   | otherwise
 2086   = emit_trace $  -- Need to trim, so keep the best ones
 2087     (True, take n_remaining sorted_pats)
 2088 
 2089   where
 2090     n_pats         = length pats
 2091     spec_count'    = n_pats + done_spec_count
 2092     n_remaining    = max_specs - done_spec_count
 2093     mb_scc         = sc_count env
 2094     Just max_specs = mb_scc
 2095 
 2096     sorted_pats = map fst $
 2097                   sortBy (comparing snd) $
 2098                   [(pat, pat_cons pat) | pat <- pats]
 2099      -- Sort in order of increasing number of constructors
 2100      -- (i.e. decreasing generality) and pick the initial
 2101      -- segment of this list
 2102 
 2103     pat_cons :: CallPat -> Int
 2104     -- How many data constructors of literals are in
 2105     -- the pattern.  More data-cons => less general
 2106     pat_cons (CP { cp_qvars = qs, cp_args = ps })
 2107        = foldr ((+) . n_cons) 0 ps
 2108        where
 2109           q_set = mkVarSet qs
 2110           n_cons (Var v) | v `elemVarSet` q_set = 0
 2111                          | otherwise            = 1
 2112           n_cons (Cast e _)  = n_cons e
 2113           n_cons (App e1 e2) = n_cons e1 + n_cons e2
 2114           n_cons (Lit {})    = 1
 2115           n_cons _           = 0
 2116 
 2117     emit_trace result
 2118        | debugIsOn || hasPprDebug (sc_dflags env)
 2119          -- Suppress this scary message for ordinary users!  #5125
 2120        = pprTrace "SpecConstr" msg result
 2121        | otherwise
 2122        = result
 2123     msg = vcat [ sep [ text "Function" <+> quotes (ppr fn)
 2124                      , nest 2 (text "has" <+>
 2125                                speakNOf spec_count' (text "call pattern") <> comma <+>
 2126                                text "but the limit is" <+> int max_specs) ]
 2127                , text "Use -fspec-constr-count=n to set the bound"
 2128                , text "done_spec_count =" <+> int done_spec_count
 2129                , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats
 2130                , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ]
 2131 
 2132 
 2133 callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
 2134         -- The [Var] is the variables to quantify over in the rule
 2135         --      Type variables come first, since they may scope
 2136         --      over the following term variables
 2137         -- The [CoreExpr] are the argument patterns for the rule
 2138 callToPats env bndr_occs call@(Call fn args con_env)
 2139   = do  { let in_scope = substInScope (sc_subst env)
 2140 
 2141         ; pairs <- zipWithM (argToPat env in_scope con_env) args bndr_occs
 2142                    -- This zip trims the args to be no longer than
 2143                    -- the lambdas in the function definition (bndr_occs)
 2144 
 2145           -- Drop boring patterns from the end
 2146           -- See Note [SpecConstr call patterns]
 2147         ; let pairs' | isJoinId fn = pairs
 2148                      | otherwise   = dropWhileEnd is_boring pairs
 2149               is_boring (interesting, _) = not interesting
 2150               (interesting_s, pats) = unzip pairs'
 2151               interesting           = or interesting_s
 2152 
 2153         ; let pat_fvs = exprsFreeVarsList pats
 2154                 -- To get determinism we need the list of free variables in
 2155                 -- deterministic order. Otherwise we end up creating
 2156                 -- lambdas with different argument orders. See
 2157                 -- determinism/simplCore/should_compile/spec-inline-determ.hs
 2158                 -- for an example. For explanation of determinism
 2159                 -- considerations See Note [Unique Determinism] in GHC.Types.Unique.
 2160 
 2161               in_scope_vars = getInScopeVars in_scope
 2162               is_in_scope v = v `elemVarSet` in_scope_vars
 2163               qvars         = filterOut is_in_scope pat_fvs
 2164                 -- Quantify over variables that are not in scope
 2165                 -- at the call site
 2166                 -- See Note [Free type variables of the qvar types]
 2167                 -- See Note [Shadowing] at the top
 2168 
 2169               (ktvs, ids)   = partition isTyVar qvars
 2170               qvars'        = scopedSort ktvs ++ map sanitise ids
 2171                 -- Order into kind variables, type variables, term variables
 2172                 -- The kind of a type variable may mention a kind variable
 2173                 -- and the type of a term variable may mention a type variable
 2174 
 2175               sanitise id   = updateIdTypeAndMult expandTypeSynonyms id
 2176                 -- See Note [Free type variables of the qvar types]
 2177 
 2178               -- Bad coercion variables: see Note [SpecConstr and casts]
 2179               bad_covars :: CoVarSet
 2180               bad_covars = mapUnionVarSet get_bad_covars pats
 2181               get_bad_covars :: CoreArg -> CoVarSet
 2182               get_bad_covars (Type ty) = filterVarSet bad_covar (tyCoVarsOfType ty)
 2183               get_bad_covars _         = emptyVarSet
 2184               bad_covar v = isId v && not (is_in_scope v)
 2185 
 2186         ; -- pprTrace "callToPats"  (ppr args $$ ppr bndr_occs) $
 2187           warnPprTrace (not (isEmptyVarSet bad_covars))
 2188               ( text "SpecConstr: bad covars:" <+> ppr bad_covars
 2189                 $$ ppr call) $
 2190           if interesting && isEmptyVarSet bad_covars
 2191           then return (Just (CP { cp_qvars = qvars', cp_args = pats }))
 2192           else return Nothing }
 2193 
 2194     -- argToPat takes an actual argument, and returns an abstracted
 2195     -- version, consisting of just the "constructor skeleton" of the
 2196     -- argument, with non-constructor sub-expression replaced by new
 2197     -- placeholder variables.  For example:
 2198     --    C a (D (f x) (g y))  ==>  C p1 (D p2 p3)
 2199 
 2200 argToPat :: ScEnv
 2201          -> InScopeSet                  -- What's in scope at the fn defn site
 2202          -> ValueEnv                    -- ValueEnv at the call site
 2203          -> CoreArg                     -- A call arg (or component thereof)
 2204          -> ArgOcc
 2205          -> UniqSM (Bool, CoreArg)
 2206 
 2207 -- Returns (interesting, pat),
 2208 -- where pat is the pattern derived from the argument
 2209 --            interesting=True if the pattern is non-trivial (not a variable or type)
 2210 -- E.g.         x:xs         --> (True, x:xs)
 2211 --              f xs         --> (False, w)        where w is a fresh wildcard
 2212 --              (f xs, 'c')  --> (True, (w, 'c'))  where w is a fresh wildcard
 2213 --              \x. x+y      --> (True, \x. x+y)
 2214 --              lvl7         --> (True, lvl7)      if lvl7 is bound
 2215 --                                                 somewhere further out
 2216 
 2217 argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ
 2218   = return (False, arg)
 2219 
 2220 argToPat env in_scope val_env (Tick _ arg) arg_occ
 2221   = argToPat env in_scope val_env arg arg_occ
 2222         -- Note [Tick annotations in call patterns]
 2223         -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2224         -- Ignore Notes.  In particular, we want to ignore any InlineMe notes
 2225         -- Perhaps we should not ignore profiling notes, but I'm going to
 2226         -- ride roughshod over them all for now.
 2227         --- See Note [Tick annotations in RULE matching] in GHC.Core.Rules
 2228 
 2229 argToPat env in_scope val_env (Let _ arg) arg_occ
 2230   = argToPat env in_scope val_env arg arg_occ
 2231         -- See Note [Matching lets] in "GHC.Core.Rules"
 2232         -- Look through let expressions
 2233         -- e.g.         f (let v = rhs in (v,w))
 2234         -- Here we can specialise for f (v,w)
 2235         -- because the rule-matcher will look through the let.
 2236 
 2237 {- Disabled; see Note [Matching cases] in "GHC.Core.Rules"
 2238 argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ
 2239   | exprOkForSpeculation scrut  -- See Note [Matching cases] in "GHC.Core.Rules"
 2240   = argToPat env in_scope val_env rhs arg_occ
 2241 -}
 2242 
 2243 argToPat env in_scope val_env (Cast arg co) arg_occ
 2244   | not (ignoreType env ty2)
 2245   = do  { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ
 2246         ; if not interesting then
 2247                 wildCardPat ty2
 2248           else do
 2249         { -- Make a wild-card pattern for the coercion
 2250           uniq <- getUniqueM
 2251         ; let co_name = mkSysTvName uniq (fsLit "sg")
 2252               co_var  = mkCoVar co_name (mkCoercionType Representational ty1 ty2)
 2253         ; return (interesting, Cast arg' (mkCoVarCo co_var)) } }
 2254   where
 2255     Pair ty1 ty2 = coercionKind co
 2256 
 2257 
 2258 
 2259 {-      Disabling lambda specialisation for now
 2260         It's fragile, and the spec_loop can be infinite
 2261 argToPat in_scope val_env arg arg_occ
 2262   | is_value_lam arg
 2263   = return (True, arg)
 2264   where
 2265     is_value_lam (Lam v e)         -- Spot a value lambda, even if
 2266         | isId v       = True      -- it is inside a type lambda
 2267         | otherwise    = is_value_lam e
 2268     is_value_lam other = False
 2269 -}
 2270 
 2271   -- Check for a constructor application
 2272   -- NB: this *precedes* the Var case, so that we catch nullary constrs
 2273 argToPat env in_scope val_env arg arg_occ
 2274   | Just (ConVal (DataAlt dc) args) <- isValue val_env arg
 2275   , not (ignoreDataCon env dc)        -- See Note [NoSpecConstr]
 2276   , Just arg_occs <- mb_scrut dc
 2277   = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
 2278        ; prs <- zipWithM (argToPat env in_scope val_env) rest_args arg_occs
 2279        ; let args' = map snd prs
 2280        ; return (True, mkConApp dc (ty_args ++ args')) }
 2281   where
 2282     mb_scrut dc = case arg_occ of
 2283                     ScrutOcc bs | Just occs <- lookupUFM bs dc
 2284                                 -> Just (occs)  -- See Note [Reboxing]
 2285                     _other      | sc_force env || sc_keen env
 2286                                 -> Just (repeat UnkOcc)
 2287                                 | otherwise
 2288                                 -> Nothing
 2289 
 2290   -- Check if the argument is a variable that
 2291   --    (a) is used in an interesting way in the function body
 2292   ---       i.e. ScrutOcc. UnkOcc and NoOcc are not interesting
 2293   --        (NoOcc means we could drop the argument, but that's the
 2294   --         business of absence analysis, not SpecConstr.)
 2295   --    (b) we know what its value is
 2296   -- In that case it counts as "interesting"
 2297 argToPat env in_scope val_env (Var v) arg_occ
 2298   | sc_force env || case arg_occ of { ScrutOcc {} -> True
 2299                                     ; UnkOcc      -> False
 2300                                     ; NoOcc       -> False } -- (a)
 2301   , is_value                                                 -- (b)
 2302        -- Ignoring sc_keen here to avoid gratuitously incurring Note [Reboxing]
 2303        -- So sc_keen focused just on f (I# x), where we have freshly-allocated
 2304        -- box that we can eliminate in the caller
 2305   , not (ignoreType env (varType v))
 2306   = return (True, Var v)
 2307   where
 2308     is_value
 2309         | isLocalId v = v `elemInScopeSet` in_scope
 2310                         && isJust (lookupVarEnv val_env v)
 2311                 -- Local variables have values in val_env
 2312         | otherwise   = isValueUnfolding (idUnfolding v)
 2313                 -- Imports have unfoldings
 2314 
 2315 --      I'm really not sure what this comment means
 2316 --      And by not wild-carding we tend to get forall'd
 2317 --      variables that are in scope, which in turn can
 2318 --      expose the weakness in let-matching
 2319 --      See Note [Matching lets] in GHC.Core.Rules
 2320 
 2321   -- Check for a variable bound inside the function.
 2322   -- Don't make a wild-card, because we may usefully share
 2323   --    e.g.  f a = let x = ... in f (x,x)
 2324   -- NB: this case follows the lambda and con-app cases!!
 2325 -- argToPat _in_scope _val_env (Var v) _arg_occ
 2326 --   = return (False, Var v)
 2327         -- SLPJ : disabling this to avoid proliferation of versions
 2328         -- also works badly when thinking about seeding the loop
 2329         -- from the body of the let
 2330         --       f x y = letrec g z = ... in g (x,y)
 2331         -- We don't want to specialise for that *particular* x,y
 2332 
 2333   -- The default case: make a wild-card
 2334   -- We use this for coercions too
 2335 argToPat _env _in_scope _val_env arg _arg_occ
 2336   = wildCardPat (exprType arg)
 2337 
 2338 wildCardPat :: Type -> UniqSM (Bool, CoreArg)
 2339 wildCardPat ty
 2340   = do { uniq <- getUniqueM
 2341        ; let id = mkSysLocalOrCoVar (fsLit "sc") uniq Many ty
 2342        ; return (False, varToCoreExpr id) }
 2343 
 2344 isValue :: ValueEnv -> CoreExpr -> Maybe Value
 2345 isValue _env (Lit lit)
 2346   | litIsLifted lit = Nothing
 2347   | otherwise       = Just (ConVal (LitAlt lit) [])
 2348 
 2349 isValue env (Var v)
 2350   | Just cval <- lookupVarEnv env v
 2351   = Just cval  -- You might think we could look in the idUnfolding here
 2352                -- but that doesn't take account of which branch of a
 2353                -- case we are in, which is the whole point
 2354 
 2355   | not (isLocalId v) && isCheapUnfolding unf
 2356   = isValue env (unfoldingTemplate unf)
 2357   where
 2358     unf = idUnfolding v
 2359         -- However we do want to consult the unfolding
 2360         -- as well, for let-bound constructors!
 2361 
 2362 isValue env (Lam b e)
 2363   | isTyVar b = case isValue env e of
 2364                   Just _  -> Just LambdaVal
 2365                   Nothing -> Nothing
 2366   | otherwise = Just LambdaVal
 2367 
 2368 isValue env (Tick t e)
 2369   | not (tickishIsCode t)
 2370   = isValue env e
 2371 
 2372 isValue _env expr       -- Maybe it's a constructor application
 2373   | (Var fun, args, _) <- collectArgsTicks (not . tickishIsCode) expr
 2374   = case isDataConWorkId_maybe fun of
 2375 
 2376         Just con | args `lengthAtLeast` dataConRepArity con
 2377                 -- Check saturated; might be > because the
 2378                 --                  arity excludes type args
 2379                 -> Just (ConVal (DataAlt con) args)
 2380 
 2381         _other | valArgCount args < idArity fun
 2382                 -- Under-applied function
 2383                -> Just LambdaVal        -- Partial application
 2384 
 2385         _other -> Nothing
 2386 
 2387 isValue _env _expr = Nothing
 2388 
 2389 valueIsWorkFree :: Value -> Bool
 2390 valueIsWorkFree LambdaVal       = True
 2391 valueIsWorkFree (ConVal _ args) = all exprIsWorkFree args
 2392 
 2393 samePat :: CallPat -> CallPat -> Bool
 2394 samePat (CP { cp_qvars = vs1, cp_args = as1 })
 2395         (CP { cp_qvars = vs2, cp_args = as2 })
 2396   = all2 same as1 as2
 2397   where
 2398     same (Var v1) (Var v2)
 2399         | v1 `elem` vs1 = v2 `elem` vs2
 2400         | v2 `elem` vs2 = False
 2401         | otherwise     = v1 == v2
 2402 
 2403     same (Lit l1)    (Lit l2)    = l1==l2
 2404     same (App f1 a1) (App f2 a2) = same f1 f2 && same a1 a2
 2405 
 2406     same (Type {}) (Type {}) = True     -- Note [Ignore type differences]
 2407     same (Coercion {}) (Coercion {}) = True
 2408     same (Tick _ e1) e2 = same e1 e2  -- Ignore casts and notes
 2409     same (Cast e1 _) e2 = same e1 e2
 2410     same e1 (Tick _ e2) = same e1 e2
 2411     same e1 (Cast e2 _) = same e1 e2
 2412 
 2413     same e1 e2 = warnPprTrace (bad e1 || bad e2) (ppr e1 $$ ppr e2) $
 2414                  False  -- Let, lambda, case should not occur
 2415     bad (Case {}) = True
 2416     bad (Let {})  = True
 2417     bad (Lam {})  = True
 2418     bad _other    = False
 2419 
 2420 {-
 2421 Note [Ignore type differences]
 2422 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 2423 We do not want to generate specialisations where the call patterns
 2424 differ only in their type arguments!  Not only is it utterly useless,
 2425 but it also means that (with polymorphic recursion) we can generate
 2426 an infinite number of specialisations. Example is Data.Sequence.adjustTree,
 2427 I think.
 2428 -}