never executed always true always false
    1 {-# LANGUAGE TupleSections #-}
    2 -- This module contains functions which implement
    3 -- the -finfo-table-map and -fdistinct-constructor-tables flags
    4 module GHC.Stg.Debug(collectDebugInformation) where
    5 
    6 
    7 import GHC.Prelude
    8 
    9 import GHC.Stg.Syntax
   10 
   11 import GHC.Types.Id
   12 import GHC.Types.Tickish
   13 import GHC.Core.DataCon
   14 import GHC.Types.IPE
   15 import GHC.Unit.Module
   16 import GHC.Types.Name   ( getName, getOccName, occNameString, nameSrcSpan)
   17 import GHC.Data.FastString
   18 import GHC.Driver.Session
   19 
   20 import Control.Monad (when)
   21 import Control.Monad.Trans.Reader
   22 import GHC.Utils.Monad.State.Strict
   23 import Control.Monad.Trans.Class
   24 import GHC.Types.Unique.Map
   25 import GHC.Types.SrcLoc
   26 import Control.Applicative
   27 import qualified Data.List.NonEmpty as NE
   28 import Data.List.NonEmpty (NonEmpty(..))
   29 
   30 data SpanWithLabel = SpanWithLabel RealSrcSpan String
   31 
   32 data R = R { rDynFlags :: DynFlags, rModLocation :: ModLocation, rSpan :: Maybe SpanWithLabel }
   33 
   34 type M a = ReaderT R (State InfoTableProvMap) a
   35 
   36 withSpan :: IpeSourceLocation -> M a -> M a
   37 withSpan (new_s, new_l) act = local maybe_replace act
   38   where
   39     maybe_replace r@R{ rModLocation = cur_mod, rSpan = Just (SpanWithLabel old_s _old_l) }
   40       -- prefer spans from the current module
   41       | Just (unpackFS $ srcSpanFile old_s) == ml_hs_file cur_mod
   42       , Just (unpackFS $ srcSpanFile new_s) /= ml_hs_file cur_mod
   43       = r
   44     maybe_replace r
   45       = r { rSpan = Just (SpanWithLabel new_s new_l) }
   46 
   47 collectDebugInformation :: DynFlags -> ModLocation -> [StgTopBinding] -> ([StgTopBinding], InfoTableProvMap)
   48 collectDebugInformation dflags ml bs =
   49     runState (runReaderT (mapM collectTop bs) (R dflags ml Nothing)) emptyInfoTableProvMap
   50 
   51 collectTop :: StgTopBinding -> M StgTopBinding
   52 collectTop (StgTopLifted t) = StgTopLifted <$> collectStgBind t
   53 collectTop tb = return tb
   54 
   55 collectStgBind :: StgBinding -> M StgBinding
   56 collectStgBind  (StgNonRec bndr rhs) = do
   57     rhs' <- collectStgRhs bndr rhs
   58     return (StgNonRec bndr rhs')
   59 collectStgBind (StgRec pairs) = do
   60     es <- mapM (\(b, e) -> (b,) <$> collectStgRhs b e) pairs
   61     return (StgRec es)
   62 
   63 collectStgRhs :: Id -> StgRhs -> M StgRhs
   64 collectStgRhs bndr (StgRhsClosure ext cc us bs e)= do
   65   e' <- collectExpr e
   66   recordInfo bndr e'
   67   return $ StgRhsClosure ext cc us bs e'
   68 collectStgRhs _bndr (StgRhsCon cc dc _mn ticks args) = do
   69   n' <- numberDataCon dc ticks
   70   return (StgRhsCon cc dc n' ticks args)
   71 
   72 
   73 recordInfo :: Id -> StgExpr -> M ()
   74 recordInfo bndr new_rhs = do
   75   modLoc <- asks rModLocation
   76   let
   77     thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
   78     -- A span from the ticks surrounding the new_rhs
   79     best_span = quickSourcePos thisFile new_rhs
   80     -- A back-up span if the bndr had a source position, many do not (think internally generated ids)
   81     bndr_span = (\s -> SpanWithLabel s (occNameString (getOccName bndr)))
   82                   <$> srcSpanToRealSrcSpan (nameSrcSpan (getName bndr))
   83   recordStgIdPosition bndr best_span bndr_span
   84 
   85 collectExpr :: StgExpr -> M StgExpr
   86 collectExpr = go
   87   where
   88     go (StgApp occ as) = return $ StgApp occ as
   89     go (StgLit lit) = return $ StgLit lit
   90     go (StgConApp dc _mn as tys) = do
   91       n' <- numberDataCon dc []
   92       return (StgConApp dc n' as tys)
   93     go (StgOpApp op as ty) = return (StgOpApp op as ty)
   94     go (StgCase scrut bndr ty alts) =
   95       StgCase <$> collectExpr scrut <*> pure bndr <*> pure ty <*> mapM collectAlt alts
   96     go (StgLet ext bind body) = do
   97         bind' <- collectStgBind bind
   98         body' <- go body
   99         return (StgLet ext bind' body')
  100     go (StgLetNoEscape ext bind body) = do
  101         bind' <- collectStgBind bind
  102         body' <- go body
  103         return (StgLetNoEscape ext bind' body')
  104 
  105     go (StgTick tick e) = do
  106        let k = case tick of
  107                 SourceNote ss fp -> withSpan (ss, fp)
  108                 _ -> id
  109        e' <- k (go e)
  110        return (StgTick tick e')
  111 
  112 collectAlt :: StgAlt -> M StgAlt
  113 collectAlt (ac, bs, e) = (ac, bs, ) <$> collectExpr e
  114 
  115 -- | Try to find the best source position surrounding a 'StgExpr'. The
  116 -- heuristic strips ticks from the current expression until it finds one which
  117 -- is from the module currently being compiled. This is the same method that
  118 -- the DWARF information uses to give locations to info tables.
  119 --
  120 -- It is usually a better alternative than using the 'RealSrcSpan' which is carefully
  121 -- propagated downwards by 'withSpan'. It's "quick" because it works only using immediate context rather
  122 -- than looking at the parent context like 'withSpan'
  123 quickSourcePos :: FastString -> StgExpr -> Maybe SpanWithLabel
  124 quickSourcePos cur_mod (StgTick (SourceNote ss m) e)
  125   | srcSpanFile ss == cur_mod = Just (SpanWithLabel ss m)
  126   | otherwise = quickSourcePos cur_mod e
  127 quickSourcePos _ _ = Nothing
  128 
  129 recordStgIdPosition :: Id -> Maybe SpanWithLabel -> Maybe SpanWithLabel -> M ()
  130 recordStgIdPosition id best_span ss = do
  131   dflags <- asks rDynFlags
  132   when (gopt Opt_InfoTableMap dflags) $ do
  133     cc <- asks rSpan
  134     --Useful for debugging why a certain Id gets given a certain span
  135     --pprTraceM "recordStgIdPosition" (ppr id $$ ppr cc $$ ppr best_span $$ ppr ss)
  136     let mbspan = (\(SpanWithLabel rss d) -> (rss, d)) <$> (best_span <|> cc <|> ss)
  137     lift $ modify (\env -> env { provClosure = addToUniqMap (provClosure env) (idName id) (idType id, mbspan) })
  138 
  139 numberDataCon :: DataCon -> [StgTickish] -> M ConstructorNumber
  140 -- Unboxed tuples and sums do not allocate so they
  141 -- have no info tables.
  142 numberDataCon dc _ | isUnboxedTupleDataCon dc = return NoNumber
  143 numberDataCon dc _ | isUnboxedSumDataCon dc = return NoNumber
  144 numberDataCon dc ts = do
  145   dflags <- asks rDynFlags
  146   if not (gopt Opt_DistinctConstructorTables dflags) then return NoNumber else do
  147     env <- lift get
  148     mcc <- asks rSpan
  149     let !mbest_span = (\(SpanWithLabel rss l) -> (rss, l)) <$> (selectTick ts <|> mcc)
  150     let !dcMap' = alterUniqMap (maybe (Just ((0, mbest_span) :| [] ))
  151                         (\xs@((k, _):|_) -> Just $! ((k + 1, mbest_span) `NE.cons` xs))) (provDC env) dc
  152     lift $ put (env { provDC = dcMap' })
  153     let r = lookupUniqMap dcMap' dc
  154     return $ case r of
  155       Nothing -> NoNumber
  156       Just res -> Numbered (fst (NE.head res))
  157 
  158 selectTick :: [StgTickish] -> Maybe SpanWithLabel
  159 selectTick [] = Nothing
  160 selectTick (SourceNote rss d : ts ) = selectTick ts <|> Just (SpanWithLabel rss d)
  161 selectTick (_:ts) = selectTick ts
  162 
  163 {-
  164 Note [Mapping Info Tables to Source Positions]
  165 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  166 
  167 This note describes what the `-finfo-table-map` flag achieves.
  168 
  169 When debugging memory issues it is very useful to be able to map a specific closure
  170 to a position in the source. The prime example is being able to map a THUNK to
  171 a specific place in the source program, the mapping is usually quite precise because
  172 a fresh info table is created for each distinct THUNK.
  173 
  174 The info table map is also used to generate stacktraces.
  175 See Note [Stacktraces from Info Table Provenance Entries (IPE based stack unwinding)]
  176 for details.
  177 
  178 There are three parts to the implementation
  179 
  180 1. In GHC.Stg.Debug, the SourceNote information is used in order to give a source location
  181    to some specific closures.
  182 2. In GHC.Driver.GenerateCgIPEStub, the actually used info tables are collected after the
  183    Cmm pipeline. This is important as it's hard to predict beforehand what code generation
  184    will do and which ids will end up in the generated program. Additionally, info tables of
  185    return frames (used to create stacktraces) are generated in the Cmm pipeline and aren't
  186    available before.
  187 3. During code generation, a mapping from the info table to the statically determined location
  188    is emitted which can then be queried at runtime by various tools.
  189 
  190 -- Giving Source Locations to Closures
  191 
  192 At the moment thunk and constructor closures are added to the map. This information
  193 is collected in the `InfoTableProvMap` which provides a mapping from:
  194 
  195 1. Data constructors to a list of where they are used.
  196 2. `Name`s and where they originate from.
  197 3. Stack represented info tables (return frames) to an approximated source location
  198    of the call that pushed a contiunation on the stacks.
  199 
  200 During the CoreToStg phase, this map is populated whenever something is turned into
  201 a StgRhsClosure or an StgConApp. The current source position is recorded
  202 depending on the location indicated by the surrounding SourceNote.
  203 
  204 The functions which add information to the map are `recordStgIdPosition` and
  205 `numberDataCon`.
  206 
  207 When the `-fdistinct-constructor-tables` flag is turned on then every
  208 usage of a data constructor gets its own distinct info table. This is orchestrated
  209 in `collectExpr` where an incrementing number is used to distinguish each
  210 occurrence of a data constructor.
  211 
  212 -- GenerateCgIPEStub
  213 
  214 The info tables which are actually used in the generated program are collected after
  215 the Cmm pipeline. `initInfoTableProv` is used to create a CStub, that initializes the
  216 map in C code.
  217 
  218 This step has to be done after the Cmm pipeline to make sure that all info tables are
  219 really used and, even more importantly, return frame info tables are generated by the
  220 pipeline.
  221 
  222 -- Code Generation
  223 
  224 The output of these two phases is combined together during code generation.
  225 A C stub is generated which creates the static map from info table pointer to the
  226 information about where that info table was created from. This is created by
  227 `ipInitCode` in the same manner as a C stub is generated for cost centres.
  228 
  229 This information can be consumed in two ways.
  230 
  231 1. The complete mapping is emitted into the eventlog so that external tools such
  232 as eventlog2html can use the information with the heap profile by info table mode.
  233 2. The `lookupIPE` function can be used via the `whereFrom#` primop to introspect
  234 information about a closure in a running Haskell program.
  235 
  236 Note [Distinct Info Tables for Constructors]
  237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  238 
  239 In the old times, each usage of a data constructor used the same info table.
  240 This made it impossible to distinguish which actual usuage of a data constructor was
  241 contributing primarily to the allocation in a program. Using the `-fdistinct-info-tables` flag you
  242 can cause code generation to generate a distinct info table for each usage of
  243 a constructor. Then, when inspecting the heap you can see precisely which usage of a constructor
  244 was responsible for each allocation.
  245 
  246 -}