never executed always true always false
    1 -- -----------------------------------------------------------------------------
    2 --
    3 -- (c) The University of Glasgow 1993-2004
    4 --
    5 --
    6 -- -----------------------------------------------------------------------------
    7 
    8 {-# LANGUAGE BangPatterns #-}
    9 {-# LANGUAGE DeriveFunctor #-}
   10 {-# LANGUAGE FlexibleContexts #-}
   11 {-# LANGUAGE FlexibleInstances #-}
   12 {-# LANGUAGE GADTs #-}
   13 {-# LANGUAGE MultiParamTypeClasses #-}
   14 {-# LANGUAGE PatternSynonyms #-}
   15 {-# LANGUAGE ScopedTypeVariables #-}
   16 {-# LANGUAGE UnboxedTuples #-}
   17 
   18 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
   19 
   20 -- | Native code generator
   21 --
   22 -- The native-code generator has machine-independent and
   23 -- machine-dependent modules.
   24 --
   25 -- This module ("GHC.CmmToAsm") is the top-level machine-independent
   26 -- module.  Before entering machine-dependent land, we do some
   27 -- machine-independent optimisations (defined below) on the
   28 -- 'CmmStmts's.
   29 --
   30 -- We convert to the machine-specific 'Instr' datatype with
   31 -- 'cmmCodeGen', assuming an infinite supply of registers.  We then use
   32 -- a machine-independent register allocator ('regAlloc') to rejoin
   33 -- reality.  Obviously, 'regAlloc' has machine-specific helper
   34 -- functions (see about "RegAllocInfo" below).
   35 --
   36 -- Finally, we order the basic blocks of the function so as to minimise
   37 -- the number of jumps between blocks, by utilising fallthrough wherever
   38 -- possible.
   39 --
   40 -- The machine-dependent bits break down as follows:
   41 --
   42 --   * ["MachRegs"]  Everything about the target platform's machine
   43 --     registers (and immediate operands, and addresses, which tend to
   44 --     intermingle/interact with registers).
   45 --
   46 --   * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
   47 --     have a module of its own), plus a miscellany of other things
   48 --     (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
   49 --
   50 --   * ["MachCodeGen"]  is where 'Cmm' stuff turns into
   51 --     machine instructions.
   52 --
   53 --   * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
   54 --     a 'SDoc').
   55 --
   56 --   * ["RegAllocInfo"] In the register allocator, we manipulate
   57 --     'MRegsState's, which are 'BitSet's, one bit per machine register.
   58 --     When we want to say something about a specific machine register
   59 --     (e.g., ``it gets clobbered by this instruction''), we set/unset
   60 --     its bit.  Obviously, we do this 'BitSet' thing for efficiency
   61 --     reasons.
   62 --
   63 --     The 'RegAllocInfo' module collects together the machine-specific
   64 --     info needed to do register allocation.
   65 --
   66 --    * ["RegisterAlloc"] The (machine-independent) register allocator.
   67 -- -}
   68 --
   69 module GHC.CmmToAsm
   70    ( nativeCodeGen
   71 
   72    -- * Test-only exports: see trac #12744
   73    -- used by testGraphNoSpills, which needs to access
   74    -- the register allocator intermediate data structures
   75    -- cmmNativeGen emits
   76    , cmmNativeGen
   77    , NcgImpl(..)
   78    )
   79 where
   80 
   81 import GHC.Prelude
   82 
   83 import qualified GHC.CmmToAsm.X86   as X86
   84 import qualified GHC.CmmToAsm.PPC   as PPC
   85 import qualified GHC.CmmToAsm.SPARC as SPARC
   86 import qualified GHC.CmmToAsm.AArch64 as AArch64
   87 
   88 import GHC.CmmToAsm.Reg.Liveness
   89 import qualified GHC.CmmToAsm.Reg.Linear                as Linear
   90 
   91 import qualified GHC.Data.Graph.Color                   as Color
   92 import qualified GHC.CmmToAsm.Reg.Graph                 as Color
   93 import qualified GHC.CmmToAsm.Reg.Graph.Stats           as Color
   94 import qualified GHC.CmmToAsm.Reg.Graph.TrivColorable   as Color
   95 
   96 import GHC.Utils.Asm
   97 import GHC.CmmToAsm.Reg.Target
   98 import GHC.Platform
   99 import GHC.CmmToAsm.BlockLayout as BlockLayout
  100 import GHC.Settings.Config
  101 import GHC.CmmToAsm.Instr
  102 import GHC.CmmToAsm.PIC
  103 import GHC.Platform.Reg
  104 import GHC.Platform.Reg.Class (RegClass)
  105 import GHC.CmmToAsm.Monad
  106 import GHC.CmmToAsm.CFG
  107 import GHC.CmmToAsm.Dwarf
  108 import GHC.CmmToAsm.Config
  109 import GHC.CmmToAsm.Types
  110 import GHC.Cmm.DebugBlock
  111 
  112 import GHC.Cmm.BlockId
  113 import GHC.StgToCmm.CgUtils ( fixStgRegisters )
  114 import GHC.Cmm
  115 import GHC.Cmm.Utils
  116 import GHC.Cmm.Dataflow.Collections
  117 import GHC.Cmm.Dataflow.Label
  118 import GHC.Cmm.Dataflow.Block
  119 import GHC.Cmm.Opt           ( cmmMachOpFold )
  120 import GHC.Cmm.Ppr
  121 import GHC.Cmm.CLabel
  122 
  123 import GHC.Types.Unique.FM
  124 import GHC.Types.Unique.Supply
  125 import GHC.Driver.Session
  126 import GHC.Driver.Ppr
  127 import GHC.Utils.Misc
  128 import GHC.Utils.Logger
  129 
  130 import qualified GHC.Utils.Ppr as Pretty
  131 import GHC.Utils.BufHandle
  132 import GHC.Utils.Outputable as Outputable
  133 import GHC.Utils.Panic
  134 import GHC.Utils.Error
  135 import GHC.Utils.Exception (evaluate)
  136 import GHC.Utils.Constants (debugIsOn)
  137 
  138 import GHC.Data.FastString
  139 import GHC.Types.Unique.Set
  140 import GHC.Unit
  141 import GHC.Data.Stream (Stream)
  142 import qualified GHC.Data.Stream as Stream
  143 
  144 import Data.List (sortBy, groupBy)
  145 import Data.Maybe
  146 import Data.Ord         ( comparing )
  147 import Control.Monad
  148 import System.IO
  149 
  150 --------------------
  151 nativeCodeGen :: forall a . Logger -> NCGConfig -> ModLocation -> Handle -> UniqSupply
  152               -> Stream IO RawCmmGroup a
  153               -> IO a
  154 nativeCodeGen logger config modLoc h us cmms
  155  = let platform = ncgPlatform config
  156        nCG' :: ( OutputableP Platform statics, Outputable jumpDest, Instruction instr)
  157             => NcgImpl statics instr jumpDest -> IO a
  158        nCG' ncgImpl = nativeCodeGen' logger config modLoc ncgImpl h us cmms
  159    in case platformArch platform of
  160       ArchX86       -> nCG' (X86.ncgX86     config)
  161       ArchX86_64    -> nCG' (X86.ncgX86_64  config)
  162       ArchPPC       -> nCG' (PPC.ncgPPC     config)
  163       ArchPPC_64 _  -> nCG' (PPC.ncgPPC     config)
  164       ArchSPARC     -> nCG' (SPARC.ncgSPARC config)
  165       ArchSPARC64   -> panic "nativeCodeGen: No NCG for SPARC64"
  166       ArchS390X     -> panic "nativeCodeGen: No NCG for S390X"
  167       ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
  168       ArchAArch64   -> nCG' (AArch64.ncgAArch64 config)
  169       ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
  170       ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
  171       ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
  172       ArchRISCV64   -> panic "nativeCodeGen: No NCG for RISCV64"
  173       ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
  174       ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
  175 
  176 -- | Data accumulated during code generation. Mostly about statistics,
  177 -- but also collects debug data for DWARF generation.
  178 data NativeGenAcc statics instr
  179   = NGS { ngs_imports     :: ![[CLabel]]
  180         , ngs_natives     :: ![[NatCmmDecl statics instr]]
  181              -- ^ Native code generated, for statistics. This might
  182              -- hold a lot of data, so it is important to clear this
  183              -- field as early as possible if it isn't actually
  184              -- required.
  185         , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
  186         , ngs_linearStats :: ![[Linear.RegAllocStats]]
  187         , ngs_labels      :: ![Label]
  188         , ngs_debug       :: ![DebugBlock]
  189         , ngs_dwarfFiles  :: !DwarfFiles
  190         , ngs_unwinds     :: !(LabelMap [UnwindPoint])
  191              -- ^ see Note [Unwinding information in the NCG]
  192              -- and Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock".
  193         }
  194 
  195 {-
  196 Note [Unwinding information in the NCG]
  197 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  198 
  199 Unwind information is a type of metadata which allows a debugging tool
  200 to reconstruct the values of machine registers at the time a procedure was
  201 entered. For the most part, the production of unwind information is handled by
  202 the Cmm stage, where it is represented by CmmUnwind nodes.
  203 
  204 Unfortunately, the Cmm stage doesn't know everything necessary to produce
  205 accurate unwinding information. For instance, the x86-64 calling convention
  206 requires that the stack pointer be aligned to 16 bytes, which in turn means that
  207 GHC must sometimes add padding to $sp prior to performing a foreign call. When
  208 this happens unwind information must be updated accordingly.
  209 For this reason, we make the NCG backends responsible for producing
  210 unwinding tables (with the extractUnwindPoints function in NcgImpl).
  211 
  212 We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
  213 field of NativeGenAcc. This is a label map which contains an entry for each
  214 procedure, containing a list of unwinding points (e.g. a label and an associated
  215 unwinding table).
  216 
  217 See also Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock".
  218 -}
  219 
  220 nativeCodeGen' :: (OutputableP Platform statics, Outputable jumpDest, Instruction instr)
  221                => Logger
  222                -> NCGConfig
  223                -> ModLocation
  224                -> NcgImpl statics instr jumpDest
  225                -> Handle
  226                -> UniqSupply
  227                -> Stream IO RawCmmGroup a
  228                -> IO a
  229 nativeCodeGen' logger config modLoc ncgImpl h us cmms
  230  = do
  231         -- BufHandle is a performance hack.  We could hide it inside
  232         -- Pretty if it weren't for the fact that we do lots of little
  233         -- printDocs here (in order to do codegen in constant space).
  234         bufh <- newBufHandle h
  235         let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
  236         (ngs, us', a) <- cmmNativeGenStream logger config modLoc ncgImpl bufh us
  237                                          cmms ngs0
  238         _ <- finishNativeGen logger config modLoc bufh us' ngs
  239         return a
  240 
  241 finishNativeGen :: Instruction instr
  242                 => Logger
  243                 -> NCGConfig
  244                 -> ModLocation
  245                 -> BufHandle
  246                 -> UniqSupply
  247                 -> NativeGenAcc statics instr
  248                 -> IO UniqSupply
  249 finishNativeGen logger config modLoc bufh@(BufHandle _ _ h) us ngs
  250  = withTimingSilent logger (text "NCG") (`seq` ()) $ do
  251         -- Write debug data and finish
  252         us' <- if not (ncgDwarfEnabled config)
  253                   then return us
  254                   else do
  255                      (dwarf, us') <- dwarfGen config modLoc us (ngs_debug ngs)
  256                      emitNativeCode logger config bufh dwarf
  257                      return us'
  258         bFlush bufh
  259 
  260         -- dump global NCG stats for graph coloring allocator
  261         let stats = concat (ngs_colorStats ngs)
  262         unless (null stats) $ do
  263 
  264           -- build the global register conflict graph
  265           let graphGlobal
  266                   = foldl' Color.union Color.initGraph
  267                   $ [ Color.raGraph stat
  268                           | stat@Color.RegAllocStatsStart{} <- stats]
  269 
  270           dump_stats (Color.pprStats stats graphGlobal)
  271 
  272           let platform = ncgPlatform config
  273           putDumpFileMaybe logger
  274                   Opt_D_dump_asm_conflicts "Register conflict graph"
  275                   FormatText
  276                   $ Color.dotGraph
  277                           (targetRegDotColor platform)
  278                           (Color.trivColorable platform
  279                                   (targetVirtualRegSqueeze platform)
  280                                   (targetRealRegSqueeze platform))
  281                   $ graphGlobal
  282 
  283 
  284         -- dump global NCG stats for linear allocator
  285         let linearStats = concat (ngs_linearStats ngs)
  286         unless (null linearStats) $
  287           dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
  288 
  289         -- write out the imports
  290         let ctx = ncgAsmContext config
  291         printSDocLn ctx Pretty.LeftMode h
  292                 $ makeImportsDoc config (concat (ngs_imports ngs))
  293         return us'
  294   where
  295     dump_stats = logDumpFile logger (mkDumpStyle alwaysQualify)
  296                    Opt_D_dump_asm_stats "NCG stats"
  297                    FormatText
  298 
  299 cmmNativeGenStream :: forall statics jumpDest instr a . (OutputableP Platform statics, Outputable jumpDest, Instruction instr)
  300               => Logger
  301               -> NCGConfig
  302               -> ModLocation
  303               -> NcgImpl statics instr jumpDest
  304               -> BufHandle
  305               -> UniqSupply
  306               -> Stream.Stream IO RawCmmGroup a
  307               -> NativeGenAcc statics instr
  308               -> IO (NativeGenAcc statics instr, UniqSupply, a)
  309 
  310 cmmNativeGenStream logger config modLoc ncgImpl h us cmm_stream ngs
  311  = loop us (Stream.runStream cmm_stream) ngs
  312   where
  313     ncglabel = text "NCG"
  314     loop :: UniqSupply
  315               -> Stream.StreamS IO RawCmmGroup a
  316               -> NativeGenAcc statics instr
  317               -> IO (NativeGenAcc statics instr, UniqSupply, a)
  318     loop us s ngs =
  319       case s of
  320         Stream.Done a ->
  321           return (ngs { ngs_imports = reverse $ ngs_imports ngs
  322                       , ngs_natives = reverse $ ngs_natives ngs
  323                       , ngs_colorStats = reverse $ ngs_colorStats ngs
  324                       , ngs_linearStats = reverse $ ngs_linearStats ngs
  325                       },
  326                   us,
  327                   a)
  328         Stream.Effect m -> m >>= \cmm_stream' -> loop us cmm_stream' ngs
  329         Stream.Yield cmms cmm_stream' -> do
  330           (us', ngs'') <-
  331             withTimingSilent logger
  332                 ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do
  333               -- Generate debug information
  334               let !ndbgs | ncgDwarfEnabled config = cmmDebugGen modLoc cmms
  335                          | otherwise              = []
  336                   dbgMap = debugToMap ndbgs
  337 
  338               -- Generate native code
  339               (ngs',us') <- cmmNativeGens logger config modLoc ncgImpl h
  340                                           dbgMap us cmms ngs 0
  341 
  342               -- Link native code information into debug blocks
  343               -- See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock".
  344               let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
  345                   platform = ncgPlatform config
  346               unless (null ldbgs) $
  347                 putDumpFileMaybe logger Opt_D_dump_debug "Debug Infos" FormatText
  348                   (vcat $ map (pdoc platform) ldbgs)
  349 
  350               -- Accumulate debug information for emission in finishNativeGen.
  351               let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }
  352               return (us', ngs'')
  353 
  354           loop us' cmm_stream' ngs''
  355 
  356 
  357 -- | Do native code generation on all these cmms.
  358 --
  359 cmmNativeGens :: forall statics instr jumpDest.
  360                  (OutputableP Platform statics, Outputable jumpDest, Instruction instr)
  361               => Logger
  362               -> NCGConfig
  363               -> ModLocation
  364               -> NcgImpl statics instr jumpDest
  365               -> BufHandle
  366               -> LabelMap DebugBlock
  367               -> UniqSupply
  368               -> [RawCmmDecl]
  369               -> NativeGenAcc statics instr
  370               -> Int
  371               -> IO (NativeGenAcc statics instr, UniqSupply)
  372 
  373 cmmNativeGens logger config modLoc ncgImpl h dbgMap = go
  374   where
  375     go :: UniqSupply -> [RawCmmDecl]
  376        -> NativeGenAcc statics instr -> Int
  377        -> IO (NativeGenAcc statics instr, UniqSupply)
  378 
  379     go us [] ngs !_ =
  380         return (ngs, us)
  381 
  382     go us (cmm : cmms) ngs count = do
  383         let fileIds = ngs_dwarfFiles ngs
  384         (us', fileIds', native, imports, colorStats, linearStats, unwinds)
  385           <- {-# SCC "cmmNativeGen" #-}
  386              cmmNativeGen logger modLoc ncgImpl us fileIds dbgMap
  387                           cmm count
  388 
  389         -- Generate .file directives for every new file that has been
  390         -- used. Note that it is important that we generate these in
  391         -- ascending order, as Clang's 3.6 assembler complains.
  392         let newFileIds = sortBy (comparing snd) $
  393                          nonDetEltsUFM $ fileIds' `minusUFM` fileIds
  394             -- See Note [Unique Determinism and code generation]
  395             pprDecl (f,n) = text "\t.file " <> ppr n <+>
  396                             pprFilePathString (unpackFS f)
  397 
  398         emitNativeCode logger config h $ vcat $
  399           map pprDecl newFileIds ++
  400           map (pprNatCmmDecl ncgImpl) native
  401 
  402         -- force evaluation all this stuff to avoid space leaks
  403         let platform = ncgPlatform config
  404         {-# SCC "seqString" #-} evaluate $ seqList (showSDocUnsafe $ vcat $ map (pdoc platform) imports) ()
  405 
  406         let !labels' = if ncgDwarfEnabled config
  407                        then cmmDebugLabels isMetaInstr native else []
  408             !natives' = if logHasDumpFlag logger Opt_D_dump_asm_stats
  409                         then native : ngs_natives ngs else []
  410 
  411             mCon = maybe id (:)
  412             ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
  413                       , ngs_natives     = natives'
  414                       , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
  415                       , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
  416                       , ngs_labels      = ngs_labels ngs ++ labels'
  417                       , ngs_dwarfFiles  = fileIds'
  418                       , ngs_unwinds     = ngs_unwinds ngs `mapUnion` unwinds
  419                       }
  420         go us' cmms ngs' (count + 1)
  421 
  422 
  423 emitNativeCode :: Logger -> NCGConfig -> BufHandle -> SDoc -> IO ()
  424 emitNativeCode logger config h sdoc = do
  425 
  426         let ctx = ncgAsmContext config
  427         {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc ctx h sdoc
  428 
  429         -- dump native code
  430         putDumpFileMaybe logger
  431                 Opt_D_dump_asm "Asm code" FormatASM
  432                 sdoc
  433 
  434 -- | Complete native code generation phase for a single top-level chunk of Cmm.
  435 --      Dumping the output of each stage along the way.
  436 --      Global conflict graph and NGC stats
  437 cmmNativeGen
  438     :: forall statics instr jumpDest. (Instruction instr, OutputableP Platform statics, Outputable jumpDest)
  439     => Logger
  440     -> ModLocation
  441     -> NcgImpl statics instr jumpDest
  442         -> UniqSupply
  443         -> DwarfFiles
  444         -> LabelMap DebugBlock
  445         -> RawCmmDecl                                   -- ^ the cmm to generate code for
  446         -> Int                                          -- ^ sequence number of this top thing
  447         -> IO   ( UniqSupply
  448                 , DwarfFiles
  449                 , [NatCmmDecl statics instr]                -- native code
  450                 , [CLabel]                                  -- things imported by this cmm
  451                 , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
  452                 , Maybe [Linear.RegAllocStats]              -- stats for the linear register allocators
  453                 , LabelMap [UnwindPoint]                    -- unwinding information for blocks
  454                 )
  455 
  456 cmmNativeGen logger modLoc ncgImpl us fileIds dbgMap cmm count
  457  = do
  458         let config   = ncgConfig ncgImpl
  459         let platform = ncgPlatform config
  460         let weights  = ncgCfgWeights config
  461 
  462         let proc_name = case cmm of
  463                 (CmmProc _ entry_label _ _) -> pdoc platform entry_label
  464                 _                           -> text "DataChunk"
  465 
  466         -- rewrite assignments to global regs
  467         let fixed_cmm =
  468                 {-# SCC "fixStgRegisters" #-}
  469                 fixStgRegisters platform cmm
  470 
  471         -- cmm to cmm optimisations
  472         let (opt_cmm, imports) =
  473                 {-# SCC "cmmToCmm" #-}
  474                 cmmToCmm config fixed_cmm
  475 
  476         putDumpFileMaybe logger
  477                 Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM
  478                 (pprCmmGroup platform [opt_cmm])
  479 
  480         let cmmCfg = {-# SCC "getCFG" #-}
  481                      getCfgProc platform weights opt_cmm
  482 
  483         -- generate native code from cmm
  484         let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
  485                 {-# SCC "genMachCode" #-}
  486                 initUs us $ genMachCode config modLoc
  487                                         (cmmTopCodeGen ncgImpl)
  488                                         fileIds dbgMap opt_cmm cmmCfg
  489 
  490         putDumpFileMaybe logger
  491                 Opt_D_dump_asm_native "Native code" FormatASM
  492                 (vcat $ map (pprNatCmmDecl ncgImpl) native)
  493 
  494         maybeDumpCfg logger (Just nativeCfgWeights) "CFG Weights - Native" proc_name
  495 
  496         -- tag instructions with register liveness information
  497         -- also drops dead code. We don't keep the cfg in sync on
  498         -- some backends, so don't use it there.
  499         let livenessCfg = if ncgEnableDeadCodeElimination config
  500                                 then Just nativeCfgWeights
  501                                 else Nothing
  502         let (withLiveness, usLive) =
  503                 {-# SCC "regLiveness" #-}
  504                 initUs usGen
  505                         $ mapM (cmmTopLiveness livenessCfg platform) native
  506 
  507         putDumpFileMaybe logger
  508                 Opt_D_dump_asm_liveness "Liveness annotations added"
  509                 FormatCMM
  510                 (vcat $ map (pprLiveCmmDecl platform) withLiveness)
  511 
  512         -- allocate registers
  513         (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
  514          if ( ncgRegsGraph config || ncgRegsIterative config )
  515           then do
  516                 -- the regs usable for allocation
  517                 let (alloc_regs :: UniqFM RegClass (UniqSet RealReg))
  518                         = foldr (\r -> plusUFM_C unionUniqSets
  519                                         $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
  520                                 emptyUFM
  521                         $ allocatableRegs ncgImpl
  522 
  523                 -- do the graph coloring register allocation
  524                 let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
  525                         = {-# SCC "RegAlloc-color" #-}
  526                           initUs usLive
  527                           $ Color.regAlloc
  528                                 config
  529                                 alloc_regs
  530                                 (mkUniqSet [0 .. maxSpillSlots ncgImpl])
  531                                 (maxSpillSlots ncgImpl)
  532                                 withLiveness
  533                                 livenessCfg
  534 
  535                 let ((alloced', stack_updt_blks), usAlloc')
  536                         = initUs usAlloc $
  537                                 case maybe_more_stack of
  538                                 Nothing     -> return (alloced, [])
  539                                 Just amount -> do
  540                                     (alloced',stack_updt_blks) <- unzip <$>
  541                                                 (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
  542                                     return (alloced', concat stack_updt_blks )
  543 
  544 
  545                 -- dump out what happened during register allocation
  546                 putDumpFileMaybe logger
  547                         Opt_D_dump_asm_regalloc "Registers allocated"
  548                         FormatCMM
  549                         (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  550 
  551                 putDumpFileMaybe logger
  552                         Opt_D_dump_asm_regalloc_stages "Build/spill stages"
  553                         FormatText
  554                         (vcat   $ map (\(stage, stats)
  555                                         -> text "# --------------------------"
  556                                         $$ text "#  cmm " <> int count <> text " Stage " <> int stage
  557                                         $$ ppr (fmap (pprInstr platform) stats))
  558                                 $ zip [0..] regAllocStats)
  559 
  560                 let mPprStats =
  561                         if logHasDumpFlag logger Opt_D_dump_asm_stats
  562                          then Just regAllocStats else Nothing
  563 
  564                 -- force evaluation of the Maybe to avoid space leak
  565                 mPprStats `seq` return ()
  566 
  567                 return  ( alloced', usAlloc'
  568                         , mPprStats
  569                         , Nothing
  570                         , [], stack_updt_blks)
  571 
  572           else do
  573                 -- do linear register allocation
  574                 let reg_alloc proc = do
  575                        (alloced, maybe_more_stack, ra_stats) <-
  576                                Linear.regAlloc config proc
  577                        case maybe_more_stack of
  578                          Nothing -> return ( alloced, ra_stats, [] )
  579                          Just amount -> do
  580                            (alloced',stack_updt_blks) <-
  581                                ncgAllocMoreStack ncgImpl amount alloced
  582                            return (alloced', ra_stats, stack_updt_blks )
  583 
  584                 let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
  585                         = {-# SCC "RegAlloc-linear" #-}
  586                           initUs usLive
  587                           $ liftM unzip3
  588                           $ mapM reg_alloc withLiveness
  589 
  590                 putDumpFileMaybe logger
  591                         Opt_D_dump_asm_regalloc "Registers allocated"
  592                         FormatCMM
  593                         (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  594 
  595                 let mPprStats =
  596                         if logHasDumpFlag logger Opt_D_dump_asm_stats
  597                          then Just (catMaybes regAllocStats) else Nothing
  598 
  599                 -- force evaluation of the Maybe to avoid space leak
  600                 mPprStats `seq` return ()
  601 
  602                 return  ( alloced, usAlloc
  603                         , Nothing
  604                         , mPprStats, (catMaybes regAllocStats)
  605                         , concat stack_updt_blks )
  606 
  607         -- Fixupblocks the register allocator inserted (from, regMoves, to)
  608         let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
  609             cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)
  610 
  611         let cfgWithFixupBlks =
  612                 (\cfg -> addNodesBetween weights cfg cfgRegAllocUpdates) <$> livenessCfg
  613 
  614         -- Insert stack update blocks
  615         let postRegCFG =
  616                 pure (foldl' (\m (from,to) -> addImmediateSuccessor weights from to m ))
  617                      <*> cfgWithFixupBlks
  618                      <*> pure stack_updt_blks
  619 
  620         ---- generate jump tables
  621         let tabled      =
  622                 {-# SCC "generateJumpTables" #-}
  623                 generateJumpTables ncgImpl alloced
  624 
  625         when (not $ null nativeCfgWeights) $ putDumpFileMaybe logger
  626                 Opt_D_dump_cfg_weights "CFG Update information"
  627                 FormatText
  628                 ( text "stack:" <+> ppr stack_updt_blks $$
  629                   text "linearAlloc:" <+> ppr cfgRegAllocUpdates )
  630 
  631         ---- shortcut branches
  632         let (shorted, postShortCFG)     =
  633                 {-# SCC "shortcutBranches" #-}
  634                 shortcutBranches config ncgImpl tabled postRegCFG
  635 
  636         let optimizedCFG :: Maybe CFG
  637             optimizedCFG =
  638                 optimizeCFG (ncgCmmStaticPred config) weights cmm <$!> postShortCFG
  639 
  640         maybeDumpCfg logger optimizedCFG "CFG Weights - Final" proc_name
  641 
  642         --TODO: Partially check validity of the cfg.
  643         let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
  644             getBlks _ = []
  645 
  646         when ( ncgEnableDeadCodeElimination config &&
  647                 (ncgAsmLinting config || debugIsOn )) $ do
  648                 let blocks = concatMap getBlks shorted
  649                 let labels = setFromList $ fmap blockId blocks :: LabelSet
  650                 let cfg = fromJust optimizedCFG
  651                 return $! seq (sanityCheckCfg cfg labels $
  652                                 text "cfg not in lockstep") ()
  653 
  654         ---- sequence blocks
  655         let sequenced :: [NatCmmDecl statics instr]
  656             sequenced =
  657                 checkLayout shorted $
  658                 {-# SCC "sequenceBlocks" #-}
  659                 map (BlockLayout.sequenceTop
  660                         ncgImpl optimizedCFG)
  661                     shorted
  662 
  663         let branchOpt :: [NatCmmDecl statics instr]
  664             branchOpt =
  665                 {-# SCC "invertCondBranches" #-}
  666                 map invert sequenced
  667               where
  668                 invertConds :: LabelMap RawCmmStatics -> [NatBasicBlock instr]
  669                             -> [NatBasicBlock instr]
  670                 invertConds = invertCondBranches ncgImpl optimizedCFG
  671                 invert top@CmmData {} = top
  672                 invert (CmmProc info lbl live (ListGraph blocks)) =
  673                     CmmProc info lbl live (ListGraph $ invertConds info blocks)
  674 
  675         ---- expansion of SPARC synthetic instrs
  676         let expanded =
  677                 {-# SCC "sparc_expand" #-}
  678                 ncgExpandTop ncgImpl branchOpt
  679                 --ncgExpandTop ncgImpl sequenced
  680 
  681         putDumpFileMaybe logger
  682                 Opt_D_dump_asm_expanded "Synthetic instructions expanded"
  683                 FormatCMM
  684                 (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
  685 
  686         -- generate unwinding information from cmm
  687         let unwinds :: BlockMap [UnwindPoint]
  688             unwinds =
  689                 {-# SCC "unwindingInfo" #-}
  690                 foldl' addUnwind mapEmpty expanded
  691               where
  692                 addUnwind acc proc =
  693                     acc `mapUnion` computeUnwinding config ncgImpl proc
  694 
  695         return  ( usAlloc
  696                 , fileIds'
  697                 , expanded
  698                 , lastMinuteImports ++ imports
  699                 , ppr_raStatsColor
  700                 , ppr_raStatsLinear
  701                 , unwinds )
  702 
  703 maybeDumpCfg :: Logger -> Maybe CFG -> String -> SDoc -> IO ()
  704 maybeDumpCfg _logger Nothing _ _ = return ()
  705 maybeDumpCfg logger (Just cfg) msg proc_name
  706         | null cfg = return ()
  707         | otherwise
  708         = putDumpFileMaybe logger
  709                 Opt_D_dump_cfg_weights msg
  710                 FormatText
  711                 (proc_name <> char ':' $$ pprEdgeWeights cfg)
  712 
  713 -- | Make sure all blocks we want the layout algorithm to place have been placed.
  714 checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
  715             -> [NatCmmDecl statics instr]
  716 checkLayout procsUnsequenced procsSequenced =
  717         assertPpr (setNull diff) (ppr "Block sequencing dropped blocks:" <> ppr diff)
  718         procsSequenced
  719   where
  720         blocks1 = foldl' (setUnion) setEmpty $
  721                         map getBlockIds procsUnsequenced :: LabelSet
  722         blocks2 = foldl' (setUnion) setEmpty $
  723                         map getBlockIds procsSequenced
  724         diff = setDifference blocks1 blocks2
  725 
  726         getBlockIds (CmmData _ _) = setEmpty
  727         getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
  728                 setFromList $ map blockId blocks
  729 
  730 -- | Compute unwinding tables for the blocks of a procedure
  731 computeUnwinding :: Instruction instr
  732                  => NCGConfig
  733                  -> NcgImpl statics instr jumpDest
  734                  -> NatCmmDecl statics instr
  735                     -- ^ the native code generated for the procedure
  736                  -> LabelMap [UnwindPoint]
  737                     -- ^ unwinding tables for all points of all blocks of the
  738                     -- procedure
  739 computeUnwinding config _ _
  740   | not (ncgComputeUnwinding config) = mapEmpty
  741 computeUnwinding _ _ (CmmData _ _)   = mapEmpty
  742 computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
  743     -- In general we would need to push unwinding information down the
  744     -- block-level call-graph to ensure that we fully account for all
  745     -- relevant register writes within a procedure.
  746     --
  747     -- However, the only unwinding information that we care about in GHC is for
  748     -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind
  749     -- information at the beginning of every block means that there is no need
  750     -- to perform this sort of push-down.
  751     mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
  752                 | BasicBlock blk_lbl instrs <- blks ]
  753 
  754 -- | Build a doc for all the imports.
  755 --
  756 makeImportsDoc :: NCGConfig -> [CLabel] -> SDoc
  757 makeImportsDoc config imports
  758  = dyld_stubs imports
  759             $$
  760             -- On recent versions of Darwin, the linker supports
  761             -- dead-stripping of code and data on a per-symbol basis.
  762             -- There's a hack to make this work in PprMach.pprNatCmmDecl.
  763             (if platformHasSubsectionsViaSymbols platform
  764              then text ".subsections_via_symbols"
  765              else Outputable.empty)
  766             $$
  767                 -- On recent GNU ELF systems one can mark an object file
  768                 -- as not requiring an executable stack. If all objects
  769                 -- linked into a program have this note then the program
  770                 -- will not use an executable stack, which is good for
  771                 -- security. GHC generated code does not need an executable
  772                 -- stack so add the note in:
  773             (if platformHasGnuNonexecStack platform
  774              then text ".section .note.GNU-stack,\"\"," <> sectionType platform "progbits"
  775              else Outputable.empty)
  776             $$
  777                 -- And just because every other compiler does, let's stick in
  778                 -- an identifier directive: .ident "GHC x.y.z"
  779             (if platformHasIdentDirective platform
  780              then let compilerIdent = text "GHC" <+> text cProjectVersion
  781                    in text ".ident" <+> doubleQuotes compilerIdent
  782              else Outputable.empty)
  783 
  784  where
  785         platform = ncgPlatform config
  786 
  787         -- Generate "symbol stubs" for all external symbols that might
  788         -- come from a dynamic library.
  789         dyld_stubs :: [CLabel] -> SDoc
  790 {-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
  791                                     map head $ group $ sort imps-}
  792         -- (Hack) sometimes two Labels pretty-print the same, but have
  793         -- different uniques; so we compare their text versions...
  794         dyld_stubs imps
  795                 | needImportedSymbols config
  796                 = vcat $
  797                         (pprGotDeclaration config :) $
  798                         map ( pprImportedSymbol config . fst . head) $
  799                         groupBy (\(_,a) (_,b) -> a == b) $
  800                         sortBy (\(_,a) (_,b) -> compare a b) $
  801                         map doPpr $
  802                         imps
  803                 | otherwise
  804                 = Outputable.empty
  805 
  806         doPpr lbl = (lbl, renderWithContext
  807                               (ncgAsmContext config)
  808                               (pprCLabel platform AsmStyle lbl))
  809 
  810 -- -----------------------------------------------------------------------------
  811 -- Generate jump tables
  812 
  813 -- Analyzes all native code and generates data sections for all jump
  814 -- table instructions.
  815 generateJumpTables
  816         :: NcgImpl statics instr jumpDest
  817         -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
  818 generateJumpTables ncgImpl xs = concatMap f xs
  819     where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
  820           f p = [p]
  821           g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
  822 
  823 -- -----------------------------------------------------------------------------
  824 -- Shortcut branches
  825 
  826 shortcutBranches
  827         :: forall statics instr jumpDest. (Outputable jumpDest)
  828         => NCGConfig
  829         -> NcgImpl statics instr jumpDest
  830         -> [NatCmmDecl statics instr]
  831         -> Maybe CFG
  832         -> ([NatCmmDecl statics instr],Maybe CFG)
  833 
  834 shortcutBranches config ncgImpl tops weights
  835   | ncgEnableShortcutting config
  836   = ( map (apply_mapping ncgImpl mapping) tops'
  837     , shortcutWeightMap mappingBid <$!> weights )
  838   | otherwise
  839   = (tops, weights)
  840   where
  841     (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
  842     mapping = mapUnions mappings :: LabelMap jumpDest
  843     mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
  844 
  845 build_mapping :: forall instr t d statics jumpDest.
  846                  NcgImpl statics instr jumpDest
  847               -> GenCmmDecl d (LabelMap t) (ListGraph instr)
  848               -> (GenCmmDecl d (LabelMap t) (ListGraph instr)
  849                  ,LabelMap jumpDest)
  850 build_mapping _ top@(CmmData _ _) = (top, mapEmpty)
  851 build_mapping _ (CmmProc info lbl live (ListGraph []))
  852   = (CmmProc info lbl live (ListGraph []), mapEmpty)
  853 build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  854   = (CmmProc info lbl live (ListGraph (head:others)), mapping)
  855         -- drop the shorted blocks, but don't ever drop the first one,
  856         -- because it is pointed to by a global label.
  857   where
  858     -- find all the blocks that just consist of a jump that can be
  859     -- shorted.
  860     -- Don't completely eliminate loops here -- that can leave a dangling jump!
  861     shortcut_blocks :: [(BlockId, jumpDest)]
  862     (_, shortcut_blocks, others) =
  863         foldl' split (setEmpty :: LabelSet, [], []) blocks
  864     split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
  865         | Just jd <- canShortcut ncgImpl insn
  866         , Just dest <- getJumpDestBlockId ncgImpl jd
  867         , not (has_info id)
  868         , (setMember dest s) || dest == id -- loop checks
  869         = (s, shortcut_blocks, b : others)
  870     split (s, shortcut_blocks, others) (BasicBlock id [insn])
  871         | Just dest <- canShortcut ncgImpl insn
  872         , not (has_info id)
  873         = (setInsert id s, (id,dest) : shortcut_blocks, others)
  874     split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
  875 
  876     -- do not eliminate blocks that have an info table
  877     has_info l = mapMember l info
  878 
  879     -- build a mapping from BlockId to JumpDest for shorting branches
  880     mapping = mapFromList shortcut_blocks
  881 
  882 apply_mapping :: NcgImpl statics instr jumpDest
  883               -> LabelMap jumpDest
  884               -> GenCmmDecl statics h (ListGraph instr)
  885               -> GenCmmDecl statics h (ListGraph instr)
  886 apply_mapping ncgImpl ufm (CmmData sec statics)
  887   = CmmData sec (shortcutStatics ncgImpl (\bid -> mapLookup bid ufm) statics)
  888 apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
  889   = CmmProc info lbl live (ListGraph $ map short_bb blocks)
  890   where
  891     short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
  892     short_insn i = shortcutJump ncgImpl (\bid -> mapLookup bid ufm) i
  893                  -- shortcutJump should apply the mapping repeatedly,
  894                  -- just in case we can short multiple branches.
  895 
  896 -- -----------------------------------------------------------------------------
  897 -- Instruction selection
  898 
  899 -- Native code instruction selection for a chunk of stix code.  For
  900 -- this part of the computation, we switch from the UniqSM monad to
  901 -- the NatM monad.  The latter carries not only a Unique, but also an
  902 -- Int denoting the current C stack pointer offset in the generated
  903 -- code; this is needed for creating correct spill offsets on
  904 -- architectures which don't offer, or for which it would be
  905 -- prohibitively expensive to employ, a frame pointer register.  Viz,
  906 -- x86.
  907 
  908 -- The offset is measured in bytes, and indicates the difference
  909 -- between the current (simulated) C stack-ptr and the value it was at
  910 -- the beginning of the block.  For stacks which grow down, this value
  911 -- should be either zero or negative.
  912 
  913 -- Along with the stack pointer offset, we also carry along a LabelMap of
  914 -- DebugBlocks, which we read to generate .location directives.
  915 --
  916 -- Switching between the two monads whilst carrying along the same
  917 -- Unique supply breaks abstraction.  Is that bad?
  918 
  919 genMachCode
  920         :: NCGConfig
  921         -> ModLocation
  922         -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
  923         -> DwarfFiles
  924         -> LabelMap DebugBlock
  925         -> RawCmmDecl
  926         -> CFG
  927         -> UniqSM
  928                 ( [NatCmmDecl statics instr]
  929                 , [CLabel]
  930                 , DwarfFiles
  931                 , CFG
  932                 )
  933 
  934 genMachCode config modLoc cmmTopCodeGen fileIds dbgMap cmm_top cmm_cfg
  935   = do  { initial_us <- getUniqueSupplyM
  936         ; let initial_st           = mkNatM_State initial_us 0 config
  937                                                   modLoc fileIds dbgMap cmm_cfg
  938               (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
  939               final_delta          = natm_delta final_st
  940               final_imports        = natm_imports final_st
  941               final_cfg            = natm_cfg final_st
  942         ; if   final_delta == 0
  943           then return (new_tops, final_imports
  944                       , natm_fileid final_st, final_cfg)
  945           else pprPanic "genMachCode: nonzero final delta" (int final_delta)
  946     }
  947 
  948 -- -----------------------------------------------------------------------------
  949 -- Generic Cmm optimiser
  950 
  951 {-
  952 Here we do:
  953 
  954   (a) Constant folding
  955   (c) Position independent code and dynamic linking
  956         (i)  introduce the appropriate indirections
  957              and position independent refs
  958         (ii) compile a list of imported symbols
  959   (d) Some arch-specific optimizations
  960 
  961 (a) will be moving to the new Hoopl pipeline, however, (c) and
  962 (d) are only needed by the native backend and will continue to live
  963 here.
  964 
  965 Ideas for other things we could do (put these in Hoopl please!):
  966 
  967   - shortcut jumps-to-jumps
  968   - simple CSE: if an expr is assigned to a temp, then replace later occs of
  969     that expr with the temp, until the expr is no longer valid (can push through
  970     temp assignments, and certain assigns to mem...)
  971 -}
  972 
  973 cmmToCmm :: NCGConfig -> RawCmmDecl -> (RawCmmDecl, [CLabel])
  974 cmmToCmm _ top@(CmmData _ _) = (top, [])
  975 cmmToCmm config (CmmProc info lbl live graph)
  976     = runCmmOpt config $
  977       do blocks' <- mapM cmmBlockConFold (toBlockList graph)
  978          return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
  979 
  980 type OptMResult a = (# a, [CLabel] #)
  981 
  982 pattern OptMResult :: a -> b -> (# a, b #)
  983 pattern OptMResult x y = (# x, y #)
  984 {-# COMPLETE OptMResult #-}
  985 
  986 newtype CmmOptM a = CmmOptM (NCGConfig -> [CLabel] -> OptMResult a)
  987     deriving (Functor)
  988 
  989 instance Applicative CmmOptM where
  990     pure x = CmmOptM $ \_ imports -> OptMResult x imports
  991     (<*>) = ap
  992 
  993 instance Monad CmmOptM where
  994   (CmmOptM f) >>= g =
  995     CmmOptM $ \config imports0 ->
  996                 case f config imports0 of
  997                   OptMResult x imports1 ->
  998                     case g x of
  999                       CmmOptM g' -> g' config imports1
 1000 
 1001 instance CmmMakeDynamicReferenceM CmmOptM where
 1002     addImport = addImportCmmOpt
 1003 
 1004 addImportCmmOpt :: CLabel -> CmmOptM ()
 1005 addImportCmmOpt lbl = CmmOptM $ \_ imports -> OptMResult () (lbl:imports)
 1006 
 1007 getCmmOptConfig :: CmmOptM NCGConfig
 1008 getCmmOptConfig = CmmOptM $ \config imports -> OptMResult config imports
 1009 
 1010 runCmmOpt :: NCGConfig -> CmmOptM a -> (a, [CLabel])
 1011 runCmmOpt config (CmmOptM f) =
 1012   case f config [] of
 1013     OptMResult result imports -> (result, imports)
 1014 
 1015 cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
 1016 cmmBlockConFold block = do
 1017   let (entry, middle, last) = blockSplit block
 1018       stmts = blockToList middle
 1019   stmts' <- mapM cmmStmtConFold stmts
 1020   last' <- cmmStmtConFold last
 1021   return $ blockJoin entry (blockFromList stmts') last'
 1022 
 1023 -- This does three optimizations, but they're very quick to check, so we don't
 1024 -- bother turning them off even when the Hoopl code is active.  Since
 1025 -- this is on the old Cmm representation, we can't reuse the code either:
 1026 --  * reg = reg      --> nop
 1027 --  * if 0 then jump --> nop
 1028 --  * if 1 then jump --> jump
 1029 -- We might be tempted to skip this step entirely of not Opt_PIC, but
 1030 -- there is some PowerPC code for the non-PIC case, which would also
 1031 -- have to be separated.
 1032 cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
 1033 cmmStmtConFold stmt
 1034    = case stmt of
 1035         CmmAssign reg src
 1036            -> do src' <- cmmExprConFold DataReference src
 1037                  return $ case src' of
 1038                    CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
 1039                    new_src -> CmmAssign reg new_src
 1040 
 1041         CmmStore addr src
 1042            -> do addr' <- cmmExprConFold DataReference addr
 1043                  src'  <- cmmExprConFold DataReference src
 1044                  return $ CmmStore addr' src'
 1045 
 1046         CmmCall { cml_target = addr }
 1047            -> do addr' <- cmmExprConFold JumpReference addr
 1048                  return $ stmt { cml_target = addr' }
 1049 
 1050         CmmUnsafeForeignCall target regs args
 1051            -> do target' <- case target of
 1052                               ForeignTarget e conv -> do
 1053                                 e' <- cmmExprConFold CallReference e
 1054                                 return $ ForeignTarget e' conv
 1055                               PrimTarget _ ->
 1056                                 return target
 1057                  args' <- mapM (cmmExprConFold DataReference) args
 1058                  return $ CmmUnsafeForeignCall target' regs args'
 1059 
 1060         CmmCondBranch test true false likely
 1061            -> do test' <- cmmExprConFold DataReference test
 1062                  return $ case test' of
 1063                    CmmLit (CmmInt 0 _) -> CmmBranch false
 1064                    CmmLit (CmmInt _ _) -> CmmBranch true
 1065                    _other -> CmmCondBranch test' true false likely
 1066 
 1067         CmmSwitch expr ids
 1068            -> do expr' <- cmmExprConFold DataReference expr
 1069                  return $ CmmSwitch expr' ids
 1070 
 1071         other
 1072            -> return other
 1073 
 1074 cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
 1075 cmmExprConFold referenceKind expr = do
 1076     config <- getCmmOptConfig
 1077 
 1078     let expr' = if not (ncgDoConstantFolding config)
 1079                     then expr
 1080                     else cmmExprCon config expr
 1081 
 1082     cmmExprNative referenceKind expr'
 1083 
 1084 cmmExprCon :: NCGConfig -> CmmExpr -> CmmExpr
 1085 cmmExprCon config (CmmLoad addr rep) = CmmLoad (cmmExprCon config addr) rep
 1086 cmmExprCon config (CmmMachOp mop args)
 1087     = cmmMachOpFold (ncgPlatform config) mop (map (cmmExprCon config) args)
 1088 cmmExprCon _ other = other
 1089 
 1090 -- handles both PIC and non-PIC cases... a very strange mixture
 1091 -- of things to do.
 1092 cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
 1093 cmmExprNative referenceKind expr = do
 1094      config <- getCmmOptConfig
 1095      let platform = ncgPlatform config
 1096          arch = platformArch platform
 1097      case expr of
 1098         CmmLoad addr rep
 1099           -> do addr' <- cmmExprNative DataReference addr
 1100                 return $ CmmLoad addr' rep
 1101 
 1102         CmmMachOp mop args
 1103           -> do args' <- mapM (cmmExprNative DataReference) args
 1104                 return $ CmmMachOp mop args'
 1105 
 1106         CmmLit (CmmBlock id)
 1107           -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
 1108           -- we must convert block Ids to CLabels here, because we
 1109           -- might have to do the PIC transformation.  Hence we must
 1110           -- not modify BlockIds beyond this point.
 1111 
 1112         CmmLit (CmmLabel lbl)
 1113           -> cmmMakeDynamicReference config referenceKind lbl
 1114         CmmLit (CmmLabelOff lbl off)
 1115           -> do dynRef <- cmmMakeDynamicReference config referenceKind lbl
 1116                 -- need to optimize here, since it's late
 1117                 return $ cmmMachOpFold platform (MO_Add (wordWidth platform)) [
 1118                     dynRef,
 1119                     (CmmLit $ CmmInt (fromIntegral off) (wordWidth platform))
 1120                   ]
 1121 
 1122         -- On powerpc (non-PIC), it's easier to jump directly to a label than
 1123         -- to use the register table, so we replace these registers
 1124         -- with the corresponding labels:
 1125         CmmReg (CmmGlobal EagerBlackholeInfo)
 1126           | arch == ArchPPC && not (ncgPIC config)
 1127           -> cmmExprNative referenceKind $
 1128              CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
 1129         CmmReg (CmmGlobal GCEnter1)
 1130           | arch == ArchPPC && not (ncgPIC config)
 1131           -> cmmExprNative referenceKind $
 1132              CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
 1133         CmmReg (CmmGlobal GCFun)
 1134           | arch == ArchPPC && not (ncgPIC config)
 1135           -> cmmExprNative referenceKind $
 1136              CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
 1137 
 1138         other
 1139            -> return other