never executed always true always false
    1 
    2 {-# LANGUAGE LambdaCase #-}
    3 
    4 -----------------------------------------------------------------------------
    5 --
    6 -- Pretty-printing assembly language
    7 --
    8 -- (c) The University of Glasgow 1993-2005
    9 --
   10 -----------------------------------------------------------------------------
   11 
   12 module GHC.CmmToAsm.X86.Ppr (
   13         pprNatCmmDecl,
   14         pprData,
   15         pprInstr,
   16         pprFormat,
   17         pprImm,
   18         pprDataItem,
   19 )
   20 
   21 where
   22 
   23 import GHC.Prelude
   24 
   25 import GHC.Platform
   26 import GHC.Platform.Reg
   27 
   28 import GHC.CmmToAsm.X86.Regs
   29 import GHC.CmmToAsm.X86.Instr
   30 import GHC.CmmToAsm.X86.Cond
   31 import GHC.CmmToAsm.Config
   32 import GHC.CmmToAsm.Format
   33 import GHC.CmmToAsm.Types
   34 import GHC.CmmToAsm.Utils
   35 import GHC.CmmToAsm.Ppr
   36 
   37 import GHC.Cmm              hiding (topInfoTable)
   38 import GHC.Cmm.Dataflow.Collections
   39 import GHC.Cmm.Dataflow.Label
   40 import GHC.Cmm.BlockId
   41 import GHC.Cmm.CLabel
   42 
   43 import GHC.Types.Basic (Alignment, mkAlignment, alignmentBytes)
   44 import GHC.Types.Unique ( pprUniqueAlways )
   45 
   46 import GHC.Utils.Outputable
   47 import GHC.Utils.Panic
   48 
   49 import Data.Word
   50 
   51 -- -----------------------------------------------------------------------------
   52 -- Printing this stuff out
   53 --
   54 --
   55 -- Note [Subsections Via Symbols]
   56 --
   57 -- If we are using the .subsections_via_symbols directive
   58 -- (available on recent versions of Darwin),
   59 -- we have to make sure that there is some kind of reference
   60 -- from the entry code to a label on the _top_ of the info table,
   61 -- so that the linker will not think it is unreferenced and dead-strip
   62 -- it. That's why the label is called a DeadStripPreventer (_dsp).
   63 --
   64 -- The LLVM code gen already creates `iTableSuf` symbols, where
   65 -- the X86 would generate the DeadStripPreventer (_dsp) symbol.
   66 -- Therefore all that is left for llvm code gen, is to ensure
   67 -- that all the `iTableSuf` symbols are marked as used.
   68 -- As of this writing the documentation regarding the
   69 -- .subsections_via_symbols and -dead_strip can be found at
   70 -- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101>
   71 
   72 pprProcAlignment :: NCGConfig -> SDoc
   73 pprProcAlignment config = maybe empty (pprAlign platform . mkAlignment) (ncgProcAlignment config)
   74    where
   75       platform = ncgPlatform config
   76 
   77 pprNatCmmDecl :: NCGConfig -> NatCmmDecl (Alignment, RawCmmStatics) Instr -> SDoc
   78 pprNatCmmDecl config (CmmData section dats) =
   79   pprSectionAlign config section $$ pprDatas config dats
   80 
   81 pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
   82   let platform = ncgPlatform config in
   83   pprProcAlignment config $$
   84   case topInfoTable proc of
   85     Nothing ->
   86         -- special case for code without info table:
   87         pprSectionAlign config (Section Text lbl) $$
   88         pprProcAlignment config $$
   89         pprProcLabel config lbl $$
   90         pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed
   91         vcat (map (pprBasicBlock config top_info) blocks) $$
   92         ppWhen (ncgDwarfEnabled config) (pprBlockEndLabel platform lbl $$ pprProcEndLabel platform lbl) $$
   93         pprSizeDecl platform lbl
   94 
   95     Just (CmmStaticsRaw info_lbl _) ->
   96       pprSectionAlign config (Section Text info_lbl) $$
   97       pprProcAlignment config $$
   98       pprProcLabel config lbl $$
   99       (if platformHasSubsectionsViaSymbols platform
  100           then pdoc platform (mkDeadStripPreventer info_lbl) <> colon
  101           else empty) $$
  102       vcat (map (pprBasicBlock config top_info) blocks) $$
  103       ppWhen (ncgDwarfEnabled config) (pprProcEndLabel platform info_lbl) $$
  104       -- above: Even the first block gets a label, because with branch-chain
  105       -- elimination, it might be the target of a goto.
  106       (if platformHasSubsectionsViaSymbols platform
  107        then -- See Note [Subsections Via Symbols]
  108                 text "\t.long "
  109             <+> pdoc platform info_lbl
  110             <+> char '-'
  111             <+> pdoc platform (mkDeadStripPreventer info_lbl)
  112        else empty) $$
  113       pprSizeDecl platform info_lbl
  114 
  115 -- | Output an internal proc label. See Note [Internal proc labels] in CLabel.
  116 pprProcLabel :: NCGConfig -> CLabel -> SDoc
  117 pprProcLabel config lbl
  118   | ncgExposeInternalSymbols config
  119   , Just lbl' <- ppInternalProcLabel (ncgThisModule config) lbl
  120   = lbl' <> colon
  121   | otherwise
  122   = empty
  123 
  124 pprProcEndLabel :: Platform -> CLabel -- ^ Procedure name
  125                 -> SDoc
  126 pprProcEndLabel platform lbl =
  127     pdoc platform (mkAsmTempProcEndLabel lbl) <> colon
  128 
  129 pprBlockEndLabel :: Platform -> CLabel -- ^ Block name
  130                  -> SDoc
  131 pprBlockEndLabel platform lbl =
  132     pdoc platform (mkAsmTempEndLabel lbl) <> colon
  133 
  134 -- | Output the ELF .size directive.
  135 pprSizeDecl :: Platform -> CLabel -> SDoc
  136 pprSizeDecl platform lbl
  137  = if osElfTarget (platformOS platform)
  138    then text "\t.size" <+> pdoc platform lbl <> text ", .-" <> pdoc platform lbl
  139    else empty
  140 
  141 pprBasicBlock :: NCGConfig -> LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc
  142 pprBasicBlock config info_env (BasicBlock blockid instrs)
  143   = maybe_infotable $
  144     pprLabel platform asmLbl $$
  145     vcat (map (pprInstr platform) instrs) $$
  146     ppWhen (ncgDwarfEnabled config) (
  147       -- Emit both end labels since this may end up being a standalone
  148       -- top-level block
  149       pprBlockEndLabel platform asmLbl
  150       <> pprProcEndLabel platform asmLbl
  151     )
  152   where
  153     asmLbl = blockLbl blockid
  154     platform = ncgPlatform config
  155     maybe_infotable c = case mapLookup blockid info_env of
  156        Nothing -> c
  157        Just (CmmStaticsRaw infoLbl info) ->
  158            pprAlignForSection platform Text $$
  159            infoTableLoc $$
  160            vcat (map (pprData config) info) $$
  161            pprLabel platform infoLbl $$
  162            c $$
  163            ppWhen (ncgDwarfEnabled config) (pdoc platform (mkAsmTempEndLabel infoLbl) <> colon)
  164 
  165     -- Make sure the info table has the right .loc for the block
  166     -- coming right after it. See [Note: Info Offset]
  167     infoTableLoc = case instrs of
  168       (l@LOCATION{} : _) -> pprInstr platform l
  169       _other             -> empty
  170 
  171 
  172 pprDatas :: NCGConfig -> (Alignment, RawCmmStatics) -> SDoc
  173 -- See note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel".
  174 pprDatas config (_, CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
  175   | lbl == mkIndStaticInfoLabel
  176   , let labelInd (CmmLabelOff l _) = Just l
  177         labelInd (CmmLabel l) = Just l
  178         labelInd _ = Nothing
  179   , Just ind' <- labelInd ind
  180   , alias `mayRedirectTo` ind'
  181   = pprGloblDecl (ncgPlatform config) alias
  182     $$ text ".equiv" <+> pdoc (ncgPlatform config) alias <> comma <> pdoc (ncgPlatform config) (CmmLabel ind')
  183 
  184 pprDatas config (align, (CmmStaticsRaw lbl dats))
  185  = vcat (pprAlign platform align : pprLabel platform lbl : map (pprData config) dats)
  186    where
  187       platform = ncgPlatform config
  188 
  189 pprData :: NCGConfig -> CmmStatic -> SDoc
  190 pprData _config (CmmString str) = pprString str
  191 pprData _config (CmmFileEmbed path) = pprFileEmbed path
  192 
  193 pprData config (CmmUninitialised bytes)
  194  = let platform = ncgPlatform config
  195    in if platformOS platform == OSDarwin
  196          then text ".space " <> int bytes
  197          else text ".skip "  <> int bytes
  198 
  199 pprData config (CmmStaticLit lit) = pprDataItem config lit
  200 
  201 pprGloblDecl :: Platform -> CLabel -> SDoc
  202 pprGloblDecl platform lbl
  203   | not (externallyVisibleCLabel lbl) = empty
  204   | otherwise = text ".globl " <> pdoc platform lbl
  205 
  206 pprLabelType' :: Platform -> CLabel -> SDoc
  207 pprLabelType' platform lbl =
  208   if isCFunctionLabel lbl || functionOkInfoTable then
  209     text "@function"
  210   else
  211     text "@object"
  212   where
  213     {-
  214     NOTE: This is a bit hacky.
  215 
  216     With the `tablesNextToCode` info tables look like this:
  217     ```
  218       <info table data>
  219     label_info:
  220       <info table code>
  221     ```
  222     So actually info table label points exactly to the code and we can mark
  223     the label as @function. (This is required to make perf and potentially other
  224     tools to work on Haskell binaries).
  225     This usually works well but it can cause issues with a linker.
  226     A linker uses different algorithms for the relocation depending on
  227     the symbol type.For some reason, a linker will generate JUMP_SLOT relocation
  228     when constructor info table is referenced from a data section.
  229     This only happens with static constructor call so
  230     we mark _con_info symbols as `@object` to avoid the issue with relocations.
  231 
  232     @SimonMarlow hack explanation:
  233     "The reasoning goes like this:
  234 
  235     * The danger when we mark a symbol as `@function` is that the linker will
  236       redirect it to point to the PLT and use a `JUMP_SLOT` relocation when
  237       the symbol refers to something outside the current shared object.
  238       A PLT / JUMP_SLOT reference only works for symbols that we jump to, not
  239       for symbols representing data,, nor for info table symbol references which
  240       we expect to point directly to the info table.
  241     * GHC generates code that might refer to any info table symbol from the text
  242       segment, but that's OK, because those will be explicit GOT references
  243       generated by the code generator.
  244     * When we refer to info tables from the data segment, it's either
  245       * a FUN_STATIC/THUNK_STATIC local to this module
  246       * a `con_info` that could be from anywhere
  247 
  248     So, the only info table symbols that we might refer to from the data segment
  249     of another shared object are `con_info` symbols, so those are the ones we
  250     need to exclude from getting the @function treatment.
  251     "
  252 
  253     A good place to check for more
  254     https://gitlab.haskell.org/ghc/ghc/wikis/commentary/position-independent-code
  255 
  256     Another possible hack is to create an extra local function symbol for
  257     every code-like thing to give the needed information for to the tools
  258     but mess up with the relocation. https://phabricator.haskell.org/D4730
  259     -}
  260     functionOkInfoTable = platformTablesNextToCode platform &&
  261       isInfoTableLabel lbl && not (isConInfoTableLabel lbl)
  262 
  263 
  264 pprTypeDecl :: Platform -> CLabel -> SDoc
  265 pprTypeDecl platform lbl
  266     = if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
  267       then text ".type " <> pdoc platform lbl <> text ", " <> pprLabelType' platform lbl
  268       else empty
  269 
  270 pprLabel :: Platform -> CLabel -> SDoc
  271 pprLabel platform lbl =
  272    pprGloblDecl platform lbl
  273    $$ pprTypeDecl platform lbl
  274    $$ (pdoc platform lbl <> colon)
  275 
  276 pprAlign :: Platform -> Alignment -> SDoc
  277 pprAlign platform alignment
  278         = text ".align " <> int (alignmentOn platform)
  279   where
  280         bytes = alignmentBytes alignment
  281         alignmentOn platform = if platformOS platform == OSDarwin
  282                                then log2 bytes
  283                                else      bytes
  284 
  285         log2 :: Int -> Int  -- cache the common ones
  286         log2 1 = 0
  287         log2 2 = 1
  288         log2 4 = 2
  289         log2 8 = 3
  290         log2 n = 1 + log2 (n `quot` 2)
  291 
  292 pprReg :: Platform -> Format -> Reg -> SDoc
  293 pprReg platform f r
  294   = case r of
  295       RegReal    (RealRegSingle i) ->
  296           if target32Bit platform then ppr32_reg_no f i
  297                                   else ppr64_reg_no f i
  298       RegReal    (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
  299       RegVirtual (VirtualRegI  u)  -> text "%vI_"   <> pprUniqueAlways u
  300       RegVirtual (VirtualRegHi u)  -> text "%vHi_"  <> pprUniqueAlways u
  301       RegVirtual (VirtualRegF  u)  -> text "%vF_"   <> pprUniqueAlways u
  302       RegVirtual (VirtualRegD  u)  -> text "%vD_"   <> pprUniqueAlways u
  303 
  304   where
  305     ppr32_reg_no :: Format -> Int -> SDoc
  306     ppr32_reg_no II8   = ppr32_reg_byte
  307     ppr32_reg_no II16  = ppr32_reg_word
  308     ppr32_reg_no _     = ppr32_reg_long
  309 
  310     ppr32_reg_byte i =
  311       case i of {
  312          0 -> text "%al";     1 -> text "%bl";
  313          2 -> text "%cl";     3 -> text "%dl";
  314         _  -> text "very naughty I386 byte register: " <> int i
  315       }
  316 
  317     ppr32_reg_word i =
  318       case i of {
  319          0 -> text "%ax";     1 -> text "%bx";
  320          2 -> text "%cx";     3 -> text "%dx";
  321          4 -> text "%si";     5 -> text "%di";
  322          6 -> text "%bp";     7 -> text "%sp";
  323         _  -> text "very naughty I386 word register"
  324       }
  325 
  326     ppr32_reg_long i =
  327       case i of {
  328          0 -> text "%eax";    1 -> text "%ebx";
  329          2 -> text "%ecx";    3 -> text "%edx";
  330          4 -> text "%esi";    5 -> text "%edi";
  331          6 -> text "%ebp";    7 -> text "%esp";
  332          _  -> ppr_reg_float i
  333       }
  334 
  335     ppr64_reg_no :: Format -> Int -> SDoc
  336     ppr64_reg_no II8   = ppr64_reg_byte
  337     ppr64_reg_no II16  = ppr64_reg_word
  338     ppr64_reg_no II32  = ppr64_reg_long
  339     ppr64_reg_no _     = ppr64_reg_quad
  340 
  341     ppr64_reg_byte i =
  342       case i of {
  343          0 -> text "%al";      1 -> text "%bl";
  344          2 -> text "%cl";      3 -> text "%dl";
  345          4 -> text "%sil";     5 -> text "%dil"; -- new 8-bit regs!
  346          6 -> text "%bpl";     7 -> text "%spl";
  347          8 -> text "%r8b";     9 -> text "%r9b";
  348         10 -> text "%r10b";   11 -> text "%r11b";
  349         12 -> text "%r12b";   13 -> text "%r13b";
  350         14 -> text "%r14b";   15 -> text "%r15b";
  351         _  -> text "very naughty x86_64 byte register: " <> int i
  352       }
  353 
  354     ppr64_reg_word i =
  355       case i of {
  356          0 -> text "%ax";      1 -> text "%bx";
  357          2 -> text "%cx";      3 -> text "%dx";
  358          4 -> text "%si";      5 -> text "%di";
  359          6 -> text "%bp";      7 -> text "%sp";
  360          8 -> text "%r8w";     9 -> text "%r9w";
  361         10 -> text "%r10w";   11 -> text "%r11w";
  362         12 -> text "%r12w";   13 -> text "%r13w";
  363         14 -> text "%r14w";   15 -> text "%r15w";
  364         _  -> text "very naughty x86_64 word register"
  365       }
  366 
  367     ppr64_reg_long i =
  368       case i of {
  369          0 -> text "%eax";    1  -> text "%ebx";
  370          2 -> text "%ecx";    3  -> text "%edx";
  371          4 -> text "%esi";    5  -> text "%edi";
  372          6 -> text "%ebp";    7  -> text "%esp";
  373          8 -> text "%r8d";    9  -> text "%r9d";
  374         10 -> text "%r10d";   11 -> text "%r11d";
  375         12 -> text "%r12d";   13 -> text "%r13d";
  376         14 -> text "%r14d";   15 -> text "%r15d";
  377         _  -> text "very naughty x86_64 register"
  378       }
  379 
  380     ppr64_reg_quad i =
  381       case i of {
  382          0 -> text "%rax";     1 -> text "%rbx";
  383          2 -> text "%rcx";     3 -> text "%rdx";
  384          4 -> text "%rsi";     5 -> text "%rdi";
  385          6 -> text "%rbp";     7 -> text "%rsp";
  386          8 -> text "%r8";      9 -> text "%r9";
  387         10 -> text "%r10";    11 -> text "%r11";
  388         12 -> text "%r12";    13 -> text "%r13";
  389         14 -> text "%r14";    15 -> text "%r15";
  390         _  -> ppr_reg_float i
  391       }
  392 
  393 ppr_reg_float :: Int -> SDoc
  394 ppr_reg_float i = case i of
  395         16 -> text "%xmm0" ;   17 -> text "%xmm1"
  396         18 -> text "%xmm2" ;   19 -> text "%xmm3"
  397         20 -> text "%xmm4" ;   21 -> text "%xmm5"
  398         22 -> text "%xmm6" ;   23 -> text "%xmm7"
  399         24 -> text "%xmm8" ;   25 -> text "%xmm9"
  400         26 -> text "%xmm10";   27 -> text "%xmm11"
  401         28 -> text "%xmm12";   29 -> text "%xmm13"
  402         30 -> text "%xmm14";   31 -> text "%xmm15"
  403         _  -> text "very naughty x86 register"
  404 
  405 pprFormat :: Format -> SDoc
  406 pprFormat x = case x of
  407   II8   -> text "b"
  408   II16  -> text "w"
  409   II32  -> text "l"
  410   II64  -> text "q"
  411   FF32  -> text "ss"      -- "scalar single-precision float" (SSE2)
  412   FF64  -> text "sd"      -- "scalar double-precision float" (SSE2)
  413 
  414 pprFormat_x87 :: Format -> SDoc
  415 pprFormat_x87 x = case x of
  416   FF32  -> text "s"
  417   FF64  -> text "l"
  418   _     -> panic "X86.Ppr.pprFormat_x87"
  419 
  420 
  421 pprCond :: Cond -> SDoc
  422 pprCond c = case c of {
  423   GEU     -> text "ae";   LU   -> text "b";
  424   EQQ     -> text "e";    GTT  -> text "g";
  425   GE      -> text "ge";   GU   -> text "a";
  426   LTT     -> text "l";    LE   -> text "le";
  427   LEU     -> text "be";   NE   -> text "ne";
  428   NEG     -> text "s";    POS  -> text "ns";
  429   CARRY   -> text "c";   OFLO  -> text "o";
  430   PARITY  -> text "p";   NOTPARITY -> text "np";
  431   ALWAYS  -> text "mp"}
  432 
  433 
  434 pprImm :: Platform -> Imm -> SDoc
  435 pprImm platform = \case
  436    ImmInt i            -> int i
  437    ImmInteger i        -> integer i
  438    ImmCLbl l           -> pdoc platform l
  439    ImmIndex l i        -> pdoc platform l <> char '+' <> int i
  440    ImmLit s            -> s
  441    ImmFloat f          -> float $ fromRational f
  442    ImmDouble d         -> double $ fromRational d
  443    ImmConstantSum a b  -> pprImm platform a <> char '+' <> pprImm platform b
  444    ImmConstantDiff a b -> pprImm platform a <> char '-' <> lparen <> pprImm platform b <> rparen
  445 
  446 
  447 
  448 pprAddr :: Platform -> AddrMode -> SDoc
  449 pprAddr platform (ImmAddr imm off)
  450   = let pp_imm = pprImm platform imm
  451     in
  452     if (off == 0) then
  453         pp_imm
  454     else if (off < 0) then
  455         pp_imm <> int off
  456     else
  457         pp_imm <> char '+' <> int off
  458 
  459 pprAddr platform (AddrBaseIndex base index displacement)
  460   = let
  461         pp_disp  = ppr_disp displacement
  462         pp_off p = pp_disp <> char '(' <> p <> char ')'
  463         pp_reg r = pprReg platform (archWordFormat (target32Bit platform)) r
  464     in
  465     case (base, index) of
  466       (EABaseNone,  EAIndexNone) -> pp_disp
  467       (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
  468       (EABaseRip,   EAIndexNone) -> pp_off (text "%rip")
  469       (EABaseNone,  EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
  470       (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
  471                                        <> comma <> int i)
  472       _                         -> panic "X86.Ppr.pprAddr: no match"
  473 
  474   where
  475     ppr_disp (ImmInt 0) = empty
  476     ppr_disp imm        = pprImm platform imm
  477 
  478 -- | Print section header and appropriate alignment for that section.
  479 pprSectionAlign :: NCGConfig -> Section -> SDoc
  480 pprSectionAlign _config (Section (OtherSection _) _) =
  481      panic "X86.Ppr.pprSectionAlign: unknown section"
  482 pprSectionAlign config sec@(Section seg _) =
  483     pprSectionHeader config sec $$
  484     pprAlignForSection (ncgPlatform config) seg
  485 
  486 -- | Print appropriate alignment for the given section type.
  487 pprAlignForSection :: Platform -> SectionType -> SDoc
  488 pprAlignForSection platform seg =
  489     text ".align " <>
  490     case platformOS platform of
  491       -- Darwin: alignments are given as shifts.
  492       OSDarwin
  493        | target32Bit platform ->
  494           case seg of
  495            ReadOnlyData16    -> int 4
  496            CString           -> int 1
  497            _                 -> int 2
  498        | otherwise ->
  499           case seg of
  500            ReadOnlyData16    -> int 4
  501            CString           -> int 1
  502            _                 -> int 3
  503       -- Other: alignments are given as bytes.
  504       _
  505        | target32Bit platform ->
  506           case seg of
  507            Text              -> text "4,0x90"
  508            ReadOnlyData16    -> int 16
  509            CString           -> int 1
  510            _                 -> int 4
  511        | otherwise ->
  512           case seg of
  513            ReadOnlyData16    -> int 16
  514            CString           -> int 1
  515            _                 -> int 8
  516 
  517 pprDataItem :: NCGConfig -> CmmLit -> SDoc
  518 pprDataItem config lit
  519   = vcat (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit)
  520     where
  521         platform = ncgPlatform config
  522         imm = litToImm lit
  523 
  524         -- These seem to be common:
  525         ppr_item II8   _ = [text "\t.byte\t" <> pprImm platform imm]
  526         ppr_item II16  _ = [text "\t.word\t" <> pprImm platform imm]
  527         ppr_item II32  _ = [text "\t.long\t" <> pprImm platform imm]
  528 
  529         ppr_item FF32 _ = [text "\t.float\t" <> pprImm platform imm]
  530         ppr_item FF64 _ = [text "\t.double\t" <> pprImm platform imm]
  531 
  532         ppr_item II64 _
  533             = case platformOS platform of
  534               OSDarwin
  535                | target32Bit platform ->
  536                   case lit of
  537                   CmmInt x _ ->
  538                       [text "\t.long\t"
  539                           <> int (fromIntegral (fromIntegral x :: Word32)),
  540                        text "\t.long\t"
  541                           <> int (fromIntegral
  542                               (fromIntegral (x `shiftR` 32) :: Word32))]
  543                   _ -> panic "X86.Ppr.ppr_item: no match for II64"
  544                | otherwise ->
  545                   [text "\t.quad\t" <> pprImm platform imm]
  546               _
  547                | target32Bit platform ->
  548                   [text "\t.quad\t" <> pprImm platform imm]
  549                | otherwise ->
  550                   -- x86_64: binutils can't handle the R_X86_64_PC64
  551                   -- relocation type, which means we can't do
  552                   -- pc-relative 64-bit addresses. Fortunately we're
  553                   -- assuming the small memory model, in which all such
  554                   -- offsets will fit into 32 bits, so we have to stick
  555                   -- to 32-bit offset fields and modify the RTS
  556                   -- appropriately
  557                   --
  558                   -- See Note [x86-64-relative] in rts/include/rts/storage/InfoTables.h
  559                   --
  560                   case lit of
  561                   -- A relative relocation:
  562                   CmmLabelDiffOff _ _ _ _ ->
  563                       [text "\t.long\t" <> pprImm platform imm,
  564                        text "\t.long\t0"]
  565                   _ ->
  566                       [text "\t.quad\t" <> pprImm platform imm]
  567 
  568 
  569 asmComment :: SDoc -> SDoc
  570 asmComment c = whenPprDebug $ text "# " <> c
  571 
  572 pprInstr :: Platform -> Instr -> SDoc
  573 pprInstr platform i = case i of
  574    COMMENT s
  575       -> asmComment s
  576 
  577    LOCATION file line col _name
  578       -> text "\t.loc " <> ppr file <+> ppr line <+> ppr col
  579 
  580    DELTA d
  581       -> asmComment $ text ("\tdelta = " ++ show d)
  582 
  583    NEWBLOCK _
  584       -> panic "pprInstr: NEWBLOCK"
  585 
  586    UNWIND lbl d
  587       -> asmComment (text "\tunwind = " <> pdoc platform d)
  588          $$ pdoc platform lbl <> colon
  589 
  590    LDATA _ _
  591       -> panic "pprInstr: LDATA"
  592 
  593 {-
  594    SPILL reg slot
  595       -> hcat [
  596            text "\tSPILL",
  597            char ' ',
  598            pprUserReg reg,
  599            comma,
  600            text "SLOT" <> parens (int slot)]
  601 
  602    RELOAD slot reg
  603       -> hcat [
  604         text "\tRELOAD",
  605         char ' ',
  606         text "SLOT" <> parens (int slot),
  607         comma,
  608         pprUserReg reg]
  609 -}
  610 
  611    -- Replace 'mov $0x0,%reg' by 'xor %reg,%reg', which is smaller and cheaper.
  612    -- The code generator catches most of these already, but not all.
  613    MOV format (OpImm (ImmInt 0)) dst@(OpReg _)
  614      -> pprInstr platform (XOR format' dst dst)
  615         where format' = case format of
  616                 II64 -> II32          -- 32-bit version is equivalent, and smaller
  617                 _    -> format
  618 
  619    MOV format src dst
  620      -> pprFormatOpOp (text "mov") format src dst
  621 
  622    CMOV cc format src dst
  623      -> pprCondOpReg (text "cmov") format cc src dst
  624 
  625    MOVZxL II32 src dst
  626       -> pprFormatOpOp (text "mov") II32 src dst
  627         -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
  628         -- movl.  But we represent it as a MOVZxL instruction, because
  629         -- the reg alloc would tend to throw away a plain reg-to-reg
  630         -- move, and we still want it to do that.
  631 
  632    MOVZxL formats src dst
  633       -> pprFormatOpOpCoerce (text "movz") formats II32 src dst
  634         -- zero-extension only needs to extend to 32 bits: on x86_64,
  635         -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
  636         -- instruction is shorter.
  637 
  638    MOVSxL formats src dst
  639       -> pprFormatOpOpCoerce (text "movs") formats (archWordFormat (target32Bit platform)) src dst
  640 
  641    -- here we do some patching, since the physical registers are only set late
  642    -- in the code generation.
  643    LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3)
  644       | reg1 == reg3
  645       -> pprFormatOpOp (text "add") format (OpReg reg2) dst
  646 
  647    LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3)
  648       | reg2 == reg3
  649       -> pprFormatOpOp (text "add") format (OpReg reg1) dst
  650 
  651    LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3)
  652       | reg1 == reg3
  653       -> pprInstr platform (ADD format (OpImm displ) dst)
  654 
  655    LEA format src dst
  656       -> pprFormatOpOp (text "lea") format src dst
  657 
  658    ADD format (OpImm (ImmInt (-1))) dst
  659       -> pprFormatOp (text "dec") format dst
  660 
  661    ADD format (OpImm (ImmInt 1)) dst
  662       -> pprFormatOp (text "inc") format dst
  663 
  664    ADD format src dst
  665       -> pprFormatOpOp (text "add") format src dst
  666 
  667    ADC format src dst
  668       -> pprFormatOpOp (text "adc") format src dst
  669 
  670    SUB format src dst
  671       -> pprFormatOpOp (text "sub") format src dst
  672 
  673    SBB format src dst
  674       -> pprFormatOpOp (text "sbb") format src dst
  675 
  676    IMUL format op1 op2
  677       -> pprFormatOpOp (text "imul") format op1 op2
  678 
  679    ADD_CC format src dst
  680       -> pprFormatOpOp (text "add") format src dst
  681 
  682    SUB_CC format src dst
  683       -> pprFormatOpOp (text "sub") format src dst
  684 
  685    -- Use a 32-bit instruction when possible as it saves a byte.
  686    -- Notably, extracting the tag bits of a pointer has this form.
  687    -- TODO: we could save a byte in a subsequent CMP instruction too,
  688    -- but need something like a peephole pass for this
  689    AND II64 src@(OpImm (ImmInteger mask)) dst
  690       | 0 <= mask && mask < 0xffffffff
  691       -> pprInstr platform (AND II32 src dst)
  692 
  693    AND FF32 src dst
  694       -> pprOpOp (text "andps") FF32 src dst
  695 
  696    AND FF64 src dst
  697       -> pprOpOp (text "andpd") FF64 src dst
  698 
  699    AND format src dst
  700       -> pprFormatOpOp (text "and") format src dst
  701 
  702    OR  format src dst
  703       -> pprFormatOpOp (text "or")  format src dst
  704 
  705    XOR FF32 src dst
  706       -> pprOpOp (text "xorps") FF32 src dst
  707 
  708    XOR FF64 src dst
  709       ->  pprOpOp (text "xorpd") FF64 src dst
  710 
  711    XOR format src dst
  712       -> pprFormatOpOp (text "xor") format src dst
  713 
  714    POPCNT format src dst
  715       -> pprOpOp (text "popcnt") format src (OpReg dst)
  716 
  717    LZCNT format src dst
  718       ->  pprOpOp (text "lzcnt") format src (OpReg dst)
  719 
  720    TZCNT format src dst
  721       -> pprOpOp (text "tzcnt") format src (OpReg dst)
  722 
  723    BSF format src dst
  724       -> pprOpOp (text "bsf") format src (OpReg dst)
  725 
  726    BSR format src dst
  727       -> pprOpOp (text "bsr") format src (OpReg dst)
  728 
  729    PDEP format src mask dst
  730       -> pprFormatOpOpReg (text "pdep") format src mask dst
  731 
  732    PEXT format src mask dst
  733       -> pprFormatOpOpReg (text "pext") format src mask dst
  734 
  735    PREFETCH NTA format src
  736       -> pprFormatOp_ (text "prefetchnta") format src
  737 
  738    PREFETCH Lvl0 format src
  739       -> pprFormatOp_ (text "prefetcht0") format src
  740 
  741    PREFETCH Lvl1 format src
  742       -> pprFormatOp_ (text "prefetcht1") format src
  743 
  744    PREFETCH Lvl2 format src
  745       -> pprFormatOp_ (text "prefetcht2") format src
  746 
  747    NOT format op
  748       -> pprFormatOp (text "not") format op
  749 
  750    BSWAP format op
  751       -> pprFormatOp (text "bswap") format (OpReg op)
  752 
  753    NEGI format op
  754       -> pprFormatOp (text "neg") format op
  755 
  756    SHL format src dst
  757       -> pprShift (text "shl") format src dst
  758 
  759    SAR format src dst
  760       -> pprShift (text "sar") format src dst
  761 
  762    SHR format src dst
  763       -> pprShift (text "shr") format src dst
  764 
  765    BT format imm src
  766       -> pprFormatImmOp (text "bt") format imm src
  767 
  768    CMP format src dst
  769      | isFloatFormat format -> pprFormatOpOp (text "ucomi") format src dst -- SSE2
  770      | otherwise            -> pprFormatOpOp (text "cmp")   format src dst
  771 
  772    TEST format src dst
  773       -> pprFormatOpOp (text "test") format' src dst
  774          where
  775         -- Match instructions like 'test $0x3,%esi' or 'test $0x7,%rbx'.
  776         -- We can replace them by equivalent, but smaller instructions
  777         -- by reducing the size of the immediate operand as far as possible.
  778         -- (We could handle masks larger than a single byte too,
  779         -- but it would complicate the code considerably
  780         -- and tag checks are by far the most common case.)
  781         -- The mask must have the high bit clear for this smaller encoding
  782         -- to be completely equivalent to the original; in particular so
  783         -- that the signed comparison condition bits are the same as they
  784         -- would be if doing a full word comparison. See #13425.
  785           format' = case (src,dst) of
  786            (OpImm (ImmInteger mask), OpReg dstReg)
  787              | 0 <= mask && mask < 128 -> minSizeOfReg platform dstReg
  788            _ -> format
  789           minSizeOfReg platform (RegReal (RealRegSingle i))
  790             | target32Bit platform && i <= 3        = II8  -- al, bl, cl, dl
  791             | target32Bit platform && i <= 7        = II16 -- si, di, bp, sp
  792             | not (target32Bit platform) && i <= 15 = II8  -- al .. r15b
  793           minSizeOfReg _ _ = format                 -- other
  794 
  795    PUSH format op
  796       -> pprFormatOp (text "push") format op
  797 
  798    POP format op
  799       -> pprFormatOp (text "pop") format op
  800 
  801 -- both unused (SDM):
  802 -- PUSHA -> text "\tpushal"
  803 -- POPA  -> text "\tpopal"
  804 
  805    NOP
  806       -> text "\tnop"
  807 
  808    CLTD II8
  809       -> text "\tcbtw"
  810 
  811    CLTD II16
  812       -> text "\tcwtd"
  813 
  814    CLTD II32
  815       -> text "\tcltd"
  816 
  817    CLTD II64
  818       -> text "\tcqto"
  819 
  820    CLTD x
  821       -> panic $ "pprInstr: CLTD " ++ show x
  822 
  823    SETCC cond op
  824       -> pprCondInstr (text "set") cond (pprOperand platform II8 op)
  825 
  826    XCHG format src val
  827       -> pprFormatOpReg (text "xchg") format src val
  828 
  829    JXX cond blockid
  830       -> pprCondInstr (text "j") cond (pdoc platform lab)
  831          where lab = blockLbl blockid
  832 
  833    JXX_GBL cond imm
  834       -> pprCondInstr (text "j") cond (pprImm platform imm)
  835 
  836    JMP (OpImm imm) _
  837       -> text "\tjmp " <> pprImm platform imm
  838 
  839    JMP op _
  840       -> text "\tjmp *" <> pprOperand platform (archWordFormat (target32Bit platform)) op
  841 
  842    JMP_TBL op _ _ _
  843       -> pprInstr platform (JMP op [])
  844 
  845    CALL (Left imm) _
  846       -> text "\tcall " <> pprImm platform imm
  847 
  848    CALL (Right reg) _
  849       -> text "\tcall *" <> pprReg platform (archWordFormat (target32Bit platform)) reg
  850 
  851    IDIV fmt op
  852       -> pprFormatOp (text "idiv") fmt op
  853 
  854    DIV fmt op
  855       -> pprFormatOp (text "div")  fmt op
  856 
  857    IMUL2 fmt op
  858       -> pprFormatOp (text "imul") fmt op
  859 
  860    -- x86_64 only
  861    MUL format op1 op2
  862       -> pprFormatOpOp (text "mul") format op1 op2
  863 
  864    MUL2 format op
  865       -> pprFormatOp (text "mul") format op
  866 
  867    FDIV format op1 op2
  868       -> pprFormatOpOp (text "div") format op1 op2
  869 
  870    SQRT format op1 op2
  871       -> pprFormatOpReg (text "sqrt") format op1 op2
  872 
  873    CVTSS2SD from to
  874       -> pprRegReg (text "cvtss2sd") from to
  875 
  876    CVTSD2SS from to
  877       -> pprRegReg (text "cvtsd2ss") from to
  878 
  879    CVTTSS2SIQ fmt from to
  880       -> pprFormatFormatOpReg (text "cvttss2si") FF32 fmt from to
  881 
  882    CVTTSD2SIQ fmt from to
  883       -> pprFormatFormatOpReg (text "cvttsd2si") FF64 fmt from to
  884 
  885    CVTSI2SS fmt from to
  886       -> pprFormatOpReg (text "cvtsi2ss") fmt from to
  887 
  888    CVTSI2SD fmt from to
  889       -> pprFormatOpReg (text "cvtsi2sd") fmt from to
  890 
  891        -- FETCHGOT for PIC on ELF platforms
  892    FETCHGOT reg
  893       -> vcat [ text "\tcall 1f",
  894                 hcat [ text "1:\tpopl\t", pprReg platform II32 reg ],
  895                 hcat [ text "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), ",
  896                        pprReg platform II32 reg ]
  897               ]
  898 
  899     -- FETCHPC for PIC on Darwin/x86
  900     -- get the instruction pointer into a register
  901     -- (Terminology note: the IP is called Program Counter on PPC,
  902     --  and it's a good thing to use the same name on both platforms)
  903    FETCHPC reg
  904       -> vcat [ text "\tcall 1f",
  905                 hcat [ text "1:\tpopl\t", pprReg platform II32 reg ]
  906               ]
  907 
  908    -- the
  909    -- GST fmt src addr ==> FLD dst ; FSTPsz addr
  910    g@(X87Store fmt  addr)
  911       -> pprX87 g (hcat [gtab, text "fstp", pprFormat_x87 fmt, gsp, pprAddr platform addr])
  912 
  913    -- Atomics
  914    LOCK i
  915       -> text "\tlock" $$ pprInstr platform i
  916 
  917    MFENCE
  918       -> text "\tmfence"
  919 
  920    XADD format src dst
  921       -> pprFormatOpOp (text "xadd") format src dst
  922 
  923    CMPXCHG format src dst
  924       -> pprFormatOpOp (text "cmpxchg") format src dst
  925 
  926 
  927   where
  928    gtab :: SDoc
  929    gtab  = char '\t'
  930 
  931    gsp :: SDoc
  932    gsp   = char ' '
  933 
  934 
  935 
  936    pprX87 :: Instr -> SDoc -> SDoc
  937    pprX87 fake actual
  938       = (char '#' <> pprX87Instr fake) $$ actual
  939 
  940    pprX87Instr :: Instr -> SDoc
  941    pprX87Instr (X87Store fmt dst) = pprFormatAddr (text "gst") fmt dst
  942    pprX87Instr _ = panic "X86.Ppr.pprX87Instr: no match"
  943 
  944    pprDollImm :: Imm -> SDoc
  945    pprDollImm i = text "$" <> pprImm platform i
  946 
  947 
  948    pprOperand :: Platform -> Format -> Operand -> SDoc
  949    pprOperand platform f op = case op of
  950       OpReg r   -> pprReg platform f r
  951       OpImm i   -> pprDollImm i
  952       OpAddr ea -> pprAddr platform ea
  953 
  954 
  955    pprMnemonic_  :: SDoc -> SDoc
  956    pprMnemonic_ name =
  957       char '\t' <> name <> space
  958 
  959 
  960    pprMnemonic  :: SDoc -> Format -> SDoc
  961    pprMnemonic name format =
  962       char '\t' <> name <> pprFormat format <> space
  963 
  964 
  965    pprFormatImmOp :: SDoc -> Format -> Imm -> Operand -> SDoc
  966    pprFormatImmOp name format imm op1
  967      = hcat [
  968            pprMnemonic name format,
  969            char '$',
  970            pprImm platform imm,
  971            comma,
  972            pprOperand platform format op1
  973        ]
  974 
  975 
  976    pprFormatOp_ :: SDoc -> Format -> Operand -> SDoc
  977    pprFormatOp_ name format op1
  978      = hcat [
  979            pprMnemonic_ name ,
  980            pprOperand platform format op1
  981        ]
  982 
  983    pprFormatOp :: SDoc -> Format -> Operand -> SDoc
  984    pprFormatOp name format op1
  985      = hcat [
  986            pprMnemonic name format,
  987            pprOperand platform format op1
  988        ]
  989 
  990 
  991    pprFormatOpOp :: SDoc -> Format -> Operand -> Operand -> SDoc
  992    pprFormatOpOp name format op1 op2
  993      = hcat [
  994            pprMnemonic name format,
  995            pprOperand platform format op1,
  996            comma,
  997            pprOperand platform format op2
  998        ]
  999 
 1000 
 1001    pprOpOp :: SDoc -> Format -> Operand -> Operand -> SDoc
 1002    pprOpOp name format op1 op2
 1003      = hcat [
 1004            pprMnemonic_ name,
 1005            pprOperand platform format op1,
 1006            comma,
 1007            pprOperand platform format op2
 1008        ]
 1009 
 1010    pprRegReg :: SDoc -> Reg -> Reg -> SDoc
 1011    pprRegReg name reg1 reg2
 1012      = hcat [
 1013            pprMnemonic_ name,
 1014            pprReg platform (archWordFormat (target32Bit platform)) reg1,
 1015            comma,
 1016            pprReg platform (archWordFormat (target32Bit platform)) reg2
 1017        ]
 1018 
 1019 
 1020    pprFormatOpReg :: SDoc -> Format -> Operand -> Reg -> SDoc
 1021    pprFormatOpReg name format op1 reg2
 1022      = hcat [
 1023            pprMnemonic name format,
 1024            pprOperand platform format op1,
 1025            comma,
 1026            pprReg platform (archWordFormat (target32Bit platform)) reg2
 1027        ]
 1028 
 1029    pprCondOpReg :: SDoc -> Format -> Cond -> Operand -> Reg -> SDoc
 1030    pprCondOpReg name format cond op1 reg2
 1031      = hcat [
 1032            char '\t',
 1033            name,
 1034            pprCond cond,
 1035            space,
 1036            pprOperand platform format op1,
 1037            comma,
 1038            pprReg platform format reg2
 1039        ]
 1040 
 1041    pprFormatFormatOpReg :: SDoc -> Format -> Format -> Operand -> Reg -> SDoc
 1042    pprFormatFormatOpReg name format1 format2 op1 reg2
 1043      = hcat [
 1044            pprMnemonic name format2,
 1045            pprOperand platform format1 op1,
 1046            comma,
 1047            pprReg platform format2 reg2
 1048        ]
 1049 
 1050    pprFormatOpOpReg :: SDoc -> Format -> Operand -> Operand -> Reg -> SDoc
 1051    pprFormatOpOpReg name format op1 op2 reg3
 1052      = hcat [
 1053            pprMnemonic name format,
 1054            pprOperand platform format op1,
 1055            comma,
 1056            pprOperand platform format op2,
 1057            comma,
 1058            pprReg platform format reg3
 1059        ]
 1060 
 1061 
 1062 
 1063    pprFormatAddr :: SDoc -> Format -> AddrMode -> SDoc
 1064    pprFormatAddr name format  op
 1065      = hcat [
 1066            pprMnemonic name format,
 1067            comma,
 1068            pprAddr platform op
 1069        ]
 1070 
 1071    pprShift :: SDoc -> Format -> Operand -> Operand -> SDoc
 1072    pprShift name format src dest
 1073      = hcat [
 1074            pprMnemonic name format,
 1075            pprOperand platform II8 src,  -- src is 8-bit sized
 1076            comma,
 1077            pprOperand platform format dest
 1078        ]
 1079 
 1080 
 1081    pprFormatOpOpCoerce :: SDoc -> Format -> Format -> Operand -> Operand -> SDoc
 1082    pprFormatOpOpCoerce name format1 format2 op1 op2
 1083      = hcat [ char '\t', name, pprFormat format1, pprFormat format2, space,
 1084            pprOperand platform format1 op1,
 1085            comma,
 1086            pprOperand platform format2 op2
 1087        ]
 1088 
 1089 
 1090    pprCondInstr :: SDoc -> Cond -> SDoc -> SDoc
 1091    pprCondInstr name cond arg
 1092      = hcat [ char '\t', name, pprCond cond, space, arg]