never executed always true always false
    1 {-# LANGUAGE TypeFamilies #-}
    2 
    3 -----------------------------------------------------------------------------
    4 --
    5 -- Machine-dependent assembly language
    6 --
    7 -- (c) The University of Glasgow 1993-2004
    8 --
    9 -----------------------------------------------------------------------------
   10 
   11 module GHC.CmmToAsm.X86.Instr
   12    ( Instr(..)
   13    , Operand(..)
   14    , PrefetchVariant(..)
   15    , JumpDest(..)
   16    , getJumpDestBlockId
   17    , canShortcut
   18    , shortcutStatics
   19    , shortcutJump
   20    , allocMoreStack
   21    , maxSpillSlots
   22    , archWordFormat
   23    , takeRegRegMoveInstr
   24    , regUsageOfInstr
   25    , takeDeltaInstr
   26    , mkLoadInstr
   27    , mkJumpInstr
   28    , mkStackAllocInstr
   29    , mkStackDeallocInstr
   30    , mkSpillInstr
   31    , mkRegRegMoveInstr
   32    , jumpDestsOfInstr
   33    , patchRegsOfInstr
   34    , patchJumpInstr
   35    , isMetaInstr
   36    , isJumpishInstr
   37    )
   38 where
   39 
   40 import GHC.Prelude
   41 
   42 import GHC.CmmToAsm.X86.Cond
   43 import GHC.CmmToAsm.X86.Regs
   44 import GHC.CmmToAsm.Format
   45 import GHC.CmmToAsm.Types
   46 import GHC.CmmToAsm.Utils
   47 import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
   48 import GHC.Platform.Reg.Class
   49 import GHC.Platform.Reg
   50 import GHC.CmmToAsm.Reg.Target
   51 import GHC.CmmToAsm.Config
   52 
   53 import GHC.Cmm.BlockId
   54 import GHC.Cmm.Dataflow.Collections
   55 import GHC.Cmm.Dataflow.Label
   56 import GHC.Platform.Regs
   57 import GHC.Cmm
   58 import GHC.Utils.Outputable
   59 import GHC.Utils.Panic
   60 import GHC.Platform
   61 
   62 import GHC.Cmm.CLabel
   63 import GHC.Types.Unique.Set
   64 import GHC.Types.Unique
   65 import GHC.Types.Unique.Supply
   66 import GHC.Types.Basic (Alignment)
   67 import GHC.Cmm.DebugBlock (UnwindTable)
   68 
   69 import Control.Monad
   70 import Data.Maybe       (fromMaybe)
   71 
   72 -- Format of an x86/x86_64 memory address, in bytes.
   73 --
   74 archWordFormat :: Bool -> Format
   75 archWordFormat is32Bit
   76  | is32Bit   = II32
   77  | otherwise = II64
   78 
   79 -- -----------------------------------------------------------------------------
   80 -- Intel x86 instructions
   81 
   82 {-
   83 Intel, in their infinite wisdom, selected a stack model for floating
   84 point registers on x86.  That might have made sense back in 1979 --
   85 nowadays we can see it for the nonsense it really is.  A stack model
   86 fits poorly with the existing nativeGen infrastructure, which assumes
   87 flat integer and FP register sets.  Prior to this commit, nativeGen
   88 could not generate correct x86 FP code -- to do so would have meant
   89 somehow working the register-stack paradigm into the register
   90 allocator and spiller, which sounds very difficult.
   91 
   92 We have decided to cheat, and go for a simple fix which requires no
   93 infrastructure modifications, at the expense of generating ropey but
   94 correct FP code.  All notions of the x86 FP stack and its insns have
   95 been removed.  Instead, we pretend (to the instruction selector and
   96 register allocator) that x86 has six floating point registers, %fake0
   97 .. %fake5, which can be used in the usual flat manner.  We further
   98 claim that x86 has floating point instructions very similar to SPARC
   99 and Alpha, that is, a simple 3-operand register-register arrangement.
  100 Code generation and register allocation proceed on this basis.
  101 
  102 When we come to print out the final assembly, our convenient fiction
  103 is converted to dismal reality.  Each fake instruction is
  104 independently converted to a series of real x86 instructions.
  105 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  106 arithmetic operations, the two operands are pushed onto the top of the
  107 FP stack, the operation done, and the result copied back into the
  108 relevant register.  There are only six %fake registers because 2 are
  109 needed for the translation, and x86 has 8 in total.
  110 
  111 The translation is inefficient but is simple and it works.  A cleverer
  112 translation would handle a sequence of insns, simulating the FP stack
  113 contents, would not impose a fixed mapping from %fake to %st regs, and
  114 hopefully could avoid most of the redundant reg-reg moves of the
  115 current translation.
  116 
  117 We might as well make use of whatever unique FP facilities Intel have
  118 chosen to bless us with (let's not be churlish, after all).
  119 Hence GLDZ and GLD1.  Bwahahahahahahaha!
  120 -}
  121 
  122 {-
  123 Note [x86 Floating point precision]
  124 
  125 Intel's internal floating point registers are by default 80 bit
  126 extended precision.  This means that all operations done on values in
  127 registers are done at 80 bits, and unless the intermediate values are
  128 truncated to the appropriate size (32 or 64 bits) by storing in
  129 memory, calculations in registers will give different results from
  130 calculations which pass intermediate values in memory (eg. via
  131 function calls).
  132 
  133 One solution is to set the FPU into 64 bit precision mode.  Some OSs
  134 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
  135 that this will only affect 64-bit precision arithmetic; 32-bit
  136 calculations will still be done at 64-bit precision in registers.  So
  137 it doesn't solve the whole problem.
  138 
  139 There's also the issue of what the C library is expecting in terms of
  140 precision.  It seems to be the case that glibc on Linux expects the
  141 FPU to be set to 80 bit precision, so setting it to 64 bit could have
  142 unexpected effects.  Changing the default could have undesirable
  143 effects on other 3rd-party library code too, so the right thing would
  144 be to save/restore the FPU control word across Haskell code if we were
  145 to do this.
  146 
  147 gcc's -ffloat-store gives consistent results by always storing the
  148 results of floating-point calculations in memory, which works for both
  149 32 and 64-bit precision.  However, it only affects the values of
  150 user-declared floating point variables in C, not intermediate results.
  151 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
  152 flag).
  153 
  154 Another problem is how to spill floating point registers in the
  155 register allocator.  Should we spill the whole 80 bits, or just 64?
  156 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
  157 Linux, spilling 64 bits will round the results of some operations.
  158 This is what gcc does.  Spilling at 80 bits requires taking up a full
  159 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
  160 the alignment problems.
  161 
  162 In the future [edit: now available in GHC 7.0.1, with the -msse2
  163 flag], we'll use the SSE registers for floating point.  This requires
  164 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
  165 float ops), which means P4 or Xeon and above.  Using SSE will solve
  166 all these problems, because the SSE registers use fixed 32 bit or 64
  167 bit precision.
  168 
  169 --SDM 1/2003
  170 -}
  171 
  172 data Instr
  173         -- comment pseudo-op
  174         = COMMENT SDoc
  175 
  176         -- location pseudo-op (file, line, col, name)
  177         | LOCATION Int Int Int String
  178 
  179         -- some static data spat out during code
  180         -- generation.  Will be extracted before
  181         -- pretty-printing.
  182         | LDATA   Section (Alignment, RawCmmStatics)
  183 
  184         -- start a new basic block.  Useful during
  185         -- codegen, removed later.  Preceding
  186         -- instruction should be a jump, as per the
  187         -- invariants for a BasicBlock (see Cmm).
  188         | NEWBLOCK BlockId
  189 
  190         -- unwinding information
  191         -- See Note [Unwinding information in the NCG].
  192         | UNWIND CLabel UnwindTable
  193 
  194         -- specify current stack offset for benefit of subsequent passes.
  195         -- This carries a BlockId so it can be used in unwinding information.
  196         | DELTA  Int
  197 
  198         -- Moves.
  199         | MOV         Format Operand Operand
  200         | CMOV   Cond Format Operand Reg
  201         | MOVZxL      Format Operand Operand
  202               -- ^ The format argument is the size of operand 1 (the number of bits we keep)
  203               -- We always zero *all* high bits, even though this isn't how the actual instruction
  204               -- works. The code generator also seems to rely on this behaviour and it's faster
  205               -- to execute on many cpus as well so for now I'm just documenting the fact.
  206         | MOVSxL      Format Operand Operand -- format is the size of operand 1
  207         -- x86_64 note: plain mov into a 32-bit register always zero-extends
  208         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
  209         -- don't affect the high bits of the register.
  210 
  211         -- Load effective address (also a very useful three-operand add instruction :-)
  212         | LEA         Format Operand Operand
  213 
  214         -- Int Arithmetic.
  215         | ADD         Format Operand Operand
  216         | ADC         Format Operand Operand
  217         | SUB         Format Operand Operand
  218         | SBB         Format Operand Operand
  219 
  220         | MUL         Format Operand Operand
  221         | MUL2        Format Operand         -- %edx:%eax = operand * %rax
  222         | IMUL        Format Operand Operand -- signed int mul
  223         | IMUL2       Format Operand         -- %edx:%eax = operand * %eax
  224 
  225         | DIV         Format Operand         -- eax := eax:edx/op, edx := eax:edx%op
  226         | IDIV        Format Operand         -- ditto, but signed
  227 
  228         -- Int Arithmetic, where the effects on the condition register
  229         -- are important. Used in specialized sequences such as MO_Add2.
  230         -- Do not rewrite these instructions to "equivalent" ones that
  231         -- have different effect on the condition register! (See #9013.)
  232         | ADD_CC      Format Operand Operand
  233         | SUB_CC      Format Operand Operand
  234 
  235         -- Simple bit-twiddling.
  236         | AND         Format Operand Operand
  237         | OR          Format Operand Operand
  238         | XOR         Format Operand Operand
  239         | NOT         Format Operand
  240         | NEGI        Format Operand         -- NEG instruction (name clash with Cond)
  241         | BSWAP       Format Reg
  242 
  243         -- Shifts (amount may be immediate or %cl only)
  244         | SHL         Format Operand{-amount-} Operand
  245         | SAR         Format Operand{-amount-} Operand
  246         | SHR         Format Operand{-amount-} Operand
  247 
  248         | BT          Format Imm Operand
  249         | NOP
  250 
  251 
  252         -- We need to support the FSTP (x87 store and pop) instruction
  253         -- so that we can correctly read off the return value of an
  254         -- x86 CDECL C function call when its floating point.
  255         -- so we dont include a register argument, and just use st(0)
  256         -- this instruction is used ONLY for return values of C ffi calls
  257         -- in x86_32 abi
  258         | X87Store         Format  AddrMode -- st(0), dst
  259 
  260 
  261         -- SSE2 floating point: we use a restricted set of the available SSE2
  262         -- instructions for floating-point.
  263         -- use MOV for moving (either movss or movsd (movlpd better?))
  264         | CVTSS2SD      Reg Reg            -- F32 to F64
  265         | CVTSD2SS      Reg Reg            -- F64 to F32
  266         | CVTTSS2SIQ    Format Operand Reg -- F32 to I32/I64 (with truncation)
  267         | CVTTSD2SIQ    Format Operand Reg -- F64 to I32/I64 (with truncation)
  268         | CVTSI2SS      Format Operand Reg -- I32/I64 to F32
  269         | CVTSI2SD      Format Operand Reg -- I32/I64 to F64
  270 
  271         -- use ADD, SUB, and SQRT for arithmetic.  In both cases, operands
  272         -- are  Operand Reg.
  273 
  274         -- SSE2 floating-point division:
  275         | FDIV          Format Operand Operand   -- divisor, dividend(dst)
  276 
  277         -- use CMP for comparisons.  ucomiss and ucomisd instructions
  278         -- compare single/double prec floating point respectively.
  279 
  280         | SQRT          Format Operand Reg      -- src, dst
  281 
  282 
  283         -- Comparison
  284         | TEST          Format Operand Operand
  285         | CMP           Format Operand Operand
  286         | SETCC         Cond Operand
  287 
  288         -- Stack Operations.
  289         | PUSH          Format Operand
  290         | POP           Format Operand
  291         -- both unused (SDM):
  292         --  | PUSHA
  293         --  | POPA
  294 
  295         -- Jumping around.
  296         | JMP         Operand [Reg] -- including live Regs at the call
  297         | JXX         Cond BlockId  -- includes unconditional branches
  298         | JXX_GBL     Cond Imm      -- non-local version of JXX
  299         -- Table jump
  300         | JMP_TBL     Operand   -- Address to jump to
  301                       [Maybe JumpDest] -- Targets of the jump table
  302                       Section   -- Data section jump table should be put in
  303                       CLabel    -- Label of jump table
  304         -- | X86 call instruction
  305         | CALL        (Either Imm Reg) -- ^ Jump target
  306                       [Reg]            -- ^ Arguments (required for register allocation)
  307 
  308         -- Other things.
  309         | CLTD Format            -- sign extend %eax into %edx:%eax
  310 
  311         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
  312                                  -- pretty-prints as
  313                                  --       call 1f
  314                                  -- 1:    popl %reg
  315                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
  316         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
  317                                  -- pretty-prints as
  318                                  --       call 1f
  319                                  -- 1:    popl %reg
  320 
  321     -- bit counting instructions
  322         | POPCNT      Format Operand Reg -- [SSE4.2] count number of bits set to 1
  323         | LZCNT       Format Operand Reg -- [BMI2] count number of leading zeros
  324         | TZCNT       Format Operand Reg -- [BMI2] count number of trailing zeros
  325         | BSF         Format Operand Reg -- bit scan forward
  326         | BSR         Format Operand Reg -- bit scan reverse
  327 
  328     -- bit manipulation instructions
  329         | PDEP        Format Operand Operand Reg -- [BMI2] deposit bits to   the specified mask
  330         | PEXT        Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
  331 
  332     -- prefetch
  333         | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
  334                                         -- variant can be NTA, Lvl0, Lvl1, or Lvl2
  335 
  336         | LOCK        Instr -- lock prefix
  337         | XADD        Format Operand Operand -- src (r), dst (r/m)
  338         | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
  339         | XCHG        Format Operand Reg     -- src (r/m), dst (r/m)
  340         | MFENCE
  341 
  342 data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
  343 
  344 
  345 data Operand
  346         = OpReg  Reg            -- register
  347         | OpImm  Imm            -- immediate value
  348         | OpAddr AddrMode       -- memory reference
  349 
  350 
  351 
  352 -- | Returns which registers are read and written as a (read, written)
  353 -- pair.
  354 regUsageOfInstr :: Platform -> Instr -> RegUsage
  355 regUsageOfInstr platform instr
  356  = case instr of
  357     MOV    _ src dst    -> usageRW src dst
  358     CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
  359     MOVZxL _ src dst    -> usageRW src dst
  360     MOVSxL _ src dst    -> usageRW src dst
  361     LEA    _ src dst    -> usageRW src dst
  362     ADD    _ src dst    -> usageRM src dst
  363     ADC    _ src dst    -> usageRM src dst
  364     SUB    _ src dst    -> usageRM src dst
  365     SBB    _ src dst    -> usageRM src dst
  366     IMUL   _ src dst    -> usageRM src dst
  367 
  368     -- Result of IMULB will be in just in %ax
  369     IMUL2  II8 src       -> mkRU (eax:use_R src []) [eax]
  370     -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
  371     -- %ax/%eax/%rax.
  372     IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
  373 
  374     MUL    _ src dst    -> usageRM src dst
  375     MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
  376     DIV    _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
  377     IDIV   _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
  378     ADD_CC _ src dst    -> usageRM src dst
  379     SUB_CC _ src dst    -> usageRM src dst
  380     AND    _ src dst    -> usageRM src dst
  381     OR     _ src dst    -> usageRM src dst
  382 
  383     XOR    _ (OpReg src) (OpReg dst)
  384         | src == dst    -> mkRU [] [dst]
  385 
  386     XOR    _ src dst    -> usageRM src dst
  387     NOT    _ op         -> usageM op
  388     BSWAP  _ reg        -> mkRU [reg] [reg]
  389     NEGI   _ op         -> usageM op
  390     SHL    _ imm dst    -> usageRM imm dst
  391     SAR    _ imm dst    -> usageRM imm dst
  392     SHR    _ imm dst    -> usageRM imm dst
  393     BT     _ _   src    -> mkRUR (use_R src [])
  394 
  395     PUSH   _ op         -> mkRUR (use_R op [])
  396     POP    _ op         -> mkRU [] (def_W op)
  397     TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
  398     CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
  399     SETCC  _ op         -> mkRU [] (def_W op)
  400     JXX    _ _          -> mkRU [] []
  401     JXX_GBL _ _         -> mkRU [] []
  402     JMP     op regs     -> mkRUR (use_R op regs)
  403     JMP_TBL op _ _ _    -> mkRUR (use_R op [])
  404     CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
  405     CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
  406     CLTD   _            -> mkRU [eax] [edx]
  407     NOP                 -> mkRU [] []
  408 
  409     X87Store    _  dst    -> mkRUR ( use_EA dst [])
  410 
  411     CVTSS2SD   src dst  -> mkRU [src] [dst]
  412     CVTSD2SS   src dst  -> mkRU [src] [dst]
  413     CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
  414     CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
  415     CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
  416     CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
  417     FDIV _     src dst  -> usageRM src dst
  418     SQRT _ src dst      -> mkRU (use_R src []) [dst]
  419 
  420     FETCHGOT reg        -> mkRU [] [reg]
  421     FETCHPC  reg        -> mkRU [] [reg]
  422 
  423     COMMENT _           -> noUsage
  424     LOCATION{}          -> noUsage
  425     UNWIND{}            -> noUsage
  426     DELTA   _           -> noUsage
  427 
  428     POPCNT _ src dst -> mkRU (use_R src []) [dst]
  429     LZCNT  _ src dst -> mkRU (use_R src []) [dst]
  430     TZCNT  _ src dst -> mkRU (use_R src []) [dst]
  431     BSF    _ src dst -> mkRU (use_R src []) [dst]
  432     BSR    _ src dst -> mkRU (use_R src []) [dst]
  433 
  434     PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
  435     PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
  436 
  437     -- note: might be a better way to do this
  438     PREFETCH _  _ src -> mkRU (use_R src []) []
  439     LOCK i              -> regUsageOfInstr platform i
  440     XADD _ src dst      -> usageMM src dst
  441     CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
  442     XCHG _ src dst      -> usageMM src (OpReg dst)
  443     MFENCE -> noUsage
  444 
  445     _other              -> panic "regUsage: unrecognised instr"
  446  where
  447     -- # Definitions
  448     --
  449     -- Written: If the operand is a register, it's written. If it's an
  450     -- address, registers mentioned in the address are read.
  451     --
  452     -- Modified: If the operand is a register, it's both read and
  453     -- written. If it's an address, registers mentioned in the address
  454     -- are read.
  455 
  456     -- 2 operand form; first operand Read; second Written
  457     usageRW :: Operand -> Operand -> RegUsage
  458     usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
  459     usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
  460     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
  461 
  462     -- 2 operand form; first operand Read; second Modified
  463     usageRM :: Operand -> Operand -> RegUsage
  464     usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
  465     usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
  466     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
  467 
  468     -- 2 operand form; first operand Modified; second Modified
  469     usageMM :: Operand -> Operand -> RegUsage
  470     usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
  471     usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
  472     usageMM (OpAddr ea) (OpReg dst) = mkRU (use_EA ea [dst]) [dst]
  473     usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"
  474 
  475     -- 3 operand form; first operand Read; second Modified; third Modified
  476     usageRMM :: Operand -> Operand -> Operand -> RegUsage
  477     usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
  478     usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
  479     usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"
  480 
  481     -- 1 operand form; operand Modified
  482     usageM :: Operand -> RegUsage
  483     usageM (OpReg reg)          = mkRU [reg] [reg]
  484     usageM (OpAddr ea)          = mkRUR (use_EA ea [])
  485     usageM _                    = panic "X86.RegInfo.usageM: no match"
  486 
  487     -- Registers defd when an operand is written.
  488     def_W (OpReg reg)           = [reg]
  489     def_W (OpAddr _ )           = []
  490     def_W _                     = panic "X86.RegInfo.def_W: no match"
  491 
  492     -- Registers used when an operand is read.
  493     use_R (OpReg reg)  tl = reg : tl
  494     use_R (OpImm _)    tl = tl
  495     use_R (OpAddr ea)  tl = use_EA ea tl
  496 
  497     -- Registers used to compute an effective address.
  498     use_EA (ImmAddr _ _) tl = tl
  499     use_EA (AddrBaseIndex base index _) tl =
  500         use_base base $! use_index index tl
  501         where use_base (EABaseReg r)  tl = r : tl
  502               use_base _              tl = tl
  503               use_index EAIndexNone   tl = tl
  504               use_index (EAIndex i _) tl = i : tl
  505 
  506     mkRUR src = src' `seq` RU src' []
  507         where src' = filter (interesting platform) src
  508 
  509     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
  510         where src' = filter (interesting platform) src
  511               dst' = filter (interesting platform) dst
  512 
  513 -- | Is this register interesting for the register allocator?
  514 interesting :: Platform -> Reg -> Bool
  515 interesting _        (RegVirtual _)              = True
  516 interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
  517 interesting _        (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
  518 
  519 
  520 
  521 -- | Applies the supplied function to all registers in instructions.
  522 -- Typically used to change virtual registers to real registers.
  523 patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
  524 patchRegsOfInstr instr env
  525  = case instr of
  526     MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
  527     CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
  528     MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
  529     MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst
  530     LEA  fmt src dst     -> patch2 (LEA  fmt) src dst
  531     ADD  fmt src dst     -> patch2 (ADD  fmt) src dst
  532     ADC  fmt src dst     -> patch2 (ADC  fmt) src dst
  533     SUB  fmt src dst     -> patch2 (SUB  fmt) src dst
  534     SBB  fmt src dst     -> patch2 (SBB  fmt) src dst
  535     IMUL fmt src dst     -> patch2 (IMUL fmt) src dst
  536     IMUL2 fmt src        -> patch1 (IMUL2 fmt) src
  537     MUL fmt src dst      -> patch2 (MUL fmt) src dst
  538     MUL2 fmt src         -> patch1 (MUL2 fmt) src
  539     IDIV fmt op          -> patch1 (IDIV fmt) op
  540     DIV fmt op           -> patch1 (DIV fmt) op
  541     ADD_CC fmt src dst   -> patch2 (ADD_CC fmt) src dst
  542     SUB_CC fmt src dst   -> patch2 (SUB_CC fmt) src dst
  543     AND  fmt src dst     -> patch2 (AND  fmt) src dst
  544     OR   fmt src dst     -> patch2 (OR   fmt) src dst
  545     XOR  fmt src dst     -> patch2 (XOR  fmt) src dst
  546     NOT  fmt op          -> patch1 (NOT  fmt) op
  547     BSWAP fmt reg        -> BSWAP fmt (env reg)
  548     NEGI fmt op          -> patch1 (NEGI fmt) op
  549     SHL  fmt imm dst     -> patch1 (SHL fmt imm) dst
  550     SAR  fmt imm dst     -> patch1 (SAR fmt imm) dst
  551     SHR  fmt imm dst     -> patch1 (SHR fmt imm) dst
  552     BT   fmt imm src     -> patch1 (BT  fmt imm) src
  553     TEST fmt src dst     -> patch2 (TEST fmt) src dst
  554     CMP  fmt src dst     -> patch2 (CMP  fmt) src dst
  555     PUSH fmt op          -> patch1 (PUSH fmt) op
  556     POP  fmt op          -> patch1 (POP  fmt) op
  557     SETCC cond op        -> patch1 (SETCC cond) op
  558     JMP op regs          -> JMP (patchOp op) regs
  559     JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
  560 
  561     -- literally only support storing the top x87 stack value st(0)
  562     X87Store  fmt  dst     -> X87Store fmt  (lookupAddr dst)
  563 
  564     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
  565     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
  566     CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
  567     CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
  568     CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
  569     CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
  570     FDIV fmt src dst     -> FDIV fmt (patchOp src) (patchOp dst)
  571     SQRT fmt src dst    -> SQRT fmt (patchOp src) (env dst)
  572 
  573     CALL (Left _)  _    -> instr
  574     CALL (Right reg) p  -> CALL (Right (env reg)) p
  575 
  576     FETCHGOT reg        -> FETCHGOT (env reg)
  577     FETCHPC  reg        -> FETCHPC  (env reg)
  578 
  579     NOP                 -> instr
  580     COMMENT _           -> instr
  581     LOCATION {}         -> instr
  582     UNWIND {}           -> instr
  583     DELTA _             -> instr
  584 
  585     JXX _ _             -> instr
  586     JXX_GBL _ _         -> instr
  587     CLTD _              -> instr
  588 
  589     POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
  590     LZCNT  fmt src dst -> LZCNT  fmt (patchOp src) (env dst)
  591     TZCNT  fmt src dst -> TZCNT  fmt (patchOp src) (env dst)
  592     PDEP   fmt src mask dst -> PDEP   fmt (patchOp src) (patchOp mask) (env dst)
  593     PEXT   fmt src mask dst -> PEXT   fmt (patchOp src) (patchOp mask) (env dst)
  594     BSF    fmt src dst -> BSF    fmt (patchOp src) (env dst)
  595     BSR    fmt src dst -> BSR    fmt (patchOp src) (env dst)
  596 
  597     PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
  598 
  599     LOCK i               -> LOCK (patchRegsOfInstr i env)
  600     XADD fmt src dst     -> patch2 (XADD fmt) src dst
  601     CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
  602     XCHG fmt src dst     -> XCHG fmt (patchOp src) (env dst)
  603     MFENCE               -> instr
  604 
  605     _other              -> panic "patchRegs: unrecognised instr"
  606 
  607   where
  608     patch1 :: (Operand -> a) -> Operand -> a
  609     patch1 insn op      = insn $! patchOp op
  610     patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
  611     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
  612 
  613     patchOp (OpReg  reg) = OpReg $! env reg
  614     patchOp (OpImm  imm) = OpImm imm
  615     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
  616 
  617     lookupAddr (ImmAddr imm off) = ImmAddr imm off
  618     lookupAddr (AddrBaseIndex base index disp)
  619       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
  620       where
  621         lookupBase EABaseNone       = EABaseNone
  622         lookupBase EABaseRip        = EABaseRip
  623         lookupBase (EABaseReg r)    = EABaseReg $! env r
  624 
  625         lookupIndex EAIndexNone     = EAIndexNone
  626         lookupIndex (EAIndex r i)   = (EAIndex $! env r) i
  627 
  628 
  629 --------------------------------------------------------------------------------
  630 isJumpishInstr
  631         :: Instr -> Bool
  632 
  633 isJumpishInstr instr
  634  = case instr of
  635         JMP{}           -> True
  636         JXX{}           -> True
  637         JXX_GBL{}       -> True
  638         JMP_TBL{}       -> True
  639         CALL{}          -> True
  640         _               -> False
  641 
  642 
  643 jumpDestsOfInstr
  644         :: Instr
  645         -> [BlockId]
  646 
  647 jumpDestsOfInstr insn
  648   = case insn of
  649         JXX _ id        -> [id]
  650         JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
  651         _               -> []
  652 
  653 
  654 patchJumpInstr
  655         :: Instr -> (BlockId -> BlockId) -> Instr
  656 
  657 patchJumpInstr insn patchF
  658   = case insn of
  659         JXX cc id       -> JXX cc (patchF id)
  660         JMP_TBL op ids section lbl
  661           -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
  662         _               -> insn
  663     where
  664         patchJumpDest f (DestBlockId id) = DestBlockId (f id)
  665         patchJumpDest _ dest             = dest
  666 
  667 
  668 
  669 
  670 
  671 -- -----------------------------------------------------------------------------
  672 -- | Make a spill instruction.
  673 mkSpillInstr
  674     :: NCGConfig
  675     -> Reg      -- register to spill
  676     -> Int      -- current stack delta
  677     -> Int      -- spill slot to use
  678     -> [Instr]
  679 
  680 mkSpillInstr config reg delta slot
  681   = let off     = spillSlotToOffset platform slot - delta
  682     in
  683     case targetClassOfReg platform reg of
  684            RcInteger   -> [MOV (archWordFormat is32Bit)
  685                                    (OpReg reg) (OpAddr (spRel platform off))]
  686            RcDouble    -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))]
  687            _         -> panic "X86.mkSpillInstr: no match"
  688     where platform = ncgPlatform config
  689           is32Bit = target32Bit platform
  690 
  691 -- | Make a spill reload instruction.
  692 mkLoadInstr
  693     :: NCGConfig
  694     -> Reg      -- register to load
  695     -> Int      -- current stack delta
  696     -> Int      -- spill slot to use
  697     -> [Instr]
  698 
  699 mkLoadInstr config reg delta slot
  700   = let off     = spillSlotToOffset platform slot - delta
  701     in
  702         case targetClassOfReg platform reg of
  703               RcInteger -> ([MOV (archWordFormat is32Bit)
  704                                  (OpAddr (spRel platform off)) (OpReg reg)])
  705               RcDouble  -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
  706               _         -> panic "X86.mkLoadInstr"
  707     where platform = ncgPlatform config
  708           is32Bit = target32Bit platform
  709 
  710 spillSlotSize :: Platform -> Int
  711 spillSlotSize platform
  712    | target32Bit platform = 12
  713    | otherwise            = 8
  714 
  715 maxSpillSlots :: NCGConfig -> Int
  716 maxSpillSlots config
  717     = ((ncgSpillPreallocSize config - 64) `div` spillSlotSize (ncgPlatform config)) - 1
  718 --  = 0 -- useful for testing allocMoreStack
  719 
  720 -- number of bytes that the stack pointer should be aligned to
  721 stackAlign :: Int
  722 stackAlign = 16
  723 
  724 -- convert a spill slot number to a *byte* offset, with no sign:
  725 -- decide on a per arch basis whether you are spilling above or below
  726 -- the C stack pointer.
  727 spillSlotToOffset :: Platform -> Int -> Int
  728 spillSlotToOffset platform slot
  729    = 64 + spillSlotSize platform * slot
  730 
  731 --------------------------------------------------------------------------------
  732 
  733 -- | See if this instruction is telling us the current C stack delta
  734 takeDeltaInstr
  735         :: Instr
  736         -> Maybe Int
  737 
  738 takeDeltaInstr instr
  739  = case instr of
  740         DELTA i         -> Just i
  741         _               -> Nothing
  742 
  743 
  744 isMetaInstr
  745         :: Instr
  746         -> Bool
  747 
  748 isMetaInstr instr
  749  = case instr of
  750         COMMENT{}       -> True
  751         LOCATION{}      -> True
  752         LDATA{}         -> True
  753         NEWBLOCK{}      -> True
  754         UNWIND{}        -> True
  755         DELTA{}         -> True
  756         _               -> False
  757 
  758 
  759 
  760 ---  TODO: why is there
  761 -- | Make a reg-reg move instruction.
  762 --      On SPARC v8 there are no instructions to move directly between
  763 --      floating point and integer regs. If we need to do that then we
  764 --      have to go via memory.
  765 --
  766 mkRegRegMoveInstr
  767     :: Platform
  768     -> Reg
  769     -> Reg
  770     -> Instr
  771 
  772 mkRegRegMoveInstr platform src dst
  773  = case targetClassOfReg platform src of
  774         RcInteger -> case platformArch platform of
  775                      ArchX86    -> MOV II32 (OpReg src) (OpReg dst)
  776                      ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
  777                      _          -> panic "X86.mkRegRegMoveInstr: Bad arch"
  778         RcDouble    ->  MOV FF64 (OpReg src) (OpReg dst)
  779         -- this code is the lie we tell ourselves because both float and double
  780         -- use the same register class.on x86_64 and x86 32bit with SSE2,
  781         -- more plainly, both use the XMM registers
  782         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  783 
  784 -- | Check whether an instruction represents a reg-reg move.
  785 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
  786 --      by assigning the src and dest temporaries to the same real register.
  787 --
  788 takeRegRegMoveInstr
  789         :: Instr
  790         -> Maybe (Reg,Reg)
  791 
  792 takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
  793         = Just (r1,r2)
  794 
  795 takeRegRegMoveInstr _  = Nothing
  796 
  797 
  798 -- | Make an unconditional branch instruction.
  799 mkJumpInstr
  800         :: BlockId
  801         -> [Instr]
  802 
  803 mkJumpInstr id
  804         = [JXX ALWAYS id]
  805 
  806 -- Note [Windows stack layout]
  807 -- | On most OSes the kernel will place a guard page after the current stack
  808 --   page.  If you allocate larger than a page worth you may jump over this
  809 --   guard page.  Not only is this a security issue, but on certain OSes such
  810 --   as Windows a new page won't be allocated if you don't hit the guard.  This
  811 --   will cause a segfault or access fault.
  812 --
  813 --   This function defines if the current allocation amount requires a probe.
  814 --   On Windows (for now) we emit a call to _chkstk for this.  For other OSes
  815 --   this is not yet implemented.
  816 --   See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
  817 --   The Windows stack looks like this:
  818 --
  819 --                         +-------------------+
  820 --                         |        SP         |
  821 --                         +-------------------+
  822 --                         |                   |
  823 --                         |    GUARD PAGE     |
  824 --                         |                   |
  825 --                         +-------------------+
  826 --                         |                   |
  827 --                         |                   |
  828 --                         |     UNMAPPED      |
  829 --                         |                   |
  830 --                         |                   |
  831 --                         +-------------------+
  832 --
  833 --   In essence each allocation larger than a page size needs to be chunked and
  834 --   a probe emitted after each page allocation.  You have to hit the guard
  835 --   page so the kernel can map in the next page, otherwise you'll segfault.
  836 --   See Note [Windows stack allocations].
  837 --
  838 needs_probe_call :: Platform -> Int -> Bool
  839 needs_probe_call platform amount
  840   = case platformOS platform of
  841      OSMinGW32 -> case platformArch platform of
  842                     ArchX86    -> amount > (4 * 1024)
  843                     ArchX86_64 -> amount > (4 * 1024)
  844                     _          -> False
  845      _         -> False
  846 
  847 mkStackAllocInstr
  848         :: Platform
  849         -> Int
  850         -> [Instr]
  851 mkStackAllocInstr platform amount
  852   = case platformOS platform of
  853       OSMinGW32 ->
  854         -- These will clobber AX but this should be ok because
  855         --
  856         -- 1. It is the first thing we do when entering the closure and AX is
  857         --    a caller saved registers on Windows both on x86_64 and x86.
  858         --
  859         -- 2. The closures are only entered via a call or longjmp in which case
  860         --    there are no expectations for volatile registers.
  861         --
  862         -- 3. When the target is a local branch point it is re-targeted
  863         --    after the dealloc, preserving #2.  See note [extra spill slots].
  864         --
  865         -- We emit a call because the stack probes are quite involved and
  866         -- would bloat code size a lot.  GHC doesn't really have an -Os.
  867         -- ___chkstk is guaranteed to leave all nonvolatile registers and AX
  868         -- untouched.  It's part of the standard prologue code for any Windows
  869         -- function dropping the stack more than a page.
  870         -- See Note [Windows stack layout]
  871         case platformArch platform of
  872             ArchX86    | needs_probe_call platform amount ->
  873                            [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
  874                            , CALL (Left $ strImmLit "___chkstk_ms") [eax]
  875                            , SUB II32 (OpReg eax) (OpReg esp)
  876                            ]
  877                        | otherwise ->
  878                            [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
  879                            , TEST II32 (OpReg esp) (OpReg esp)
  880                            ]
  881             ArchX86_64 | needs_probe_call platform amount ->
  882                            [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
  883                            , CALL (Left $ strImmLit "___chkstk_ms") [rax]
  884                            , SUB II64 (OpReg rax) (OpReg rsp)
  885                            ]
  886                        | otherwise ->
  887                            [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
  888                            , TEST II64 (OpReg rsp) (OpReg rsp)
  889                            ]
  890             _ -> panic "X86.mkStackAllocInstr"
  891       _       ->
  892         case platformArch platform of
  893           ArchX86    -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
  894           ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
  895           _ -> panic "X86.mkStackAllocInstr"
  896 
  897 mkStackDeallocInstr
  898         :: Platform
  899         -> Int
  900         -> [Instr]
  901 mkStackDeallocInstr platform amount
  902   = case platformArch platform of
  903       ArchX86    -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
  904       ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
  905       _ -> panic "X86.mkStackDeallocInstr"
  906 
  907 
  908 --
  909 -- Note [extra spill slots]
  910 --
  911 -- If the register allocator used more spill slots than we have
  912 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
  913 -- C stack space on entry and exit from this proc.  Therefore we
  914 -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
  915 -- before every non-local jump.
  916 --
  917 -- This became necessary when the new codegen started bundling entire
  918 -- functions together into one proc, because the register allocator
  919 -- assigns a different stack slot to each virtual reg within a proc.
  920 -- To avoid using so many slots we could also:
  921 --
  922 --   - split up the proc into connected components before code generator
  923 --
  924 --   - rename the virtual regs, so that we re-use vreg names and hence
  925 --     stack slots for non-overlapping vregs.
  926 --
  927 -- Note that when a block is both a non-local entry point (with an
  928 -- info table) and a local branch target, we have to split it into
  929 -- two, like so:
  930 --
  931 --    <info table>
  932 --    L:
  933 --       <code>
  934 --
  935 -- becomes
  936 --
  937 --    <info table>
  938 --    L:
  939 --       subl $rsp, N
  940 --       jmp Lnew
  941 --    Lnew:
  942 --       <code>
  943 --
  944 -- and all branches pointing to L are retargetted to point to Lnew.
  945 -- Otherwise, we would repeat the $rsp adjustment for each branch to
  946 -- L.
  947 --
  948 -- Returns a list of (L,Lnew) pairs.
  949 --
  950 allocMoreStack
  951   :: Platform
  952   -> Int
  953   -> NatCmmDecl statics GHC.CmmToAsm.X86.Instr.Instr
  954   -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.X86.Instr.Instr, [(BlockId,BlockId)])
  955 
  956 allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
  957 allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
  958     let entries = entryBlocks proc
  959 
  960     uniqs <- replicateM (length entries) getUniqueM
  961 
  962     let
  963       delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
  964         where x = slots * spillSlotSize platform -- sp delta
  965 
  966       alloc   = mkStackAllocInstr   platform delta
  967       dealloc = mkStackDeallocInstr platform delta
  968 
  969       retargetList = (zip entries (map mkBlockId uniqs))
  970 
  971       new_blockmap :: LabelMap BlockId
  972       new_blockmap = mapFromList retargetList
  973 
  974       insert_stack_insns (BasicBlock id insns)
  975          | Just new_blockid <- mapLookup id new_blockmap
  976          = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
  977            , BasicBlock new_blockid block' ]
  978          | otherwise
  979          = [ BasicBlock id block' ]
  980          where
  981            block' = foldr insert_dealloc [] insns
  982 
  983       insert_dealloc insn r = case insn of
  984          JMP _ _     -> dealloc ++ (insn : r)
  985          JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
  986          _other      -> patchJumpInstr insn retarget : r
  987            where retarget b = fromMaybe b (mapLookup b new_blockmap)
  988 
  989       new_code = concatMap insert_stack_insns code
  990     -- in
  991     return (CmmProc info lbl live (ListGraph new_code), retargetList)
  992 
  993 data JumpDest = DestBlockId BlockId | DestImm Imm
  994 
  995 -- Debug Instance
  996 instance Outputable JumpDest where
  997   ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
  998   ppr (DestImm _imm)    = text "jd<imm>:noShow"
  999 
 1000 
 1001 getJumpDestBlockId :: JumpDest -> Maybe BlockId
 1002 getJumpDestBlockId (DestBlockId bid) = Just bid
 1003 getJumpDestBlockId _                 = Nothing
 1004 
 1005 canShortcut :: Instr -> Maybe JumpDest
 1006 canShortcut (JXX ALWAYS id)      = Just (DestBlockId id)
 1007 canShortcut (JMP (OpImm imm) _)  = Just (DestImm imm)
 1008 canShortcut _                    = Nothing
 1009 
 1010 
 1011 -- This helper shortcuts a sequence of branches.
 1012 -- The blockset helps avoid following cycles.
 1013 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 1014 shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
 1015   where
 1016     shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
 1017     shortcutJump' fn seen insn@(JXX cc id) =
 1018         if setMember id seen then insn
 1019         else case fn id of
 1020             Nothing                -> insn
 1021             Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 1022             Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 1023         where seen' = setInsert id seen
 1024     shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
 1025         let updateBlock (Just (DestBlockId bid))  =
 1026                 case fn bid of
 1027                     Nothing   -> Just (DestBlockId bid )
 1028                     Just dest -> Just dest
 1029             updateBlock dest = dest
 1030             blocks' = map updateBlock blocks
 1031         in  JMP_TBL addr blocks' section tblId
 1032     shortcutJump' _ _ other = other
 1033 
 1034 -- Here because it knows about JumpDest
 1035 shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics)
 1036 shortcutStatics fn (align, CmmStaticsRaw lbl statics)
 1037   = (align, CmmStaticsRaw lbl $ map (shortcutStatic fn) statics)
 1038   -- we need to get the jump tables, so apply the mapping to the entries
 1039   -- of a CmmData too.
 1040 
 1041 shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
 1042 shortcutLabel fn lab
 1043   | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
 1044   | otherwise                              = lab
 1045 
 1046 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 1047 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 1048   = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
 1049 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
 1050   = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
 1051         -- slightly dodgy, we're ignoring the second label, but this
 1052         -- works with the way we use CmmLabelDiffOff for jump tables now.
 1053 shortcutStatic _ other_static
 1054         = other_static
 1055 
 1056 shortBlockId
 1057         :: (BlockId -> Maybe JumpDest)
 1058         -> UniqSet Unique
 1059         -> BlockId
 1060         -> CLabel
 1061 
 1062 shortBlockId fn seen blockid =
 1063   case (elementOfUniqSet uq seen, fn blockid) of
 1064     (True, _)    -> blockLbl blockid
 1065     (_, Nothing) -> blockLbl blockid
 1066     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 1067     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 1068     (_, _other) -> panic "shortBlockId"
 1069   where uq = getUnique blockid