never executed always true always false

    1 {-# LANGUAGE CPP #-}
    2 {-# LANGUAGE GADTs, MultiWayIf #-}
    3 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
    4 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
    5 
    6 -- | Handle conversion of CmmProc to LLVM code.
    7 module GHC.CmmToLlvm.CodeGen ( genLlvmProc ) where
    8 
    9 import GHC.Prelude
   10 
   11 import GHC.Driver.Session
   12 import GHC.Driver.Ppr
   13 
   14 import GHC.Platform
   15 import GHC.Platform.Regs ( activeStgRegs )
   16 
   17 import GHC.Llvm
   18 import GHC.CmmToLlvm.Base
   19 import GHC.CmmToLlvm.Regs
   20 
   21 import GHC.Cmm.BlockId
   22 import GHC.Cmm.CLabel
   23 import GHC.Cmm
   24 import GHC.Cmm.Ppr as PprCmm
   25 import GHC.Cmm.Utils
   26 import GHC.Cmm.Switch
   27 import GHC.Cmm.Dataflow.Block
   28 import GHC.Cmm.Dataflow.Graph
   29 import GHC.Cmm.Dataflow.Collections
   30 
   31 import GHC.Data.FastString
   32 import GHC.Data.OrdList
   33 
   34 import GHC.Types.ForeignCall
   35 import GHC.Types.Unique.Supply
   36 import GHC.Types.Unique
   37 
   38 import GHC.Utils.Outputable
   39 import GHC.Utils.Panic.Plain (massert)
   40 import qualified GHC.Utils.Panic as Panic
   41 import GHC.Utils.Misc
   42 
   43 import Control.Monad.Trans.Class
   44 import Control.Monad.Trans.Writer
   45 import Control.Monad
   46 
   47 import qualified Data.Semigroup as Semigroup
   48 import Data.List ( nub )
   49 import Data.Maybe ( catMaybes )
   50 
   51 type Atomic = Bool
   52 type LlvmStatements = OrdList LlvmStatement
   53 
   54 data Signage = Signed | Unsigned deriving (Eq, Show)
   55 
   56 -- -----------------------------------------------------------------------------
   57 -- | Top-level of the LLVM proc Code generator
   58 --
   59 genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
   60 genLlvmProc (CmmProc infos lbl live graph) = do
   61     let blocks = toBlockListEntryFirstFalseFallthrough graph
   62     (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
   63     let info = mapLookup (g_entry graph) infos
   64         proc = CmmProc info lbl live (ListGraph lmblocks)
   65     return (proc:lmdata)
   66 
   67 genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
   68 
   69 -- -----------------------------------------------------------------------------
   70 -- * Block code generation
   71 --
   72 
   73 -- | Generate code for a list of blocks that make up a complete
   74 -- procedure. The first block in the list is expected to be the entry
   75 -- point.
   76 basicBlocksCodeGen :: LiveGlobalRegs -> [CmmBlock]
   77                       -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
   78 basicBlocksCodeGen _    []                     = panic "no entry block!"
   79 basicBlocksCodeGen live cmmBlocks
   80   = do -- Emit the prologue
   81        -- N.B. this must be its own block to ensure that the entry block of the
   82        -- procedure has no predecessors, as required by the LLVM IR. See #17589
   83        -- and #11649.
   84        bid <- newBlockId
   85        (prologue, prologueTops) <- funPrologue live cmmBlocks
   86        let entryBlock = BasicBlock bid (fromOL prologue)
   87 
   88        -- Generate code
   89        (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks
   90 
   91        -- Compose
   92        return (entryBlock : blocks, prologueTops ++ concat topss)
   93 
   94 
   95 -- | Generate code for one block
   96 basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
   97 basicBlockCodeGen block
   98   = do let (_, nodes, tail)  = blockSplit block
   99            id = entryLabel block
  100        (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
  101        (tail_instrs, top')  <- stmtToInstrs tail
  102        let instrs = fromOL (mid_instrs `appOL` tail_instrs)
  103        return (BasicBlock id instrs, top' ++ top)
  104 
  105 -- -----------------------------------------------------------------------------
  106 -- * CmmNode code generation
  107 --
  108 
  109 -- A statement conversion return data.
  110 --   * LlvmStatements: The compiled LLVM statements.
  111 --   * LlvmCmmDecl: Any global data needed.
  112 type StmtData = (LlvmStatements, [LlvmCmmDecl])
  113 
  114 
  115 -- | Convert a list of CmmNode's to LlvmStatement's
  116 stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
  117 stmtsToInstrs stmts
  118    = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
  119         return (concatOL instrss, concat topss)
  120 
  121 
  122 -- | Convert a CmmStmt to a list of LlvmStatement's
  123 stmtToInstrs :: CmmNode e x -> LlvmM StmtData
  124 stmtToInstrs stmt = case stmt of
  125 
  126     CmmComment _         -> return (nilOL, []) -- nuke comments
  127     CmmTick    _         -> return (nilOL, [])
  128     CmmUnwind  {}        -> return (nilOL, [])
  129 
  130     CmmAssign reg src    -> genAssign reg src
  131     CmmStore addr src    -> genStore addr src
  132 
  133     CmmBranch id         -> genBranch id
  134     CmmCondBranch arg true false likely
  135                          -> genCondBranch arg true false likely
  136     CmmSwitch arg ids    -> genSwitch arg ids
  137 
  138     -- Foreign Call
  139     CmmUnsafeForeignCall target res args
  140         -> genCall target res args
  141 
  142     -- Tail call
  143     CmmCall { cml_target = arg,
  144               cml_args_regs = live } -> genJump arg live
  145 
  146     _ -> panic "Llvm.CodeGen.stmtToInstrs"
  147 
  148 -- | Wrapper function to declare an instrinct function by function type
  149 getInstrinct2 :: LMString -> LlvmType -> LlvmM ExprData
  150 getInstrinct2 fname fty@(LMFunction funSig) = do
  151 
  152     let fv   = LMGlobalVar fname fty (funcLinkage funSig) Nothing Nothing Constant
  153 
  154     fn <- funLookup fname
  155     tops <- case fn of
  156       Just _  ->
  157         return []
  158       Nothing -> do
  159         funInsert fname fty
  160         un <- getUniqueM
  161         let lbl = mkAsmTempLabel un
  162         return [CmmData (Section Data lbl) [([],[fty])]]
  163 
  164     return (fv, nilOL, tops)
  165 
  166 getInstrinct2 _ _ = error "getInstrinct2: Non-function type!"
  167 
  168 -- | Declares an instrinct function by return and parameter types
  169 getInstrinct :: LMString -> LlvmType -> [LlvmType] -> LlvmM ExprData
  170 getInstrinct fname retTy parTys =
  171     let funSig = LlvmFunctionDecl fname ExternallyVisible CC_Ccc retTy
  172                     FixedArgs (tysToParams parTys) Nothing
  173         fty = LMFunction funSig
  174     in getInstrinct2 fname fty
  175 
  176 -- | Memory barrier instruction for LLVM >= 3.0
  177 barrier :: LlvmM StmtData
  178 barrier = do
  179     let s = Fence False SyncSeqCst
  180     return (unitOL s, [])
  181 
  182 -- | Insert a 'barrier', unless the target platform is in the provided list of
  183 --   exceptions (where no code will be emitted instead).
  184 barrierUnless :: [Arch] -> LlvmM StmtData
  185 barrierUnless exs = do
  186     platform <- getPlatform
  187     if platformArch platform `elem` exs
  188         then return (nilOL, [])
  189         else barrier
  190 
  191 -- | Foreign Calls
  192 genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
  193 
  194 -- Barriers need to be handled specially as they are implemented as LLVM
  195 -- intrinsic functions.
  196 genCall (PrimTarget MO_ReadBarrier) _ _ =
  197     barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
  198 
  199 genCall (PrimTarget MO_WriteBarrier) _ _ =
  200     barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
  201 
  202 genCall (PrimTarget MO_Touch) _ _ =
  203     return (nilOL, [])
  204 
  205 genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
  206     dstV <- getCmmRegW (CmmLocal dst)
  207     let ty = cmmToLlvmType $ localRegType dst
  208         width = widthToLlvmFloat w
  209     castV <- lift $ mkLocalVar ty
  210     ve <- exprToVarW e
  211     statement $ Assignment castV $ Cast LM_Uitofp ve width
  212     statement $ Store castV dstV
  213 
  214 genCall (PrimTarget (MO_UF_Conv _)) [_] args =
  215     panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
  216     "Can only handle 1, given" ++ show (length args) ++ "."
  217 
  218 -- Handle prefetching data
  219 genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
  220   | 0 <= localityInt && localityInt <= 3 = runStmtsDecls $ do
  221     let argTy = [i8Ptr, i32, i32, i32]
  222         funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
  223                              CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
  224 
  225     let (_, arg_hints) = foreignTargetHints t
  226     let args_hints' = zip args arg_hints
  227     argVars <- arg_varsW args_hints' ([], nilOL, [])
  228     fptr    <- liftExprData $ getFunPtr funTy t
  229     argVars' <- castVarsW Signed $ zip argVars argTy
  230 
  231     let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
  232     statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
  233   | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
  234 
  235 -- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
  236 -- and return types
  237 genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
  238     genCallSimpleCast w t dsts args
  239 
  240 genCall t@(PrimTarget (MO_Pdep w)) dsts args =
  241     genCallSimpleCast2 w t dsts args
  242 genCall t@(PrimTarget (MO_Pext w)) dsts args =
  243     genCallSimpleCast2 w t dsts args
  244 genCall t@(PrimTarget (MO_Clz w)) dsts args =
  245     genCallSimpleCast w t dsts args
  246 genCall t@(PrimTarget (MO_Ctz w)) dsts args =
  247     genCallSimpleCast w t dsts args
  248 genCall t@(PrimTarget (MO_BSwap w)) dsts args =
  249     genCallSimpleCast w t dsts args
  250 genCall t@(PrimTarget (MO_BRev w)) dsts args =
  251     genCallSimpleCast w t dsts args
  252 
  253 genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
  254     addrVar <- exprToVarW addr
  255     nVar <- exprToVarW n
  256     let targetTy = widthToLlvmInt width
  257         ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
  258     ptrVar <- doExprW (pLift targetTy) ptrExpr
  259     dstVar <- getCmmRegW (CmmLocal dst)
  260     let op = case amop of
  261                AMO_Add  -> LAO_Add
  262                AMO_Sub  -> LAO_Sub
  263                AMO_And  -> LAO_And
  264                AMO_Nand -> LAO_Nand
  265                AMO_Or   -> LAO_Or
  266                AMO_Xor  -> LAO_Xor
  267     retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
  268     statement $ Store retVar dstVar
  269 
  270 genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
  271     dstV <- getCmmRegW (CmmLocal dst)
  272     v1 <- genLoadW True addr (localRegType dst)
  273     statement $ Store v1 dstV
  274 
  275 genCall (PrimTarget (MO_Cmpxchg _width))
  276         [dst] [addr, old, new] = runStmtsDecls $ do
  277     addrVar <- exprToVarW addr
  278     oldVar <- exprToVarW old
  279     newVar <- exprToVarW new
  280     let targetTy = getVarType oldVar
  281         ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
  282     ptrVar <- doExprW (pLift targetTy) ptrExpr
  283     dstVar <- getCmmRegW (CmmLocal dst)
  284     retVar <- doExprW (LMStructU [targetTy,i1])
  285               $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
  286     retVar' <- doExprW targetTy $ ExtractV retVar 0
  287     statement $ Store retVar' dstVar
  288 
  289 genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
  290     dstV <- getCmmRegW (CmmLocal dst) :: WriterT LlvmAccum LlvmM LlvmVar
  291     addrVar <- exprToVarW addr
  292     valVar <- exprToVarW val
  293     let ptrTy = pLift $ getVarType valVar
  294         ptrExpr = Cast LM_Inttoptr addrVar ptrTy
  295     ptrVar <- doExprW ptrTy ptrExpr
  296     resVar <- doExprW (getVarType valVar) (AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst)
  297     statement $ Store resVar dstV
  298 
  299 genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
  300     addrVar <- exprToVarW addr
  301     valVar <- exprToVarW val
  302     let ptrTy = pLift $ getVarType valVar
  303         ptrExpr = Cast LM_Inttoptr addrVar ptrTy
  304     ptrVar <- doExprW ptrTy ptrExpr
  305     statement $ Expr $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
  306 
  307 -- Handle memcpy function specifically since llvm's intrinsic version takes
  308 -- some extra parameters.
  309 genCall t@(PrimTarget op) [] args
  310  | Just align <- machOpMemcpyishAlign op
  311  = do
  312    platform <- getPlatform
  313    runStmtsDecls $ do
  314     let isVolTy = [i1]
  315         isVolVal = [mkIntLit i1 0]
  316         argTy | MO_Memset _ <- op = [i8Ptr, i8,    llvmWord platform, i32] ++ isVolTy
  317               | otherwise         = [i8Ptr, i8Ptr, llvmWord platform, i32] ++ isVolTy
  318         funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
  319                              CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
  320 
  321     let (_, arg_hints) = foreignTargetHints t
  322     let args_hints = zip args arg_hints
  323     argVars       <- arg_varsW args_hints ([], nilOL, [])
  324     fptr          <- getFunPtrW funTy t
  325     argVars' <- castVarsW Signed $ zip argVars argTy
  326 
  327     let alignVal = mkIntLit i32 align
  328         arguments = argVars' ++ (alignVal:isVolVal)
  329     statement $ Expr $ Call StdCall fptr arguments []
  330 
  331 -- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
  332 -- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
  333 -- generate 'mul' on 128-bit operands. Then we only need some plumbing to
  334 -- extract the two 64-bit values out of 128-bit result.
  335 genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
  336     let width = widthToLlvmInt w
  337         bitWidth = widthInBits w
  338         width2x = LMInt (bitWidth * 2)
  339     -- First zero-extend the operands ('mul' instruction requires the operands
  340     -- and the result to be of the same type). Note that we don't use 'castVars'
  341     -- because it tries to do LM_Sext.
  342     lhsVar <- exprToVarW lhs
  343     rhsVar <- exprToVarW rhs
  344     lhsExt <- doExprW width2x $ Cast LM_Zext lhsVar width2x
  345     rhsExt <- doExprW width2x $ Cast LM_Zext rhsVar width2x
  346     -- Do the actual multiplication (note that the result is also 2x width).
  347     retV <- doExprW width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
  348     -- Extract the lower bits of the result into retL.
  349     retL <- doExprW width $ Cast LM_Trunc retV width
  350     -- Now we unsigned right-shift the higher bits by width.
  351     let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
  352     retShifted <- doExprW width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
  353     -- And extract them into retH.
  354     retH <- doExprW width $ Cast LM_Trunc retShifted width
  355     dstRegL <- getCmmRegW (CmmLocal dstL)
  356     dstRegH <- getCmmRegW (CmmLocal dstH)
  357     statement $ Store retL dstRegL
  358     statement $ Store retH dstRegH
  359 
  360 genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
  361     let width = widthToLlvmInt w
  362         bitWidth = widthInBits w
  363         width2x = LMInt (bitWidth * 2)
  364     -- First sign-extend the operands ('mul' instruction requires the operands
  365     -- and the result to be of the same type). Note that we don't use 'castVars'
  366     -- because it tries to do LM_Sext.
  367     lhsVar <- exprToVarW lhs
  368     rhsVar <- exprToVarW rhs
  369     lhsExt <- doExprW width2x $ Cast LM_Sext lhsVar width2x
  370     rhsExt <- doExprW width2x $ Cast LM_Sext rhsVar width2x
  371     -- Do the actual multiplication (note that the result is also 2x width).
  372     retV <- doExprW width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
  373     -- Extract the lower bits of the result into retL.
  374     retL <- doExprW width $ Cast LM_Trunc retV width
  375     -- Now we signed right-shift the higher bits by width.
  376     let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
  377     retShifted <- doExprW width2x $ LlvmOp LM_MO_AShr retV widthLlvmLit
  378     -- And extract them into retH.
  379     retH <- doExprW width $ Cast LM_Trunc retShifted width
  380     -- Check if the carry is useful by doing a full arithmetic right shift on
  381     -- retL and comparing the result with retH
  382     let widthLlvmLitm1 = LMLitVar $ LMIntLit (fromIntegral bitWidth - 1) width
  383     retH' <- doExprW width $ LlvmOp LM_MO_AShr retL widthLlvmLitm1
  384     retC1  <- doExprW i1 $ Compare LM_CMP_Ne retH retH' -- Compare op returns a 1-bit value (i1)
  385     retC   <- doExprW width $ Cast LM_Zext retC1 width  -- so we zero-extend it
  386     dstRegL <- getCmmRegW (CmmLocal dstL)
  387     dstRegH <- getCmmRegW (CmmLocal dstH)
  388     dstRegC <- getCmmRegW (CmmLocal dstC)
  389     statement $ Store retL dstRegL
  390     statement $ Store retH dstRegH
  391     statement $ Store retC dstRegC
  392 
  393 -- MO_U_QuotRem2 is another case we handle by widening the registers to double
  394 -- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
  395 -- main difference here is that we need to combine two words into one register
  396 -- and then use both 'udiv' and 'urem' instructions to compute the result.
  397 genCall (PrimTarget (MO_U_QuotRem2 w))
  398         [dstQ, dstR] [lhsH, lhsL, rhs] = runStmtsDecls $ do
  399     let width = widthToLlvmInt w
  400         bitWidth = widthInBits w
  401         width2x = LMInt (bitWidth * 2)
  402     -- First zero-extend all parameters to double width.
  403     let zeroExtend expr = do
  404             var <- exprToVarW expr
  405             doExprW width2x $ Cast LM_Zext var width2x
  406     lhsExtH <- zeroExtend lhsH
  407     lhsExtL <- zeroExtend lhsL
  408     rhsExt <- zeroExtend rhs
  409     -- Now we combine the first two parameters (that represent the high and low
  410     -- bits of the value). So first left-shift the high bits to their position
  411     -- and then bit-or them with the low bits.
  412     let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
  413     lhsExtHShifted <- doExprW width2x $ LlvmOp LM_MO_Shl lhsExtH widthLlvmLit
  414     lhsExt <- doExprW width2x $ LlvmOp LM_MO_Or lhsExtHShifted lhsExtL
  415     -- Finally, we can call 'udiv' and 'urem' to compute the results.
  416     retExtDiv <- doExprW width2x $ LlvmOp LM_MO_UDiv lhsExt rhsExt
  417     retExtRem <- doExprW width2x $ LlvmOp LM_MO_URem lhsExt rhsExt
  418     -- And since everything is in 2x width, we need to truncate the results and
  419     -- then return them.
  420     let narrow var = doExprW width $ Cast LM_Trunc var width
  421     retDiv <- narrow retExtDiv
  422     retRem <- narrow retExtRem
  423     dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
  424     dstRegR <- lift $ getCmmReg (CmmLocal dstR)
  425     statement $ Store retDiv dstRegQ
  426     statement $ Store retRem dstRegR
  427 
  428 -- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
  429 -- which we need to extract the actual values.
  430 genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
  431     genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
  432 genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
  433     genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
  434 
  435 -- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the
  436 -- return tuple to be the overflow bit and the second element to contain the
  437 -- actual result of the addition. So we still use genCallWithOverflow but swap
  438 -- the return registers.
  439 genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
  440     genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
  441 
  442 genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] =
  443     genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
  444 
  445 genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
  446     genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
  447 
  448 -- Handle all other foreign calls and prim ops.
  449 genCall target res args = do
  450   platform <- getPlatform
  451   runStmtsDecls $ do
  452 
  453     -- extract Cmm call convention, and translate to LLVM call convention
  454     let lmconv = case target of
  455             ForeignTarget _ (ForeignConvention conv _ _ _) ->
  456               case conv of
  457                  StdCallConv  -> case platformArch platform of
  458                                  ArchX86    -> CC_X86_Stdcc
  459                                  ArchX86_64 -> CC_X86_Stdcc
  460                                  _          -> CC_Ccc
  461                  CCallConv    -> CC_Ccc
  462                  CApiConv     -> CC_Ccc
  463                  PrimCallConv       -> panic "GHC.CmmToLlvm.CodeGen.genCall: PrimCallConv"
  464                  JavaScriptCallConv -> panic "GHC.CmmToLlvm.CodeGen.genCall: JavaScriptCallConv"
  465 
  466             PrimTarget   _ -> CC_Ccc
  467 
  468     {-
  469         CC_Ccc of the possibilities here are a worry with the use of a custom
  470         calling convention for passing STG args. In practice the more
  471         dangerous combinations (e.g StdCall + llvmGhcCC) don't occur.
  472 
  473         The native code generator only handles StdCall and CCallConv.
  474     -}
  475 
  476     -- parameter types
  477     let arg_type (_, AddrHint) = (i8Ptr, [])
  478         -- cast pointers to i8*. Llvm equivalent of void*
  479         arg_type (expr, hint) =
  480             case cmmToLlvmType $ cmmExprType platform expr of
  481               ty@(LMInt n) | n < 64 && lmconv == CC_Ccc && platformCConvNeedsExtension platform
  482                  -> (ty, if hint == SignedHint then [SignExt] else [ZeroExt])
  483               ty -> (ty, [])
  484 
  485     -- ret type
  486     let ret_type [] = LMVoid
  487         ret_type [(_, AddrHint)] = i8Ptr
  488         ret_type [(reg, _)]      = cmmToLlvmType $ localRegType reg
  489         ret_type t = panic $ "genCall: Too many return values! Can only handle"
  490                         ++ " 0 or 1, given " ++ show (length t) ++ "."
  491 
  492     -- call attributes
  493     let fnAttrs | never_returns = NoReturn : llvmStdFunAttrs
  494                 | otherwise     = llvmStdFunAttrs
  495 
  496         never_returns = case target of
  497              ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
  498              _ -> False
  499 
  500     -- fun type
  501     let (res_hints, arg_hints) = foreignTargetHints target
  502     let args_hints = zip args arg_hints
  503     let ress_hints = zip res  res_hints
  504     let ccTy  = StdCall -- tail calls should be done through CmmJump
  505     let retTy = ret_type ress_hints
  506     let argTy = map arg_type args_hints
  507     let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
  508                              lmconv retTy FixedArgs argTy (llvmFunAlign platform)
  509 
  510 
  511     argVars <- arg_varsW args_hints ([], nilOL, [])
  512     fptr    <- getFunPtrW funTy target
  513 
  514     let doReturn | ccTy == TailCall  = statement $ Return Nothing
  515                  | never_returns     = statement $ Unreachable
  516                  | otherwise         = return ()
  517 
  518 
  519     -- make the actual call
  520     case retTy of
  521         LMVoid ->
  522             statement $ Expr $ Call ccTy fptr argVars fnAttrs
  523         _ -> do
  524             v1 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
  525             -- get the return register
  526             let ret_reg [reg] = reg
  527                 ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
  528                                 ++ " 1, given " ++ show (length t) ++ "."
  529             let creg = ret_reg res
  530             vreg <- getCmmRegW (CmmLocal creg)
  531             if retTy == pLower (getVarType vreg)
  532                 then do
  533                     statement $ Store v1 vreg
  534                     doReturn
  535                 else do
  536                     let ty = pLower $ getVarType vreg
  537                     let op = case ty of
  538                             vt | isPointer vt -> LM_Bitcast
  539                                | isInt     vt -> LM_Ptrtoint
  540                                | otherwise    ->
  541                                    panic $ "genCall: CmmReg bad match for"
  542                                         ++ " returned type!"
  543 
  544                     v2 <- doExprW ty $ Cast op v1 ty
  545                     statement $ Store v2 vreg
  546                     doReturn
  547 
  548 -- | Generate a call to an LLVM intrinsic that performs arithmetic operation
  549 -- with overflow bit (i.e., returns a struct containing the actual result of the
  550 -- operation and an overflow bit). This function will also extract the overflow
  551 -- bit and zero-extend it (all the corresponding Cmm PrimOps represent the
  552 -- overflow "bit" as a usual Int# or Word#).
  553 genCallWithOverflow
  554   :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
  555 genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
  556     -- So far this was only tested for the following four CallishMachOps.
  557     let valid = op `elem`   [ MO_Add2 w
  558                             , MO_AddIntC w
  559                             , MO_SubIntC w
  560                             , MO_AddWordC w
  561                             , MO_SubWordC w
  562                             ]
  563     massert valid
  564     let width = widthToLlvmInt w
  565     -- This will do most of the work of generating the call to the intrinsic and
  566     -- extracting the values from the struct.
  567     (value, overflowBit, (stmts, top)) <-
  568       genCallExtract t w (lhs, rhs) (width, i1)
  569     -- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
  570     -- both to be i<width>)
  571     (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
  572     dstRegV <- getCmmReg (CmmLocal dstV)
  573     dstRegO <- getCmmReg (CmmLocal dstO)
  574     let storeV = Store value dstRegV
  575         storeO = Store overflow dstRegO
  576     return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
  577 genCallWithOverflow _ _ _ _ =
  578     panic "genCallExtract: wrong ForeignTarget or number of arguments"
  579 
  580 -- | A helper function for genCallWithOverflow that handles generating the call
  581 -- to the LLVM intrinsic and extracting the result from the struct to LlvmVars.
  582 genCallExtract
  583     :: ForeignTarget           -- ^ PrimOp
  584     -> Width                   -- ^ Width of the operands.
  585     -> (CmmActual, CmmActual)  -- ^ Actual arguments.
  586     -> (LlvmType, LlvmType)    -- ^ LLVM types of the returned struct.
  587     -> LlvmM (LlvmVar, LlvmVar, StmtData)
  588 genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
  589     let width = widthToLlvmInt w
  590         argTy = [width, width]
  591         retTy = LMStructU [llvmTypeA, llvmTypeB]
  592 
  593     -- Process the arguments.
  594     let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
  595     (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
  596     (argsV2, args2) <- castVars Signed $ zip argsV1 argTy
  597 
  598     -- Get the function and make the call.
  599     fname <- cmmPrimOpFunctions op
  600     (fptr, _, top2) <- getInstrinct fname retTy argTy
  601     -- We use StdCall for primops. See also the last case of genCall.
  602     (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 []
  603 
  604     -- This will result in a two element struct, we need to use "extractvalue"
  605     -- to get them out of it.
  606     (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0)
  607     (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1)
  608 
  609     let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2
  610         tops = top1 ++ top2
  611     return (res1, res2, (stmts, tops))
  612 
  613 genCallExtract _ _ _ _ =
  614     panic "genCallExtract: unsupported ForeignTarget"
  615 
  616 -- Handle simple function call that only need simple type casting, of the form:
  617 --   truncate arg >>= \a -> call(a) >>= zext
  618 --
  619 -- since GHC only really has i32 and i64 types and things like Word8 are backed
  620 -- by an i32 and just present a logical i8 range. So we must handle conversions
  621 -- from i32 to i8 explicitly as LLVM is strict about types.
  622 genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
  623               -> LlvmM StmtData
  624 genCallSimpleCast w t@(PrimTarget op) [dst] args = do
  625     let width = widthToLlvmInt w
  626         dstTy = cmmToLlvmType $ localRegType dst
  627 
  628     fname                       <- cmmPrimOpFunctions op
  629     (fptr, _, top3)             <- getInstrinct fname width [width]
  630 
  631     dstV                        <- getCmmReg (CmmLocal dst)
  632 
  633     let (_, arg_hints) = foreignTargetHints t
  634     let args_hints = zip args arg_hints
  635     (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
  636     (argsV', stmts4)            <- castVars Signed $ zip argsV [width]
  637     (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
  638     (retVs', stmts5)            <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
  639     let retV'                    = singletonPanic "genCallSimpleCast" retVs'
  640     let s2                       = Store retV' dstV
  641 
  642     let stmts = stmts2 `appOL` stmts4 `snocOL`
  643                 s1 `appOL` stmts5 `snocOL` s2
  644     return (stmts, top2 ++ top3)
  645 genCallSimpleCast _ _ dsts _ =
  646     panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
  647 
  648 -- Handle simple function call that only need simple type casting, of the form:
  649 --   truncate arg >>= \a -> call(a) >>= zext
  650 --
  651 -- since GHC only really has i32 and i64 types and things like Word8 are backed
  652 -- by an i32 and just present a logical i8 range. So we must handle conversions
  653 -- from i32 to i8 explicitly as LLVM is strict about types.
  654 genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
  655               -> LlvmM StmtData
  656 genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
  657     let width = widthToLlvmInt w
  658         dstTy = cmmToLlvmType $ localRegType dst
  659 
  660     fname                       <- cmmPrimOpFunctions op
  661     (fptr, _, top3)             <- getInstrinct fname width (const width <$> args)
  662 
  663     dstV                        <- getCmmReg (CmmLocal dst)
  664 
  665     let (_, arg_hints) = foreignTargetHints t
  666     let args_hints = zip args arg_hints
  667     (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
  668     (argsV', stmts4)            <- castVars Signed $ zip argsV (const width <$> argsV)
  669     (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
  670     (retVs', stmts5)             <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
  671     let retV'                    = singletonPanic "genCallSimpleCast2" retVs'
  672     let s2                       = Store retV' dstV
  673 
  674     let stmts = stmts2 `appOL` stmts4 `snocOL`
  675                 s1 `appOL` stmts5 `snocOL` s2
  676     return (stmts, top2 ++ top3)
  677 genCallSimpleCast2 _ _ dsts _ =
  678     panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
  679 
  680 -- | Create a function pointer from a target.
  681 getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
  682            -> WriterT LlvmAccum LlvmM LlvmVar
  683 getFunPtrW funTy targ = liftExprData $ getFunPtr funTy targ
  684 
  685 -- | Create a function pointer from a target.
  686 getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
  687           -> LlvmM ExprData
  688 getFunPtr funTy targ = case targ of
  689     ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
  690         name <- strCLabel_llvm lbl
  691         getHsFunc' name (funTy name)
  692 
  693     ForeignTarget expr _ -> do
  694         (v1, stmts, top) <- exprToVar expr
  695         dflags <- getDynFlags
  696         let fty = funTy $ fsLit "dynamic"
  697             cast = case getVarType v1 of
  698                 ty | isPointer ty -> LM_Bitcast
  699                 ty | isInt ty     -> LM_Inttoptr
  700 
  701                 ty -> panic $ "genCall: Expr is of bad type for function"
  702                               ++ " call! (" ++ showSDoc dflags (ppr ty) ++ ")"
  703 
  704         (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty)
  705         return (v2, stmts `snocOL` s1, top)
  706 
  707     PrimTarget mop -> do
  708         name <- cmmPrimOpFunctions mop
  709         let fty = funTy name
  710         getInstrinct2 name fty
  711 
  712 -- | Conversion of call arguments.
  713 arg_varsW :: [(CmmActual, ForeignHint)]
  714           -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
  715           -> WriterT LlvmAccum LlvmM [LlvmVar]
  716 arg_varsW xs ys = do
  717     (vars, stmts, decls) <- lift $ arg_vars xs ys
  718     tell $ LlvmAccum stmts decls
  719     return vars
  720 
  721 -- | Conversion of call arguments.
  722 arg_vars :: [(CmmActual, ForeignHint)]
  723          -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
  724          -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
  725 
  726 arg_vars [] (vars, stmts, tops)
  727   = return (vars, stmts, tops)
  728 
  729 arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
  730   = do (v1, stmts', top') <- exprToVar e
  731        dflags <- getDynFlags
  732        let op = case getVarType v1 of
  733                ty | isPointer ty -> LM_Bitcast
  734                ty | isInt ty     -> LM_Inttoptr
  735 
  736                a  -> panic $ "genCall: Can't cast llvmType to i8*! ("
  737                            ++ showSDoc dflags (ppr a) ++ ")"
  738 
  739        (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
  740        arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
  741                                tops ++ top')
  742 
  743 arg_vars ((e, _):rest) (vars, stmts, tops)
  744   = do (v1, stmts', top') <- exprToVar e
  745        arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
  746 
  747 
  748 -- | Cast a collection of LLVM variables to specific types.
  749 castVarsW :: Signage
  750           -> [(LlvmVar, LlvmType)]
  751           -> WriterT LlvmAccum LlvmM [LlvmVar]
  752 castVarsW signage vars = do
  753     (vars, stmts) <- lift $ castVars signage vars
  754     tell $ LlvmAccum stmts mempty
  755     return vars
  756 
  757 -- | Cast a collection of LLVM variables to specific types.
  758 castVars :: Signage -> [(LlvmVar, LlvmType)]
  759          -> LlvmM ([LlvmVar], LlvmStatements)
  760 castVars signage vars = do
  761                 done <- mapM (uncurry (castVar signage)) vars
  762                 let (vars', stmts) = unzip done
  763                 return (vars', toOL stmts)
  764 
  765 -- | Cast an LLVM variable to a specific type, panicing if it can't be done.
  766 castVar :: Signage -> LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
  767 castVar signage v t | getVarType v == t
  768             = return (v, Nop)
  769 
  770             | otherwise
  771             = do dflags <- getDynFlags
  772                  platform <- getPlatform
  773                  let op = case (getVarType v, t) of
  774                       (LMInt n, LMInt m)
  775                           -> if n < m then extend else LM_Trunc
  776                       (vt, _) | isFloat vt && isFloat t
  777                           -> if llvmWidthInBits platform vt < llvmWidthInBits platform t
  778                                 then LM_Fpext else LM_Fptrunc
  779                       (vt, _) | isInt vt && isFloat t       -> LM_Sitofp
  780                       (vt, _) | isFloat vt && isInt t       -> LM_Fptosi
  781                       (vt, _) | isInt vt && isPointer t     -> LM_Inttoptr
  782                       (vt, _) | isPointer vt && isInt t     -> LM_Ptrtoint
  783                       (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
  784                       (vt, _) | isVector vt && isVector t   -> LM_Bitcast
  785 
  786                       (vt, _) -> panic $ "castVars: Can't cast this type ("
  787                                   ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
  788                  doExpr t $ Cast op v t
  789     where extend = case signage of
  790             Signed      -> LM_Sext
  791             Unsigned    -> LM_Zext
  792 
  793 
  794 cmmPrimOpRetValSignage :: CallishMachOp -> Signage
  795 cmmPrimOpRetValSignage mop = case mop of
  796     MO_Pdep _   -> Unsigned
  797     MO_Pext _   -> Unsigned
  798     _           -> Signed
  799 
  800 -- | Decide what C function to use to implement a CallishMachOp
  801 cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
  802 cmmPrimOpFunctions mop = do
  803 
  804   dflags <- getDynFlags
  805   platform <- getPlatform
  806   let intrinTy1 = "p0i8.p0i8." ++ showSDoc dflags (ppr $ llvmWord platform)
  807       intrinTy2 = "p0i8." ++ showSDoc dflags (ppr $ llvmWord platform)
  808       unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
  809                         ++ " not supported here")
  810       dontReach64 = panic ("cmmPrimOpFunctions: " ++ show mop
  811                         ++ " should be not be encountered because the regular primop for this 64-bit operation is used instead.")
  812 
  813   return $ case mop of
  814     MO_F32_Exp    -> fsLit "expf"
  815     MO_F32_ExpM1  -> fsLit "expm1f"
  816     MO_F32_Log    -> fsLit "logf"
  817     MO_F32_Log1P  -> fsLit "log1pf"
  818     MO_F32_Sqrt   -> fsLit "llvm.sqrt.f32"
  819     MO_F32_Fabs   -> fsLit "llvm.fabs.f32"
  820     MO_F32_Pwr    -> fsLit "llvm.pow.f32"
  821 
  822     MO_F32_Sin    -> fsLit "llvm.sin.f32"
  823     MO_F32_Cos    -> fsLit "llvm.cos.f32"
  824     MO_F32_Tan    -> fsLit "tanf"
  825 
  826     MO_F32_Asin   -> fsLit "asinf"
  827     MO_F32_Acos   -> fsLit "acosf"
  828     MO_F32_Atan   -> fsLit "atanf"
  829 
  830     MO_F32_Sinh   -> fsLit "sinhf"
  831     MO_F32_Cosh   -> fsLit "coshf"
  832     MO_F32_Tanh   -> fsLit "tanhf"
  833 
  834     MO_F32_Asinh  -> fsLit "asinhf"
  835     MO_F32_Acosh  -> fsLit "acoshf"
  836     MO_F32_Atanh  -> fsLit "atanhf"
  837 
  838     MO_F64_Exp    -> fsLit "exp"
  839     MO_F64_ExpM1  -> fsLit "expm1"
  840     MO_F64_Log    -> fsLit "log"
  841     MO_F64_Log1P  -> fsLit "log1p"
  842     MO_F64_Sqrt   -> fsLit "llvm.sqrt.f64"
  843     MO_F64_Fabs   -> fsLit "llvm.fabs.f64"
  844     MO_F64_Pwr    -> fsLit "llvm.pow.f64"
  845 
  846     MO_F64_Sin    -> fsLit "llvm.sin.f64"
  847     MO_F64_Cos    -> fsLit "llvm.cos.f64"
  848     MO_F64_Tan    -> fsLit "tan"
  849 
  850     MO_F64_Asin   -> fsLit "asin"
  851     MO_F64_Acos   -> fsLit "acos"
  852     MO_F64_Atan   -> fsLit "atan"
  853 
  854     MO_F64_Sinh   -> fsLit "sinh"
  855     MO_F64_Cosh   -> fsLit "cosh"
  856     MO_F64_Tanh   -> fsLit "tanh"
  857 
  858     MO_F64_Asinh  -> fsLit "asinh"
  859     MO_F64_Acosh  -> fsLit "acosh"
  860     MO_F64_Atanh  -> fsLit "atanh"
  861 
  862     MO_Memcpy _   -> fsLit $ "llvm.memcpy."  ++ intrinTy1
  863     MO_Memmove _  -> fsLit $ "llvm.memmove." ++ intrinTy1
  864     MO_Memset _   -> fsLit $ "llvm.memset."  ++ intrinTy2
  865     MO_Memcmp _   -> fsLit $ "memcmp"
  866 
  867     MO_SuspendThread -> fsLit $ "suspendThread"
  868     MO_ResumeThread  -> fsLit $ "resumeThread"
  869 
  870     (MO_PopCnt w) -> fsLit $ "llvm.ctpop."      ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  871     (MO_BSwap w)  -> fsLit $ "llvm.bswap."      ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  872     (MO_BRev w)   -> fsLit $ "llvm.bitreverse." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  873     (MO_Clz w)    -> fsLit $ "llvm.ctlz."       ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  874     (MO_Ctz w)    -> fsLit $ "llvm.cttz."       ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  875 
  876     (MO_Pdep w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
  877                       in  if isBmi2Enabled dflags
  878                             then fsLit $ "llvm.x86.bmi.pdep."   ++ w'
  879                             else fsLit $ "hs_pdep"              ++ w'
  880     (MO_Pext w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
  881                       in  if isBmi2Enabled dflags
  882                             then fsLit $ "llvm.x86.bmi.pext."   ++ w'
  883                             else fsLit $ "hs_pext"              ++ w'
  884 
  885     (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
  886 
  887     MO_AddIntC w    -> fsLit $ "llvm.sadd.with.overflow."
  888                              ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  889     MO_SubIntC w    -> fsLit $ "llvm.ssub.with.overflow."
  890                              ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  891     MO_Add2 w       -> fsLit $ "llvm.uadd.with.overflow."
  892                              ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  893     MO_AddWordC w   -> fsLit $ "llvm.uadd.with.overflow."
  894                              ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  895     MO_SubWordC w   -> fsLit $ "llvm.usub.with.overflow."
  896                              ++ showSDoc dflags (ppr $ widthToLlvmInt w)
  897 
  898     MO_S_Mul2    {}  -> unsupported
  899     MO_S_QuotRem {}  -> unsupported
  900     MO_U_QuotRem {}  -> unsupported
  901     MO_U_QuotRem2 {} -> unsupported
  902     -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
  903     -- appropriate case of genCall.
  904     MO_U_Mul2 {}     -> unsupported
  905     MO_ReadBarrier   -> unsupported
  906     MO_WriteBarrier  -> unsupported
  907     MO_Touch         -> unsupported
  908     MO_UF_Conv _     -> unsupported
  909 
  910     MO_AtomicRead _  -> unsupported
  911     MO_AtomicRMW _ _ -> unsupported
  912     MO_AtomicWrite _ -> unsupported
  913     MO_Cmpxchg _     -> unsupported
  914     MO_Xchg _        -> unsupported
  915 
  916     MO_I64_ToI       -> dontReach64
  917     MO_I64_FromI     -> dontReach64
  918     MO_W64_ToW       -> dontReach64
  919     MO_W64_FromW     -> dontReach64
  920     MO_x64_Neg       -> dontReach64
  921     MO_x64_Add       -> dontReach64
  922     MO_x64_Sub       -> dontReach64
  923     MO_x64_Mul       -> dontReach64
  924     MO_I64_Quot      -> dontReach64
  925     MO_I64_Rem       -> dontReach64
  926     MO_W64_Quot      -> dontReach64
  927     MO_W64_Rem       -> dontReach64
  928     MO_x64_And       -> dontReach64
  929     MO_x64_Or        -> dontReach64
  930     MO_x64_Xor       -> dontReach64
  931     MO_x64_Not       -> dontReach64
  932     MO_x64_Shl       -> dontReach64
  933     MO_I64_Shr       -> dontReach64
  934     MO_W64_Shr       -> dontReach64
  935     MO_x64_Eq        -> dontReach64
  936     MO_x64_Ne        -> dontReach64
  937     MO_I64_Ge        -> dontReach64
  938     MO_I64_Gt        -> dontReach64
  939     MO_I64_Le        -> dontReach64
  940     MO_I64_Lt        -> dontReach64
  941     MO_W64_Ge        -> dontReach64
  942     MO_W64_Gt        -> dontReach64
  943     MO_W64_Le        -> dontReach64
  944     MO_W64_Lt        -> dontReach64
  945 
  946 
  947 -- | Tail function calls
  948 genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
  949 
  950 -- Call to known function
  951 genJump (CmmLit (CmmLabel lbl)) live = do
  952     (vf, stmts, top) <- getHsFunc live lbl
  953     (stgRegs, stgStmts) <- funEpilogue live
  954     let s1  = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs
  955     let s2  = Return Nothing
  956     return (stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top)
  957 
  958 
  959 -- Call to unknown function / address
  960 genJump expr live = do
  961     fty <- llvmFunTy live
  962     (vf, stmts, top) <- exprToVar expr
  963     dflags <- getDynFlags
  964 
  965     let cast = case getVarType vf of
  966          ty | isPointer ty -> LM_Bitcast
  967          ty | isInt ty     -> LM_Inttoptr
  968 
  969          ty -> panic $ "genJump: Expr is of bad type for function call! ("
  970                      ++ showSDoc dflags (ppr ty) ++ ")"
  971 
  972     (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty)
  973     (stgRegs, stgStmts) <- funEpilogue live
  974     let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs
  975     let s3 = Return Nothing
  976     return (stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3,
  977             top)
  978 
  979 
  980 -- | CmmAssign operation
  981 --
  982 -- We use stack allocated variables for CmmReg. The optimiser will replace
  983 -- these with registers when possible.
  984 genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
  985 genAssign reg val = do
  986     vreg <- getCmmReg reg
  987     (vval, stmts2, top2) <- exprToVar val
  988     let stmts = stmts2
  989 
  990     let ty = (pLower . getVarType) vreg
  991     platform <- getPlatform
  992     case ty of
  993       -- Some registers are pointer types, so need to cast value to pointer
  994       LMPointer _ | getVarType vval == llvmWord platform -> do
  995           (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
  996           let s2 = Store v vreg
  997           return (stmts `snocOL` s1 `snocOL` s2, top2)
  998 
  999       LMVector _ _ -> do
 1000           (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
 1001           let s2 = Store v vreg
 1002           return (stmts `snocOL` s1 `snocOL` s2, top2)
 1003 
 1004       _ -> do
 1005           let s1 = Store vval vreg
 1006           return (stmts `snocOL` s1, top2)
 1007 
 1008 
 1009 -- | CmmStore operation
 1010 genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
 1011 
 1012 -- First we try to detect a few common cases and produce better code for
 1013 -- these then the default case. We are mostly trying to detect Cmm code
 1014 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
 1015 -- generic case that uses casts and pointer arithmetic
 1016 genStore addr@(CmmReg (CmmGlobal r)) val
 1017     = genStore_fast addr r 0 val
 1018 
 1019 genStore addr@(CmmRegOff (CmmGlobal r) n) val
 1020     = genStore_fast addr r n val
 1021 
 1022 genStore addr@(CmmMachOp (MO_Add _) [
 1023                             (CmmReg (CmmGlobal r)),
 1024                             (CmmLit (CmmInt n _))])
 1025                 val
 1026     = genStore_fast addr r (fromInteger n) val
 1027 
 1028 genStore addr@(CmmMachOp (MO_Sub _) [
 1029                             (CmmReg (CmmGlobal r)),
 1030                             (CmmLit (CmmInt n _))])
 1031                 val
 1032     = genStore_fast addr r (negate $ fromInteger n) val
 1033 
 1034 -- generic case
 1035 genStore addr val
 1036     = getTBAAMeta topN >>= genStore_slow addr val
 1037 
 1038 -- | CmmStore operation
 1039 -- This is a special case for storing to a global register pointer
 1040 -- offset such as I32[Sp+8].
 1041 genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
 1042               -> LlvmM StmtData
 1043 genStore_fast addr r n val
 1044   = do platform <- getPlatform
 1045        (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
 1046        meta          <- getTBAARegMeta r
 1047        let (ix,rem) = n `divMod` ((llvmWidthInBits platform . pLower) grt  `div` 8)
 1048        case isPointer grt && rem == 0 of
 1049             True -> do
 1050                 (vval,  stmts, top) <- exprToVar val
 1051                 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
 1052                 -- We might need a different pointer type, so check
 1053                 case pLower grt == getVarType vval of
 1054                      -- were fine
 1055                      True  -> do
 1056                          let s3 = MetaStmt meta $ Store vval ptr
 1057                          return (stmts `appOL` s1 `snocOL` s2
 1058                                  `snocOL` s3, top)
 1059 
 1060                      -- cast to pointer type needed
 1061                      False -> do
 1062                          let ty = (pLift . getVarType) vval
 1063                          (ptr', s3) <- doExpr ty $ Cast LM_Bitcast ptr ty
 1064                          let s4 = MetaStmt meta $ Store vval ptr'
 1065                          return (stmts `appOL` s1 `snocOL` s2
 1066                                  `snocOL` s3 `snocOL` s4, top)
 1067 
 1068             -- If its a bit type then we use the slow method since
 1069             -- we can't avoid casting anyway.
 1070             False -> genStore_slow addr val meta
 1071 
 1072 
 1073 -- | CmmStore operation
 1074 -- Generic case. Uses casts and pointer arithmetic if needed.
 1075 genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
 1076 genStore_slow addr val meta = do
 1077     (vaddr, stmts1, top1) <- exprToVar addr
 1078     (vval,  stmts2, top2) <- exprToVar val
 1079 
 1080     let stmts = stmts1 `appOL` stmts2
 1081     dflags <- getDynFlags
 1082     platform <- getPlatform
 1083     opts <- getLlvmOpts
 1084     case getVarType vaddr of
 1085         -- sometimes we need to cast an int to a pointer before storing
 1086         LMPointer ty@(LMPointer _) | getVarType vval == llvmWord platform -> do
 1087             (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
 1088             let s2 = MetaStmt meta $ Store v vaddr
 1089             return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
 1090 
 1091         LMPointer _ -> do
 1092             let s1 = MetaStmt meta $ Store vval vaddr
 1093             return (stmts `snocOL` s1, top1 ++ top2)
 1094 
 1095         i@(LMInt _) | i == llvmWord platform -> do
 1096             let vty = pLift $ getVarType vval
 1097             (vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty
 1098             let s2 = MetaStmt meta $ Store vval vptr
 1099             return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
 1100 
 1101         other ->
 1102             pprPanic "genStore: ptr not right type!"
 1103                     (PprCmm.pprExpr platform addr <+> text (
 1104                         "Size of Ptr: " ++ show (llvmPtrBits platform) ++
 1105                         ", Size of var: " ++ show (llvmWidthInBits platform other) ++
 1106                         ", Var: " ++ showSDoc dflags (ppVar opts vaddr)))
 1107 
 1108 
 1109 -- | Unconditional branch
 1110 genBranch :: BlockId -> LlvmM StmtData
 1111 genBranch id =
 1112     let label = blockIdToLlvm id
 1113     in return (unitOL $ Branch label, [])
 1114 
 1115 
 1116 -- | Conditional branch
 1117 genCondBranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> LlvmM StmtData
 1118 genCondBranch cond idT idF likely = do
 1119     let labelT = blockIdToLlvm idT
 1120     let labelF = blockIdToLlvm idF
 1121     -- See Note [Literals and branch conditions].
 1122     (vc, stmts1, top1) <- exprToVarOpt i1Option cond
 1123     if getVarType vc == i1
 1124         then do
 1125             (vc', (stmts2, top2)) <- case likely of
 1126               Just b -> genExpectLit (if b then 1 else 0) i1  vc
 1127               _      -> pure (vc, (nilOL, []))
 1128             let s1 = BranchIf vc' labelT labelF
 1129             return (stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
 1130         else do
 1131             dflags <- getDynFlags
 1132             opts <- getLlvmOpts
 1133             panic $ "genCondBranch: Cond expr not bool! (" ++ showSDoc dflags (ppVar opts vc) ++ ")"
 1134 
 1135 
 1136 -- | Generate call to llvm.expect.x intrinsic. Assigning result to a new var.
 1137 genExpectLit :: Integer -> LlvmType -> LlvmVar -> LlvmM (LlvmVar, StmtData)
 1138 genExpectLit expLit expTy var = do
 1139   dflags <- getDynFlags
 1140 
 1141   let
 1142     lit = LMLitVar $ LMIntLit expLit expTy
 1143 
 1144     llvmExpectName
 1145       | isInt expTy = fsLit $ "llvm.expect." ++ showSDoc dflags (ppr expTy)
 1146       | otherwise   = panic $ "genExpectedLit: Type not an int!"
 1147 
 1148   (llvmExpect, stmts, top) <-
 1149     getInstrinct llvmExpectName expTy [expTy, expTy]
 1150   (var', call) <- doExpr expTy $ Call StdCall llvmExpect [var, lit] []
 1151   return (var', (stmts `snocOL` call, top))
 1152 
 1153 {- Note [Literals and branch conditions]
 1154 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1155 
 1156 It is important that whenever we generate branch conditions for
 1157 literals like '1', they are properly narrowed to an LLVM expression of
 1158 type 'i1' (for bools.) Otherwise, nobody is happy. So when we convert
 1159 a CmmExpr to an LLVM expression for a branch conditional, exprToVarOpt
 1160 must be certain to return a properly narrowed type. genLit is
 1161 responsible for this, in the case of literal integers.
 1162 
 1163 Often, we won't see direct statements like:
 1164 
 1165     if(1) {
 1166       ...
 1167     } else {
 1168       ...
 1169     }
 1170 
 1171 at this point in the pipeline, because the Glorious Code Generator
 1172 will do trivial branch elimination in the sinking pass (among others,)
 1173 which will eliminate the expression entirely.
 1174 
 1175 However, it's certainly possible and reasonable for this to occur in
 1176 hand-written C-- code. Consider something like:
 1177 
 1178     #if !defined(SOME_CONDITIONAL)
 1179     #define CHECK_THING(x) 1
 1180     #else
 1181     #define CHECK_THING(x) some_operation((x))
 1182     #endif
 1183 
 1184     f() {
 1185 
 1186       if (CHECK_THING(xyz)) {
 1187         ...
 1188       } else {
 1189         ...
 1190       }
 1191 
 1192     }
 1193 
 1194 In such an instance, CHECK_THING might result in an *expression* in
 1195 one case, and a *literal* in the other, depending on what in
 1196 particular was #define'd. So we must be sure to properly narrow the
 1197 literal in this case to i1 as it won't be eliminated beforehand.
 1198 
 1199 For a real example of this, see ./rts/StgStdThunks.cmm
 1200 
 1201 -}
 1202 
 1203 
 1204 
 1205 -- | Switch branch
 1206 genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
 1207 genSwitch cond ids = do
 1208     (vc, stmts, top) <- exprToVar cond
 1209     let ty = getVarType vc
 1210 
 1211     let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
 1212                  | (ix, b) <- switchTargetsCases ids ]
 1213     -- out of range is undefined, so let's just branch to first label
 1214     let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
 1215                | otherwise                          = snd (head labels)
 1216 
 1217     let s1 = Switch vc defLbl labels
 1218     return $ (stmts `snocOL` s1, top)
 1219 
 1220 
 1221 -- -----------------------------------------------------------------------------
 1222 -- * CmmExpr code generation
 1223 --
 1224 
 1225 -- | An expression conversion return data:
 1226 --   * LlvmVar: The var holding the result of the expression
 1227 --   * LlvmStatements: Any statements needed to evaluate the expression
 1228 --   * LlvmCmmDecl: Any global data needed for this expression
 1229 type ExprData = (LlvmVar, LlvmStatements, [LlvmCmmDecl])
 1230 
 1231 -- | Values which can be passed to 'exprToVar' to configure its
 1232 -- behaviour in certain circumstances.
 1233 --
 1234 -- Currently just used for determining if a comparison should return
 1235 -- a boolean (i1) or a word. See Note [Literals and branch conditions].
 1236 newtype EOption = EOption { i1Expected :: Bool }
 1237 -- XXX: EOption is an ugly and inefficient solution to this problem.
 1238 
 1239 -- | i1 type expected (condition scrutinee).
 1240 i1Option :: EOption
 1241 i1Option = EOption True
 1242 
 1243 -- | Word type expected (usual).
 1244 wordOption :: EOption
 1245 wordOption = EOption False
 1246 
 1247 -- | Convert a CmmExpr to a list of LlvmStatements with the result of the
 1248 -- expression being stored in the returned LlvmVar.
 1249 exprToVar :: CmmExpr -> LlvmM ExprData
 1250 exprToVar = exprToVarOpt wordOption
 1251 
 1252 exprToVarOpt :: EOption -> CmmExpr -> LlvmM ExprData
 1253 exprToVarOpt opt e = case e of
 1254 
 1255     CmmLit lit
 1256         -> genLit opt lit
 1257 
 1258     CmmLoad e' ty
 1259         -> genLoad False e' ty
 1260 
 1261     -- Cmmreg in expression is the value, so must load. If you want actual
 1262     -- reg pointer, call getCmmReg directly.
 1263     CmmReg r -> do
 1264         (v1, ty, s1) <- getCmmRegVal r
 1265         case isPointer ty of
 1266              True  -> do
 1267                  -- Cmm wants the value, so pointer types must be cast to ints
 1268                  platform <- getPlatform
 1269                  (v2, s2) <- doExpr (llvmWord platform) $ Cast LM_Ptrtoint v1 (llvmWord platform)
 1270                  return (v2, s1 `snocOL` s2, [])
 1271 
 1272              False -> return (v1, s1, [])
 1273 
 1274     CmmMachOp op exprs
 1275         -> genMachOp opt op exprs
 1276 
 1277     CmmRegOff r i
 1278         -> do platform <- getPlatform
 1279               exprToVar $ expandCmmReg platform (r, i)
 1280 
 1281     CmmStackSlot _ _
 1282         -> panic "exprToVar: CmmStackSlot not supported!"
 1283 
 1284 
 1285 -- | Handle CmmMachOp expressions
 1286 genMachOp :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
 1287 
 1288 -- Unary Machop
 1289 genMachOp _ op [x] = case op of
 1290 
 1291     MO_Not w ->
 1292         let all1 = mkIntLit (widthToLlvmInt w) (-1)
 1293         in negate (widthToLlvmInt w) all1 LM_MO_Xor
 1294 
 1295     MO_S_Neg w ->
 1296         let all0 = mkIntLit (widthToLlvmInt w) 0
 1297         in negate (widthToLlvmInt w) all0 LM_MO_Sub
 1298 
 1299     MO_F_Neg w ->
 1300         let all0 = LMLitVar $ LMFloatLit (-0) (widthToLlvmFloat w)
 1301         in negate (widthToLlvmFloat w) all0 LM_MO_FSub
 1302 
 1303     MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp
 1304     MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi
 1305 
 1306     MO_SS_Conv from to
 1307         -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext
 1308 
 1309     MO_UU_Conv from to
 1310         -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
 1311 
 1312     MO_XX_Conv from to
 1313         -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
 1314 
 1315     MO_FF_Conv from to
 1316         -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
 1317 
 1318     MO_VS_Neg len w ->
 1319         let ty    = widthToLlvmInt w
 1320             vecty = LMVector len ty
 1321             all0  = LMIntLit (-0) ty
 1322             all0s = LMLitVar $ LMVectorLit (replicate len all0)
 1323         in negateVec vecty all0s LM_MO_Sub
 1324 
 1325     MO_VF_Neg len w ->
 1326         let ty    = widthToLlvmFloat w
 1327             vecty = LMVector len ty
 1328             all0  = LMFloatLit (-0) ty
 1329             all0s = LMLitVar $ LMVectorLit (replicate len all0)
 1330         in negateVec vecty all0s LM_MO_FSub
 1331 
 1332     MO_AlignmentCheck _ _ -> panic "-falignment-sanitisation is not supported by -fllvm"
 1333 
 1334     -- Handle unsupported cases explicitly so we get a warning
 1335     -- of missing case when new MachOps added
 1336     MO_Add _          -> panicOp
 1337     MO_Mul _          -> panicOp
 1338     MO_Sub _          -> panicOp
 1339     MO_S_MulMayOflo _ -> panicOp
 1340     MO_S_Quot _       -> panicOp
 1341     MO_S_Rem _        -> panicOp
 1342     MO_U_MulMayOflo _ -> panicOp
 1343     MO_U_Quot _       -> panicOp
 1344     MO_U_Rem _        -> panicOp
 1345 
 1346     MO_Eq  _          -> panicOp
 1347     MO_Ne  _          -> panicOp
 1348     MO_S_Ge _         -> panicOp
 1349     MO_S_Gt _         -> panicOp
 1350     MO_S_Le _         -> panicOp
 1351     MO_S_Lt _         -> panicOp
 1352     MO_U_Ge _         -> panicOp
 1353     MO_U_Gt _         -> panicOp
 1354     MO_U_Le _         -> panicOp
 1355     MO_U_Lt _         -> panicOp
 1356 
 1357     MO_F_Add        _ -> panicOp
 1358     MO_F_Sub        _ -> panicOp
 1359     MO_F_Mul        _ -> panicOp
 1360     MO_F_Quot       _ -> panicOp
 1361     MO_F_Eq         _ -> panicOp
 1362     MO_F_Ne         _ -> panicOp
 1363     MO_F_Ge         _ -> panicOp
 1364     MO_F_Gt         _ -> panicOp
 1365     MO_F_Le         _ -> panicOp
 1366     MO_F_Lt         _ -> panicOp
 1367 
 1368     MO_And          _ -> panicOp
 1369     MO_Or           _ -> panicOp
 1370     MO_Xor          _ -> panicOp
 1371     MO_Shl          _ -> panicOp
 1372     MO_U_Shr        _ -> panicOp
 1373     MO_S_Shr        _ -> panicOp
 1374 
 1375     MO_V_Insert   _ _ -> panicOp
 1376     MO_V_Extract  _ _ -> panicOp
 1377 
 1378     MO_V_Add      _ _ -> panicOp
 1379     MO_V_Sub      _ _ -> panicOp
 1380     MO_V_Mul      _ _ -> panicOp
 1381 
 1382     MO_VS_Quot    _ _ -> panicOp
 1383     MO_VS_Rem     _ _ -> panicOp
 1384 
 1385     MO_VU_Quot    _ _ -> panicOp
 1386     MO_VU_Rem     _ _ -> panicOp
 1387 
 1388     MO_VF_Insert  _ _ -> panicOp
 1389     MO_VF_Extract _ _ -> panicOp
 1390 
 1391     MO_VF_Add     _ _ -> panicOp
 1392     MO_VF_Sub     _ _ -> panicOp
 1393     MO_VF_Mul     _ _ -> panicOp
 1394     MO_VF_Quot    _ _ -> panicOp
 1395 
 1396     where
 1397         negate ty v2 negOp = do
 1398             (vx, stmts, top) <- exprToVar x
 1399             (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
 1400             return (v1, stmts `snocOL` s1, top)
 1401 
 1402         negateVec ty v2 negOp = do
 1403             (vx, stmts1, top) <- exprToVar x
 1404             (vxs', stmts2) <- castVars Signed [(vx, ty)]
 1405             let vx' = singletonPanic "genMachOp: negateVec" vxs'
 1406             (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
 1407             return (v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
 1408 
 1409         fiConv ty convOp = do
 1410             (vx, stmts, top) <- exprToVar x
 1411             (v1, s1) <- doExpr ty $ Cast convOp vx ty
 1412             return (v1, stmts `snocOL` s1, top)
 1413 
 1414         sameConv from ty reduce expand = do
 1415             x'@(vx, stmts, top) <- exprToVar x
 1416             let sameConv' op = do
 1417                     (v1, s1) <- doExpr ty $ Cast op vx ty
 1418                     return (v1, stmts `snocOL` s1, top)
 1419             platform <- getPlatform
 1420             let toWidth = llvmWidthInBits platform ty
 1421             -- LLVM doesn't like trying to convert to same width, so
 1422             -- need to check for that as we do get Cmm code doing it.
 1423             case widthInBits from  of
 1424                  w | w < toWidth -> sameConv' expand
 1425                  w | w > toWidth -> sameConv' reduce
 1426                  _w              -> return x'
 1427 
 1428         panicOp = panic $ "LLVM.CodeGen.genMachOp: non unary op encountered"
 1429                        ++ "with one argument! (" ++ show op ++ ")"
 1430 
 1431 -- Handle GlobalRegs pointers
 1432 genMachOp opt o@(MO_Add _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
 1433     = genMachOp_fast opt o r (fromInteger n) e
 1434 
 1435 genMachOp opt o@(MO_Sub _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
 1436     = genMachOp_fast opt o r (negate . fromInteger $ n) e
 1437 
 1438 -- Generic case
 1439 genMachOp opt op e = genMachOp_slow opt op e
 1440 
 1441 
 1442 -- | Handle CmmMachOp expressions
 1443 -- This is a specialised method that handles Global register manipulations like
 1444 -- 'Sp - 16', using the getelementptr instruction.
 1445 genMachOp_fast :: EOption -> MachOp -> GlobalReg -> Int -> [CmmExpr]
 1446                -> LlvmM ExprData
 1447 genMachOp_fast opt op r n e
 1448   = do (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
 1449        platform <- getPlatform
 1450        let (ix,rem) = n `divMod` ((llvmWidthInBits platform . pLower) grt  `div` 8)
 1451        case isPointer grt && rem == 0 of
 1452             True -> do
 1453                 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
 1454                 (var, s3) <- doExpr (llvmWord platform) $ Cast LM_Ptrtoint ptr (llvmWord platform)
 1455                 return (var, s1 `snocOL` s2 `snocOL` s3, [])
 1456 
 1457             False -> genMachOp_slow opt op e
 1458 
 1459 
 1460 -- | Handle CmmMachOp expressions
 1461 -- This handles all the cases not handle by the specialised genMachOp_fast.
 1462 genMachOp_slow :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
 1463 
 1464 -- Element extraction
 1465 genMachOp_slow _ (MO_V_Extract l w) [val, idx] = runExprData $ do
 1466     vval <- exprToVarW val
 1467     vidx <- exprToVarW idx
 1468     vval' <- singletonPanic "genMachOp_slow" <$>
 1469              castVarsW Signed [(vval, LMVector l ty)]
 1470     doExprW ty $ Extract vval' vidx
 1471   where
 1472     ty = widthToLlvmInt w
 1473 
 1474 genMachOp_slow _ (MO_VF_Extract l w) [val, idx] = runExprData $ do
 1475     vval <- exprToVarW val
 1476     vidx <- exprToVarW idx
 1477     vval' <- singletonPanic "genMachOp_slow" <$>
 1478              castVarsW Signed [(vval, LMVector l ty)]
 1479     doExprW ty $ Extract vval' vidx
 1480   where
 1481     ty = widthToLlvmFloat w
 1482 
 1483 -- Element insertion
 1484 genMachOp_slow _ (MO_V_Insert l w) [val, elt, idx] = runExprData $ do
 1485     vval <- exprToVarW val
 1486     velt <- exprToVarW elt
 1487     vidx <- exprToVarW idx
 1488     vval' <- singletonPanic "genMachOp_slow" <$>
 1489              castVarsW Signed [(vval, ty)]
 1490     doExprW ty $ Insert vval' velt vidx
 1491   where
 1492     ty = LMVector l (widthToLlvmInt w)
 1493 
 1494 genMachOp_slow _ (MO_VF_Insert l w) [val, elt, idx] = runExprData $ do
 1495     vval <- exprToVarW val
 1496     velt <- exprToVarW elt
 1497     vidx <- exprToVarW idx
 1498     vval' <- singletonPanic "genMachOp_slow" <$>
 1499              castVarsW Signed [(vval, ty)]
 1500     doExprW ty $ Insert vval' velt vidx
 1501   where
 1502     ty = LMVector l (widthToLlvmFloat w)
 1503 
 1504 -- Binary MachOp
 1505 genMachOp_slow opt op [x, y] = case op of
 1506 
 1507     MO_Eq _   -> genBinComp opt LM_CMP_Eq
 1508     MO_Ne _   -> genBinComp opt LM_CMP_Ne
 1509 
 1510     MO_S_Gt _ -> genBinComp opt LM_CMP_Sgt
 1511     MO_S_Ge _ -> genBinComp opt LM_CMP_Sge
 1512     MO_S_Lt _ -> genBinComp opt LM_CMP_Slt
 1513     MO_S_Le _ -> genBinComp opt LM_CMP_Sle
 1514 
 1515     MO_U_Gt _ -> genBinComp opt LM_CMP_Ugt
 1516     MO_U_Ge _ -> genBinComp opt LM_CMP_Uge
 1517     MO_U_Lt _ -> genBinComp opt LM_CMP_Ult
 1518     MO_U_Le _ -> genBinComp opt LM_CMP_Ule
 1519 
 1520     MO_Add _ -> genBinMach LM_MO_Add
 1521     MO_Sub _ -> genBinMach LM_MO_Sub
 1522     MO_Mul _ -> genBinMach LM_MO_Mul
 1523 
 1524     MO_U_MulMayOflo _ -> panic "genMachOp: MO_U_MulMayOflo unsupported!"
 1525 
 1526     MO_S_MulMayOflo w -> isSMulOK w x y
 1527 
 1528     MO_S_Quot _ -> genBinMach LM_MO_SDiv
 1529     MO_S_Rem  _ -> genBinMach LM_MO_SRem
 1530 
 1531     MO_U_Quot _ -> genBinMach LM_MO_UDiv
 1532     MO_U_Rem  _ -> genBinMach LM_MO_URem
 1533 
 1534     MO_F_Eq _ -> genBinComp opt LM_CMP_Feq
 1535     MO_F_Ne _ -> genBinComp opt LM_CMP_Fne
 1536     MO_F_Gt _ -> genBinComp opt LM_CMP_Fgt
 1537     MO_F_Ge _ -> genBinComp opt LM_CMP_Fge
 1538     MO_F_Lt _ -> genBinComp opt LM_CMP_Flt
 1539     MO_F_Le _ -> genBinComp opt LM_CMP_Fle
 1540 
 1541     MO_F_Add  _ -> genBinMach LM_MO_FAdd
 1542     MO_F_Sub  _ -> genBinMach LM_MO_FSub
 1543     MO_F_Mul  _ -> genBinMach LM_MO_FMul
 1544     MO_F_Quot _ -> genBinMach LM_MO_FDiv
 1545 
 1546     MO_And _   -> genBinMach LM_MO_And
 1547     MO_Or  _   -> genBinMach LM_MO_Or
 1548     MO_Xor _   -> genBinMach LM_MO_Xor
 1549     MO_Shl _   -> genBinCastYMach LM_MO_Shl
 1550     MO_U_Shr _ -> genBinCastYMach LM_MO_LShr
 1551     MO_S_Shr _ -> genBinCastYMach LM_MO_AShr
 1552 
 1553     MO_V_Add l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
 1554     MO_V_Sub l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
 1555     MO_V_Mul l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
 1556 
 1557     MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
 1558     MO_VS_Rem  l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
 1559 
 1560     MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
 1561     MO_VU_Rem  l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
 1562 
 1563     MO_VF_Add  l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
 1564     MO_VF_Sub  l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
 1565     MO_VF_Mul  l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
 1566     MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
 1567 
 1568     MO_Not _       -> panicOp
 1569     MO_S_Neg _     -> panicOp
 1570     MO_F_Neg _     -> panicOp
 1571 
 1572     MO_SF_Conv _ _ -> panicOp
 1573     MO_FS_Conv _ _ -> panicOp
 1574     MO_SS_Conv _ _ -> panicOp
 1575     MO_UU_Conv _ _ -> panicOp
 1576     MO_XX_Conv _ _ -> panicOp
 1577     MO_FF_Conv _ _ -> panicOp
 1578 
 1579     MO_V_Insert  {} -> panicOp
 1580 
 1581     MO_VS_Neg {} -> panicOp
 1582 
 1583     MO_VF_Insert  {} -> panicOp
 1584 
 1585     MO_VF_Neg {} -> panicOp
 1586 
 1587     MO_AlignmentCheck {} -> panicOp
 1588 
 1589 #if __GLASGOW_HASKELL__ < 811
 1590     MO_VF_Extract {} -> panicOp
 1591     MO_V_Extract {} -> panicOp
 1592 #endif
 1593 
 1594     where
 1595         binLlvmOp ty binOp allow_y_cast = do
 1596           platform <- getPlatform
 1597           runExprData $ do
 1598             vx <- exprToVarW x
 1599             vy <- exprToVarW y
 1600 
 1601             if | getVarType vx == getVarType vy
 1602                -> doExprW (ty vx) $ binOp vx vy
 1603 
 1604                | allow_y_cast
 1605                -> do
 1606                     vy' <- singletonPanic "binLlvmOp cast"<$>
 1607                             castVarsW Signed [(vy, (ty vx))]
 1608                     doExprW (ty vx) $ binOp vx vy'
 1609 
 1610                | otherwise
 1611                -> do
 1612                     -- Error. Continue anyway so we can debug the generated ll file.
 1613                     dflags <- getDynFlags
 1614                     let style = PprCode CStyle
 1615                         toString doc = renderWithContext (initSDocContext dflags style) doc
 1616                         cmmToStr = (lines . toString . PprCmm.pprExpr platform)
 1617                     statement $ Comment $ map fsLit $ cmmToStr x
 1618                     statement $ Comment $ map fsLit $ cmmToStr y
 1619                     doExprW (ty vx) $ binOp vx vy
 1620 
 1621         binCastLlvmOp ty binOp = runExprData $ do
 1622             vx <- exprToVarW x
 1623             vy <- exprToVarW y
 1624             vxy' <- castVarsW Signed [(vx, ty), (vy, ty)]
 1625             case vxy' of
 1626               [vx',vy'] -> doExprW ty $ binOp vx' vy'
 1627               _         -> panic "genMachOp_slow: binCastLlvmOp"
 1628 
 1629         -- | Need to use EOption here as Cmm expects word size results from
 1630         -- comparisons while LLVM return i1. Need to extend to llvmWord type
 1631         -- if expected. See Note [Literals and branch conditions].
 1632         genBinComp opt cmp = do
 1633             ed@(v1, stmts, top) <- binLlvmOp (\_ -> i1) (Compare cmp) False
 1634             dflags <- getDynFlags
 1635             platform <- getPlatform
 1636             if getVarType v1 == i1
 1637                 then case i1Expected opt of
 1638                     True  -> return ed
 1639                     False -> do
 1640                         let w_ = llvmWord platform
 1641                         (v2, s1) <- doExpr w_ $ Cast LM_Zext v1 w_
 1642                         return (v2, stmts `snocOL` s1, top)
 1643                 else
 1644                     panic $ "genBinComp: Compare returned type other then i1! "
 1645                         ++ (showSDoc dflags $ ppr $ getVarType v1)
 1646 
 1647         genBinMach op = binLlvmOp getVarType (LlvmOp op) False
 1648 
 1649         genBinCastYMach op = binLlvmOp getVarType (LlvmOp op) True
 1650 
 1651         genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
 1652 
 1653         -- | Detect if overflow will occur in signed multiply of the two
 1654         -- CmmExpr's. This is the LLVM assembly equivalent of the NCG
 1655         -- implementation. Its much longer due to type information/safety.
 1656         -- This should actually compile to only about 3 asm instructions.
 1657         isSMulOK :: Width -> CmmExpr -> CmmExpr -> LlvmM ExprData
 1658         isSMulOK _ x y = do
 1659           platform <- getPlatform
 1660           dflags <- getDynFlags
 1661           runExprData $ do
 1662             vx <- exprToVarW x
 1663             vy <- exprToVarW y
 1664 
 1665             let word  = getVarType vx
 1666             let word2 = LMInt $ 2 * (llvmWidthInBits platform $ getVarType vx)
 1667             let shift = llvmWidthInBits platform word
 1668             let shift1 = toIWord platform (shift - 1)
 1669             let shift2 = toIWord platform shift
 1670 
 1671             if isInt word
 1672                 then do
 1673                     x1     <- doExprW word2 $ Cast LM_Sext vx word2
 1674                     y1     <- doExprW word2 $ Cast LM_Sext vy word2
 1675                     r1     <- doExprW word2 $ LlvmOp LM_MO_Mul x1 y1
 1676                     rlow1  <- doExprW word $ Cast LM_Trunc r1 word
 1677                     rlow2  <- doExprW word $ LlvmOp LM_MO_AShr rlow1 shift1
 1678                     rhigh1 <- doExprW word2 $ LlvmOp LM_MO_AShr r1 shift2
 1679                     rhigh2 <- doExprW word $ Cast LM_Trunc rhigh1 word
 1680                     doExprW word $ LlvmOp LM_MO_Sub rlow2 rhigh2
 1681 
 1682                 else
 1683                     panic $ "isSMulOK: Not bit type! (" ++ showSDoc dflags (ppr word) ++ ")"
 1684 
 1685         panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: unary op encountered"
 1686                        ++ "with two arguments! (" ++ show op ++ ")"
 1687 
 1688 -- More than two expression, invalid!
 1689 genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!"
 1690 
 1691 
 1692 -- | Handle CmmLoad expression.
 1693 genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
 1694 
 1695 -- First we try to detect a few common cases and produce better code for
 1696 -- these then the default case. We are mostly trying to detect Cmm code
 1697 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
 1698 -- generic case that uses casts and pointer arithmetic
 1699 genLoad atomic e@(CmmReg (CmmGlobal r)) ty
 1700     = genLoad_fast atomic e r 0 ty
 1701 
 1702 genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
 1703     = genLoad_fast atomic e r n ty
 1704 
 1705 genLoad atomic e@(CmmMachOp (MO_Add _) [
 1706                             (CmmReg (CmmGlobal r)),
 1707                             (CmmLit (CmmInt n _))])
 1708                 ty
 1709     = genLoad_fast atomic e r (fromInteger n) ty
 1710 
 1711 genLoad atomic e@(CmmMachOp (MO_Sub _) [
 1712                             (CmmReg (CmmGlobal r)),
 1713                             (CmmLit (CmmInt n _))])
 1714                 ty
 1715     = genLoad_fast atomic e r (negate $ fromInteger n) ty
 1716 
 1717 -- generic case
 1718 genLoad atomic e ty
 1719     = getTBAAMeta topN >>= genLoad_slow atomic e ty
 1720 
 1721 -- | Handle CmmLoad expression.
 1722 -- This is a special case for loading from a global register pointer
 1723 -- offset such as I32[Sp+8].
 1724 genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
 1725              -> LlvmM ExprData
 1726 genLoad_fast atomic e r n ty = do
 1727     platform <- getPlatform
 1728     (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
 1729     meta          <- getTBAARegMeta r
 1730     let ty'      = cmmToLlvmType ty
 1731         (ix,rem) = n `divMod` ((llvmWidthInBits platform . pLower) grt  `div` 8)
 1732     case isPointer grt && rem == 0 of
 1733             True  -> do
 1734                 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
 1735                 -- We might need a different pointer type, so check
 1736                 case grt == ty' of
 1737                      -- were fine
 1738                      True -> do
 1739                          (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
 1740                          return (var, s1 `snocOL` s2 `snocOL` s3,
 1741                                      [])
 1742 
 1743                      -- cast to pointer type needed
 1744                      False -> do
 1745                          let pty = pLift ty'
 1746                          (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
 1747                          (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
 1748                          return (var, s1 `snocOL` s2 `snocOL` s3
 1749                                     `snocOL` s4, [])
 1750 
 1751             -- If its a bit type then we use the slow method since
 1752             -- we can't avoid casting anyway.
 1753             False -> genLoad_slow atomic  e ty meta
 1754   where
 1755     loadInstr ptr | atomic    = ALoad SyncSeqCst False ptr
 1756                   | otherwise = Load ptr
 1757 
 1758 -- | Handle Cmm load expression.
 1759 -- Generic case. Uses casts and pointer arithmetic if needed.
 1760 genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
 1761 genLoad_slow atomic e ty meta = do
 1762   platform <- getPlatform
 1763   dflags <- getDynFlags
 1764   opts <- getLlvmOpts
 1765   runExprData $ do
 1766     iptr <- exprToVarW e
 1767     case getVarType iptr of
 1768         LMPointer _ ->
 1769                     doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr)
 1770 
 1771         i@(LMInt _) | i == llvmWord platform -> do
 1772                     let pty = LMPointer $ cmmToLlvmType ty
 1773                     ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty
 1774                     doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr)
 1775 
 1776         other -> pprPanic "exprToVar: CmmLoad expression is not right type!"
 1777                      (PprCmm.pprExpr platform e <+> text (
 1778                          "Size of Ptr: " ++ show (llvmPtrBits platform) ++
 1779                          ", Size of var: " ++ show (llvmWidthInBits platform other) ++
 1780                          ", Var: " ++ showSDoc dflags (ppVar opts iptr)))
 1781   where
 1782     loadInstr ptr | atomic    = ALoad SyncSeqCst False ptr
 1783                   | otherwise = Load ptr
 1784 
 1785 
 1786 -- | Handle CmmReg expression. This will return a pointer to the stack
 1787 -- location of the register. Throws an error if it isn't allocated on
 1788 -- the stack.
 1789 getCmmReg :: CmmReg -> LlvmM LlvmVar
 1790 getCmmReg (CmmLocal (LocalReg un _))
 1791   = do exists <- varLookup un
 1792        dflags <- getDynFlags
 1793        case exists of
 1794          Just ety -> return (LMLocalVar un $ pLift ety)
 1795          Nothing  -> panic $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr un) ++ " was not allocated!"
 1796            -- This should never happen, as every local variable should
 1797            -- have been assigned a value at some point, triggering
 1798            -- "funPrologue" to allocate it on the stack.
 1799 
 1800 getCmmReg (CmmGlobal g)
 1801   = do onStack <- checkStackReg g
 1802        dflags <- getDynFlags
 1803        platform <- getPlatform
 1804        if onStack
 1805          then return (lmGlobalRegVar platform g)
 1806          else panic $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr g) ++ " not stack-allocated!"
 1807 
 1808 -- | Return the value of a given register, as well as its type. Might
 1809 -- need to be load from stack.
 1810 getCmmRegVal :: CmmReg -> LlvmM (LlvmVar, LlvmType, LlvmStatements)
 1811 getCmmRegVal reg =
 1812   case reg of
 1813     CmmGlobal g -> do
 1814       onStack <- checkStackReg g
 1815       platform <- getPlatform
 1816       if onStack then loadFromStack else do
 1817         let r = lmGlobalRegArg platform g
 1818         return (r, getVarType r, nilOL)
 1819     _ -> loadFromStack
 1820  where loadFromStack = do
 1821          ptr <- getCmmReg reg
 1822          let ty = pLower $ getVarType ptr
 1823          (v, s) <- doExpr ty (Load ptr)
 1824          return (v, ty, unitOL s)
 1825 
 1826 -- | Allocate a local CmmReg on the stack
 1827 allocReg :: CmmReg -> (LlvmVar, LlvmStatements)
 1828 allocReg (CmmLocal (LocalReg un ty))
 1829   = let ty' = cmmToLlvmType ty
 1830         var = LMLocalVar un (LMPointer ty')
 1831         alc = Alloca ty' 1
 1832     in (var, unitOL $ Assignment var alc)
 1833 
 1834 allocReg _ = panic $ "allocReg: Global reg encountered! Global registers should"
 1835                     ++ " have been handled elsewhere!"
 1836 
 1837 
 1838 -- | Generate code for a literal
 1839 genLit :: EOption -> CmmLit -> LlvmM ExprData
 1840 genLit opt (CmmInt i w)
 1841   -- See Note [Literals and branch conditions].
 1842   = let width | i1Expected opt = i1
 1843               | otherwise      = LMInt (widthInBits w)
 1844         -- comm  = Comment [ fsLit $ "EOption: " ++ show opt
 1845         --                 , fsLit $ "Width  : " ++ show w
 1846         --                 , fsLit $ "Width' : " ++ show (widthInBits w)
 1847         --                 ]
 1848     in return (mkIntLit width i, nilOL, [])
 1849 
 1850 genLit _ (CmmFloat r w)
 1851   = return (LMLitVar $ LMFloatLit (fromRational r) (widthToLlvmFloat w),
 1852               nilOL, [])
 1853 
 1854 genLit opt (CmmVec ls)
 1855   = do llvmLits <- mapM toLlvmLit ls
 1856        return (LMLitVar $ LMVectorLit llvmLits, nilOL, [])
 1857   where
 1858     toLlvmLit :: CmmLit -> LlvmM LlvmLit
 1859     toLlvmLit lit = do
 1860         (llvmLitVar, _, _) <- genLit opt lit
 1861         case llvmLitVar of
 1862           LMLitVar llvmLit -> return llvmLit
 1863           _ -> panic "genLit"
 1864 
 1865 genLit _ cmm@(CmmLabel l)
 1866   = do var <- getGlobalPtr =<< strCLabel_llvm l
 1867        platform <- getPlatform
 1868        let lmty = cmmToLlvmType $ cmmLitType platform cmm
 1869        (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var (llvmWord platform)
 1870        return (v1, unitOL s1, [])
 1871 
 1872 genLit opt (CmmLabelOff label off) = do
 1873     platform <- getPlatform
 1874     (vlbl, stmts, stat) <- genLit opt (CmmLabel label)
 1875     let voff = toIWord platform off
 1876     (v1, s1) <- doExpr (getVarType vlbl) $ LlvmOp LM_MO_Add vlbl voff
 1877     return (v1, stmts `snocOL` s1, stat)
 1878 
 1879 genLit opt (CmmLabelDiffOff l1 l2 off w) = do
 1880     platform <- getPlatform
 1881     (vl1, stmts1, stat1) <- genLit opt (CmmLabel l1)
 1882     (vl2, stmts2, stat2) <- genLit opt (CmmLabel l2)
 1883     let voff = toIWord platform off
 1884     let ty1 = getVarType vl1
 1885     let ty2 = getVarType vl2
 1886     if (isInt ty1) && (isInt ty2)
 1887        && (llvmWidthInBits platform ty1 == llvmWidthInBits platform ty2)
 1888        then do
 1889             (v1, s1) <- doExpr (getVarType vl1) $ LlvmOp LM_MO_Sub vl1 vl2
 1890             (v2, s2) <- doExpr (getVarType v1 ) $ LlvmOp LM_MO_Add v1 voff
 1891             let ty = widthToLlvmInt w
 1892             let stmts = stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2
 1893             if w /= wordWidth platform
 1894               then do
 1895                 (v3, s3) <- doExpr ty $ Cast LM_Trunc v2 ty
 1896                 return (v3, stmts `snocOL` s3, stat1 ++ stat2)
 1897               else
 1898                 return (v2, stmts, stat1 ++ stat2)
 1899         else
 1900             panic "genLit: CmmLabelDiffOff encountered with different label ty!"
 1901 
 1902 genLit opt (CmmBlock b)
 1903   = genLit opt (CmmLabel $ infoTblLbl b)
 1904 
 1905 genLit _ CmmHighStackMark
 1906   = panic "genStaticLit - CmmHighStackMark unsupported!"
 1907 
 1908 
 1909 -- -----------------------------------------------------------------------------
 1910 -- * Misc
 1911 --
 1912 
 1913 -- | Find CmmRegs that get assigned and allocate them on the stack
 1914 --
 1915 -- Any register that gets written needs to be allocated on the
 1916 -- stack. This avoids having to map a CmmReg to an equivalent SSA form
 1917 -- and avoids having to deal with Phi node insertion.  This is also
 1918 -- the approach recommended by LLVM developers.
 1919 --
 1920 -- On the other hand, this is unnecessarily verbose if the register in
 1921 -- question is never written. Therefore we skip it where we can to
 1922 -- save a few lines in the output and hopefully speed compilation up a
 1923 -- bit.
 1924 funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
 1925 funPrologue live cmmBlocks = do
 1926 
 1927   let getAssignedRegs :: CmmNode O O -> [CmmReg]
 1928       getAssignedRegs (CmmAssign reg _)  = [reg]
 1929       getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmLocal rs
 1930       getAssignedRegs _                  = []
 1931       getRegsBlock (_, body, _)          = concatMap getAssignedRegs $ blockToList body
 1932       assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
 1933       isLive r     = r `elem` alwaysLive || r `elem` live
 1934 
 1935   platform <- getPlatform
 1936   stmtss <- forM assignedRegs $ \reg ->
 1937     case reg of
 1938       CmmLocal (LocalReg un _) -> do
 1939         let (newv, stmts) = allocReg reg
 1940         varInsert un (pLower $ getVarType newv)
 1941         return stmts
 1942       CmmGlobal r -> do
 1943         let reg   = lmGlobalRegVar platform r
 1944             arg   = lmGlobalRegArg platform r
 1945             ty    = (pLower . getVarType) reg
 1946             trash = LMLitVar $ LMUndefLit ty
 1947             rval  = if isLive r then arg else trash
 1948             alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
 1949         markStackReg r
 1950         return $ toOL [alloc, Store rval reg]
 1951 
 1952   return (concatOL stmtss `snocOL` jumpToEntry, [])
 1953   where
 1954     entryBlk : _ = cmmBlocks
 1955     jumpToEntry = Branch $ blockIdToLlvm (entryLabel entryBlk)
 1956 
 1957 -- | Function epilogue. Load STG variables to use as argument for call.
 1958 -- STG Liveness optimisation done here.
 1959 funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
 1960 funEpilogue live = do
 1961     platform <- getPlatform
 1962 
 1963     let paddingRegs = padLiveArgs platform live
 1964 
 1965     -- Set to value or "undef" depending on whether the register is
 1966     -- actually live
 1967     let loadExpr r = do
 1968           (v, _, s) <- getCmmRegVal (CmmGlobal r)
 1969           return (Just $ v, s)
 1970         loadUndef r = do
 1971           let ty = (pLower . getVarType $ lmGlobalRegVar platform r)
 1972           return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
 1973 
 1974     -- Note that floating-point registers in `activeStgRegs` must be sorted
 1975     -- according to the calling convention.
 1976     --  E.g. for X86:
 1977     --     GOOD: F1,D1,XMM1,F2,D2,XMM2,...
 1978     --     BAD : F1,F2,F3,D1,D2,D3,XMM1,XMM2,XMM3,...
 1979     --  As Fn, Dn and XMMn use the same register (XMMn) to be passed, we don't
 1980     --  want to pass F2 before D1 for example, otherwise we could get F2 -> XMM1
 1981     --  and D1 -> XMM2.
 1982     let allRegs = activeStgRegs platform
 1983     loads <- forM allRegs $ \r -> if
 1984       -- load live registers
 1985       | r `elem` alwaysLive  -> loadExpr r
 1986       | r `elem` live        -> loadExpr r
 1987       -- load all non Floating-Point Registers
 1988       | not (isFPR r)        -> loadUndef r
 1989       -- load padding Floating-Point Registers
 1990       | r `elem` paddingRegs -> loadUndef r
 1991       | otherwise            -> return (Nothing, nilOL)
 1992 
 1993     let (vars, stmts) = unzip loads
 1994     return (catMaybes vars, concatOL stmts)
 1995 
 1996 -- | Get a function pointer to the CLabel specified.
 1997 --
 1998 -- This is for Haskell functions, function type is assumed, so doesn't work
 1999 -- with foreign functions.
 2000 getHsFunc :: LiveGlobalRegs -> CLabel -> LlvmM ExprData
 2001 getHsFunc live lbl
 2002   = do fty <- llvmFunTy live
 2003        name <- strCLabel_llvm lbl
 2004        getHsFunc' name fty
 2005 
 2006 getHsFunc' :: LMString -> LlvmType -> LlvmM ExprData
 2007 getHsFunc' name fty
 2008   = do fun <- getGlobalPtr name
 2009        if getVarType fun == fty
 2010          then return (fun, nilOL, [])
 2011          else do (v1, s1) <- doExpr (pLift fty)
 2012                                $ Cast LM_Bitcast fun (pLift fty)
 2013                  return  (v1, unitOL s1, [])
 2014 
 2015 -- | Create a new local var
 2016 mkLocalVar :: LlvmType -> LlvmM LlvmVar
 2017 mkLocalVar ty = do
 2018     un <- getUniqueM
 2019     return $ LMLocalVar un ty
 2020 
 2021 
 2022 -- | Execute an expression, assigning result to a var
 2023 doExpr :: LlvmType -> LlvmExpression -> LlvmM (LlvmVar, LlvmStatement)
 2024 doExpr ty expr = do
 2025     v <- mkLocalVar ty
 2026     return (v, Assignment v expr)
 2027 
 2028 
 2029 -- | Expand CmmRegOff
 2030 expandCmmReg :: Platform -> (CmmReg, Int) -> CmmExpr
 2031 expandCmmReg platform (reg, off)
 2032   = let width = typeWidth (cmmRegType platform reg)
 2033         voff  = CmmLit $ CmmInt (fromIntegral off) width
 2034     in CmmMachOp (MO_Add width) [CmmReg reg, voff]
 2035 
 2036 
 2037 -- | Convert a block id into a appropriate Llvm label
 2038 blockIdToLlvm :: BlockId -> LlvmVar
 2039 blockIdToLlvm bid = LMLocalVar (getUnique bid) LMLabel
 2040 
 2041 -- | Create Llvm int Literal
 2042 mkIntLit :: Integral a => LlvmType -> a -> LlvmVar
 2043 mkIntLit ty i = LMLitVar $ LMIntLit (toInteger i) ty
 2044 
 2045 -- | Convert int type to a LLvmVar of word or i32 size
 2046 toI32 :: Integral a => a -> LlvmVar
 2047 toI32 = mkIntLit i32
 2048 
 2049 toIWord :: Integral a => Platform -> a -> LlvmVar
 2050 toIWord platform = mkIntLit (llvmWord platform)
 2051 
 2052 
 2053 -- | Error functions
 2054 panic :: HasCallStack => String -> a
 2055 panic s = Panic.panic $ "GHC.CmmToLlvm.CodeGen." ++ s
 2056 
 2057 pprPanic :: HasCallStack => String -> SDoc -> a
 2058 pprPanic s d = Panic.pprPanic ("GHC.CmmToLlvm.CodeGen." ++ s) d
 2059 
 2060 
 2061 -- | Returns TBAA meta data by unique
 2062 getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
 2063 getTBAAMeta u = do
 2064     mi <- getUniqMeta u
 2065     return [MetaAnnot tbaa (MetaNode i) | let Just i = mi]
 2066 
 2067 -- | Returns TBAA meta data for given register
 2068 getTBAARegMeta :: GlobalReg -> LlvmM [MetaAnnot]
 2069 getTBAARegMeta = getTBAAMeta . getTBAA
 2070 
 2071 
 2072 -- | A more convenient way of accumulating LLVM statements and declarations.
 2073 data LlvmAccum = LlvmAccum LlvmStatements [LlvmCmmDecl]
 2074 
 2075 instance Semigroup LlvmAccum where
 2076   LlvmAccum stmtsA declsA <> LlvmAccum stmtsB declsB =
 2077         LlvmAccum (stmtsA Semigroup.<> stmtsB) (declsA Semigroup.<> declsB)
 2078 
 2079 instance Monoid LlvmAccum where
 2080     mempty = LlvmAccum nilOL []
 2081     mappend = (Semigroup.<>)
 2082 
 2083 liftExprData :: LlvmM ExprData -> WriterT LlvmAccum LlvmM LlvmVar
 2084 liftExprData action = do
 2085     (var, stmts, decls) <- lift action
 2086     tell $ LlvmAccum stmts decls
 2087     return var
 2088 
 2089 statement :: LlvmStatement -> WriterT LlvmAccum LlvmM ()
 2090 statement stmt = tell $ LlvmAccum (unitOL stmt) []
 2091 
 2092 doExprW :: LlvmType -> LlvmExpression -> WriterT LlvmAccum LlvmM LlvmVar
 2093 doExprW a b = do
 2094     (var, stmt) <- lift $ doExpr a b
 2095     statement stmt
 2096     return var
 2097 
 2098 exprToVarW :: CmmExpr -> WriterT LlvmAccum LlvmM LlvmVar
 2099 exprToVarW = liftExprData . exprToVar
 2100 
 2101 runExprData :: WriterT LlvmAccum LlvmM LlvmVar -> LlvmM ExprData
 2102 runExprData action = do
 2103     (var, LlvmAccum stmts decls) <- runWriterT action
 2104     return (var, stmts, decls)
 2105 
 2106 runStmtsDecls :: WriterT LlvmAccum LlvmM () -> LlvmM (LlvmStatements, [LlvmCmmDecl])
 2107 runStmtsDecls action = do
 2108     LlvmAccum stmts decls <- execWriterT action
 2109     return (stmts, decls)
 2110 
 2111 getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
 2112 getCmmRegW = lift . getCmmReg
 2113 
 2114 genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
 2115 genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
 2116 
 2117 -- | Return element of single-element list; 'panic' if list is not a single-element list
 2118 singletonPanic :: String -> [a] -> a
 2119 singletonPanic _ [x] = x
 2120 singletonPanic s _ = panic s