never executed always true always false
    1 {-# LANGUAGE LambdaCase #-}
    2 
    3 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
    4 
    5 ----------------------------------------------------------------------------
    6 --
    7 -- Stg to C--: primitive operations
    8 --
    9 -- (c) The University of Glasgow 2004-2006
   10 --
   11 -----------------------------------------------------------------------------
   12 
   13 module GHC.StgToCmm.Prim (
   14    cgOpApp,
   15    shouldInlinePrimOp
   16  ) where
   17 
   18 import GHC.Prelude hiding ((<*>))
   19 
   20 import GHC.Platform
   21 import GHC.Platform.Profile
   22 
   23 import GHC.StgToCmm.Layout
   24 import GHC.StgToCmm.Foreign
   25 import GHC.StgToCmm.Monad
   26 import GHC.StgToCmm.Utils
   27 import GHC.StgToCmm.Ticky
   28 import GHC.StgToCmm.Heap
   29 import GHC.StgToCmm.Prof ( costCentreFrom )
   30 
   31 import GHC.Driver.Session
   32 import GHC.Driver.Backend
   33 import GHC.Types.Basic
   34 import GHC.Cmm.BlockId
   35 import GHC.Cmm.Graph
   36 import GHC.Stg.Syntax
   37 import GHC.Cmm
   38 import GHC.Unit         ( rtsUnit )
   39 import GHC.Core.Type    ( Type, tyConAppTyCon )
   40 import GHC.Core.TyCon
   41 import GHC.Cmm.CLabel
   42 import GHC.Cmm.Utils
   43 import GHC.Builtin.PrimOps
   44 import GHC.Runtime.Heap.Layout
   45 import GHC.Data.FastString
   46 import GHC.Utils.Misc
   47 import GHC.Utils.Panic
   48 import GHC.Utils.Panic.Plain
   49 import Data.Maybe
   50 
   51 import Control.Monad (liftM, when, unless)
   52 
   53 ------------------------------------------------------------------------
   54 --      Primitive operations and foreign calls
   55 ------------------------------------------------------------------------
   56 
   57 {- Note [Foreign call results]
   58    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   59 A foreign call always returns an unboxed tuple of results, one
   60 of which is the state token.  This seems to happen even for pure
   61 calls.
   62 
   63 Even if we returned a single result for pure calls, it'd still be
   64 right to wrap it in a singleton unboxed tuple, because the result
   65 might be a Haskell closure pointer, we don't want to evaluate it. -}
   66 
   67 ----------------------------------
   68 cgOpApp :: StgOp        -- The op
   69         -> [StgArg]     -- Arguments
   70         -> Type         -- Result type (always an unboxed tuple)
   71         -> FCode ReturnKind
   72 
   73 -- Foreign calls
   74 cgOpApp (StgFCallOp fcall ty) stg_args res_ty
   75   = cgForeignCall fcall ty stg_args res_ty
   76       -- Note [Foreign call results]
   77 
   78 cgOpApp (StgPrimOp primop) args res_ty = do
   79     dflags <- getDynFlags
   80     cmm_args <- getNonVoidArgAmodes args
   81     cmmPrimOpApp dflags primop cmm_args (Just res_ty)
   82 
   83 cgOpApp (StgPrimCallOp primcall) args _res_ty
   84   = do  { cmm_args <- getNonVoidArgAmodes args
   85         ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
   86         ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
   87 
   88 cmmPrimOpApp :: DynFlags -> PrimOp -> [CmmExpr] -> Maybe Type -> FCode ReturnKind
   89 cmmPrimOpApp dflags primop cmm_args mres_ty =
   90   case emitPrimOp dflags primop cmm_args of
   91     PrimopCmmEmit_Internal f ->
   92       let
   93          -- if the result type isn't explicitly given, we directly use the
   94          -- result type of the primop.
   95          res_ty = fromMaybe (primOpResultType primop) mres_ty
   96       in emitReturn =<< f res_ty
   97     PrimopCmmEmit_External -> do
   98       let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
   99       emitCall (NativeNodeCall, NativeReturn) fun cmm_args
  100 
  101 
  102 -- | Interpret the argument as an unsigned value, assuming the value
  103 -- is given in two-complement form in the given width.
  104 --
  105 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
  106 --
  107 -- This function is used to work around the fact that many array
  108 -- primops take Int# arguments, but we interpret them as unsigned
  109 -- quantities in the code gen. This means that we have to be careful
  110 -- every time we work on e.g. a CmmInt literal that corresponds to the
  111 -- array size, as it might contain a negative Integer value if the
  112 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
  113 -- literal.
  114 asUnsigned :: Width -> Integer -> Integer
  115 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
  116 
  117 ------------------------------------------------------------------------
  118 --      Emitting code for a primop
  119 ------------------------------------------------------------------------
  120 
  121 shouldInlinePrimOp :: DynFlags -> PrimOp -> [CmmExpr] -> Bool
  122 shouldInlinePrimOp dflags op args = case emitPrimOp dflags op args of
  123   PrimopCmmEmit_External -> False
  124   PrimopCmmEmit_Internal _ -> True
  125 
  126 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
  127 -- ByteOff (or some other fixed width signed type) to represent
  128 -- array sizes or indices. This means that these will overflow for
  129 -- large enough sizes.
  130 
  131 -- TODO: Several primops, such as 'copyArray#', only have an inline
  132 -- implementation (below) but could possibly have both an inline
  133 -- implementation and an out-of-line implementation, just like
  134 -- 'newArray#'. This would lower the amount of code generated,
  135 -- hopefully without a performance impact (needs to be measured).
  136 
  137 -- | The big function handling all the primops.
  138 --
  139 -- In the simple case, there is just one implementation, and we emit that.
  140 --
  141 -- In more complex cases, there is a foreign call (out of line) fallback. This
  142 -- might happen e.g. if there's enough static information, such as statically
  143 -- know arguments.
  144 emitPrimOp
  145   :: DynFlags
  146   -> PrimOp            -- ^ The primop
  147   -> [CmmExpr]         -- ^ The primop arguments
  148   -> PrimopCmmEmit
  149 emitPrimOp dflags primop = case primop of
  150   NewByteArrayOp_Char -> \case
  151     [(CmmLit (CmmInt n w))]
  152       | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags)
  153       -> opIntoRegs  $ \ [res] -> doNewByteArrayOp res (fromInteger n)
  154     _ -> PrimopCmmEmit_External
  155 
  156   NewArrayOp -> \case
  157     [(CmmLit (CmmInt n w)), init]
  158       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  159       -> opIntoRegs $ \[res] -> doNewArrayOp res (arrPtrsRep platform (fromInteger n)) mkMAP_DIRTY_infoLabel
  160         [ (mkIntExpr platform (fromInteger n),
  161            fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform))
  162         , (mkIntExpr platform (nonHdrSizeW (arrPtrsRep platform (fromInteger n))),
  163            fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_size (platformConstants platform))
  164         ]
  165         (fromInteger n) init
  166     _ -> PrimopCmmEmit_External
  167 
  168   CopyArrayOp -> \case
  169     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  170       opIntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
  171     _ -> PrimopCmmEmit_External
  172 
  173   CopyMutableArrayOp -> \case
  174     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  175       opIntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
  176     _ -> PrimopCmmEmit_External
  177 
  178   CopyArrayArrayOp -> \case
  179     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  180       opIntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
  181     _ -> PrimopCmmEmit_External
  182 
  183   CopyMutableArrayArrayOp -> \case
  184     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  185       opIntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
  186     _ -> PrimopCmmEmit_External
  187 
  188   CloneArrayOp -> \case
  189     [src, src_off, (CmmLit (CmmInt n w))]
  190       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  191       -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
  192     _ -> PrimopCmmEmit_External
  193 
  194   CloneMutableArrayOp -> \case
  195     [src, src_off, (CmmLit (CmmInt n w))]
  196       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  197       -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
  198     _ -> PrimopCmmEmit_External
  199 
  200   FreezeArrayOp -> \case
  201     [src, src_off, (CmmLit (CmmInt n w))]
  202       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  203       -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
  204     _ -> PrimopCmmEmit_External
  205 
  206   ThawArrayOp -> \case
  207     [src, src_off, (CmmLit (CmmInt n w))]
  208       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  209       -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
  210     _ -> PrimopCmmEmit_External
  211 
  212   NewSmallArrayOp -> \case
  213     [(CmmLit (CmmInt n w)), init]
  214       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  215       -> opIntoRegs $ \ [res] ->
  216         doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
  217         [ (mkIntExpr platform (fromInteger n),
  218            fixedHdrSize profile + pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform))
  219         ]
  220         (fromInteger n) init
  221     _ -> PrimopCmmEmit_External
  222 
  223   CopySmallArrayOp -> \case
  224     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  225       opIntoRegs $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
  226     _ -> PrimopCmmEmit_External
  227 
  228   CopySmallMutableArrayOp -> \case
  229     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
  230       opIntoRegs $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
  231     _ -> PrimopCmmEmit_External
  232 
  233   CloneSmallArrayOp -> \case
  234     [src, src_off, (CmmLit (CmmInt n w))]
  235       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  236       -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
  237     _ -> PrimopCmmEmit_External
  238 
  239   CloneSmallMutableArrayOp -> \case
  240     [src, src_off, (CmmLit (CmmInt n w))]
  241       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  242       -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
  243     _ -> PrimopCmmEmit_External
  244 
  245   FreezeSmallArrayOp -> \case
  246     [src, src_off, (CmmLit (CmmInt n w))]
  247       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  248       -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
  249     _ -> PrimopCmmEmit_External
  250 
  251   ThawSmallArrayOp -> \case
  252     [src, src_off, (CmmLit (CmmInt n w))]
  253       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
  254       -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
  255     _ -> PrimopCmmEmit_External
  256 
  257 -- First we handle various awkward cases specially.
  258 
  259   ParOp -> \[arg] -> opIntoRegs $ \[res] ->
  260     -- for now, just implement this in a C function
  261     -- later, we might want to inline it.
  262     emitCCall
  263         [(res,NoHint)]
  264         (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
  265         [(baseExpr, AddrHint), (arg,AddrHint)]
  266 
  267   SparkOp -> \[arg] -> opIntoRegs $ \[res] -> do
  268     -- returns the value of arg in res.  We're going to therefore
  269     -- refer to arg twice (once to pass to newSpark(), and once to
  270     -- assign to res), so put it in a temporary.
  271     tmp <- assignTemp arg
  272     tmp2 <- newTemp (bWord platform)
  273     emitCCall
  274         [(tmp2,NoHint)]
  275         (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
  276         [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
  277     emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
  278 
  279   GetCCSOfOp -> \[arg] -> opIntoRegs $ \[res] -> do
  280     let
  281       val
  282        | profileIsProfiling profile = costCentreFrom platform (cmmUntag platform arg)
  283        | otherwise                  = CmmLit (zeroCLit platform)
  284     emitAssign (CmmLocal res) val
  285 
  286   GetCurrentCCSOp -> \[_] -> opIntoRegs $ \[res] ->
  287     emitAssign (CmmLocal res) cccsExpr
  288 
  289   MyThreadIdOp -> \[] -> opIntoRegs $ \[res] ->
  290     emitAssign (CmmLocal res) currentTSOExpr
  291 
  292   ReadMutVarOp -> \[mutv] -> opIntoRegs $ \[res] ->
  293     emitAssign (CmmLocal res) (cmmLoadIndexW platform mutv (fixedHdrSizeW profile) (gcWord platform))
  294 
  295   WriteMutVarOp -> \[mutv, var] -> opIntoRegs $ \res@[] -> do
  296     old_val <- CmmLocal <$> newTemp (cmmExprType platform var)
  297     emitAssign old_val (cmmLoadIndexW platform mutv (fixedHdrSizeW profile) (gcWord platform))
  298 
  299     -- Without this write barrier, other CPUs may see this pointer before
  300     -- the writes for the closure it points to have occurred.
  301     -- Note that this also must come after we read the old value to ensure
  302     -- that the read of old_val comes before another core's write to the
  303     -- MutVar's value.
  304     emitPrimCall res MO_WriteBarrier []
  305     emitStore (cmmOffsetW platform mutv (fixedHdrSizeW profile)) var
  306     emitCCall
  307             [{-no results-}]
  308             (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
  309             [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
  310 
  311 --  #define sizzeofByteArrayzh(r,a) \
  312 --     r = ((StgArrBytes *)(a))->bytes
  313   SizeofByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  314     emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
  315 
  316 --  #define sizzeofMutableByteArrayzh(r,a) \
  317 --      r = ((StgArrBytes *)(a))->bytes
  318   SizeofMutableByteArrayOp -> emitPrimOp dflags SizeofByteArrayOp
  319 
  320 --  #define getSizzeofMutableByteArrayzh(r,a) \
  321 --      r = ((StgArrBytes *)(a))->bytes
  322   GetSizeofMutableByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  323     emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
  324 
  325 
  326 --  #define touchzh(o)                  /* nothing */
  327   TouchOp -> \args@[_] -> opIntoRegs $ \res@[] ->
  328     emitPrimCall res MO_Touch args
  329 
  330 --  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
  331   ByteArrayContents_Char -> \[arg] -> opIntoRegs $ \[res] ->
  332     emitAssign (CmmLocal res) (cmmOffsetB platform arg (arrWordsHdrSize profile))
  333 
  334 --  #define mutableByteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
  335   MutableByteArrayContents_Char -> \[arg] -> opIntoRegs $ \[res] ->
  336     emitAssign (CmmLocal res) (cmmOffsetB platform arg (arrWordsHdrSize profile))
  337 
  338 --  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
  339   StableNameToIntOp -> \[arg] -> opIntoRegs $ \[res] ->
  340     emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
  341 
  342   EqStablePtrOp -> \args -> opTranslate args (mo_wordEq platform)
  343 
  344   ReallyUnsafePtrEqualityOp -> \[arg1, arg2] -> opIntoRegs $ \[res] ->
  345     emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq platform) [arg1,arg2])
  346 
  347 --  #define addrToHValuezh(r,a) r=(P_)a
  348   AddrToAnyOp -> \[arg] -> opIntoRegs $ \[res] ->
  349     emitAssign (CmmLocal res) arg
  350 
  351 --  #define hvalueToAddrzh(r, a) r=(W_)a
  352   AnyToAddrOp -> \[arg] -> opIntoRegs $ \[res] ->
  353     emitAssign (CmmLocal res) arg
  354 
  355 {- Freezing arrays-of-ptrs requires changing an info table, for the
  356    benefit of the generational collector.  It needs to scavenge mutable
  357    objects, even if they are in old space.  When they become immutable,
  358    they can be removed from this scavenge list.  -}
  359 
  360 --  #define unsafeFreezzeArrayzh(r,a)
  361 --      {
  362 --        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
  363 --        r = a;
  364 --      }
  365   UnsafeFreezeArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  366     emit $ catAGraphs
  367       [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
  368         mkAssign (CmmLocal res) arg ]
  369   UnsafeFreezeArrayArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  370     emit $ catAGraphs
  371       [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
  372         mkAssign (CmmLocal res) arg ]
  373   UnsafeFreezeSmallArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  374     emit $ catAGraphs
  375       [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
  376         mkAssign (CmmLocal res) arg ]
  377 
  378 --  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
  379   UnsafeFreezeByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  380     emitAssign (CmmLocal res) arg
  381 
  382 -- Reading/writing pointer arrays
  383 
  384   ReadArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
  385     doReadPtrArrayOp res obj ix
  386   IndexArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
  387     doReadPtrArrayOp res obj ix
  388   WriteArrayOp -> \[obj, ix, v] -> opIntoRegs $ \[] ->
  389     doWritePtrArrayOp obj ix v
  390 
  391   IndexArrayArrayOp_ByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  392     doReadPtrArrayOp res obj ix
  393   IndexArrayArrayOp_ArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  394     doReadPtrArrayOp res obj ix
  395   ReadArrayArrayOp_ByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  396     doReadPtrArrayOp res obj ix
  397   ReadArrayArrayOp_MutableByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  398     doReadPtrArrayOp res obj ix
  399   ReadArrayArrayOp_ArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  400     doReadPtrArrayOp res obj ix
  401   ReadArrayArrayOp_MutableArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
  402     doReadPtrArrayOp res obj ix
  403   WriteArrayArrayOp_ByteArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
  404     doWritePtrArrayOp obj ix v
  405   WriteArrayArrayOp_MutableByteArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
  406     doWritePtrArrayOp obj ix v
  407   WriteArrayArrayOp_ArrayArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
  408     doWritePtrArrayOp obj ix v
  409   WriteArrayArrayOp_MutableArrayArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
  410     doWritePtrArrayOp obj ix v
  411 
  412   ReadSmallArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
  413     doReadSmallPtrArrayOp res obj ix
  414   IndexSmallArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
  415     doReadSmallPtrArrayOp res obj ix
  416   WriteSmallArrayOp -> \[obj,ix,v] -> opIntoRegs $ \[] ->
  417     doWriteSmallPtrArrayOp obj ix v
  418 
  419 -- Getting the size of pointer arrays
  420 
  421   SizeofArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  422     emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg
  423       (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform)))
  424         (bWord platform))
  425   SizeofMutableArrayOp -> emitPrimOp dflags SizeofArrayOp
  426   SizeofArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
  427   SizeofMutableArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
  428   SizeofSmallArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
  429     emit $ mkAssign (CmmLocal res)
  430      (cmmLoadIndexW platform arg
  431      (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform)))
  432         (bWord platform))
  433 
  434   SizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp
  435   GetSizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp
  436 
  437 -- IndexXXXoffAddr
  438 
  439   IndexOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
  440     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8 res args
  441   IndexOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
  442     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
  443   IndexOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
  444     doIndexOffAddrOp   Nothing (bWord platform) res args
  445   IndexOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
  446     doIndexOffAddrOp   Nothing (bWord platform) res args
  447   IndexOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
  448     doIndexOffAddrOp   Nothing (bWord platform) res args
  449   IndexOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
  450     doIndexOffAddrOp   Nothing f32 res args
  451   IndexOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
  452     doIndexOffAddrOp   Nothing f64 res args
  453   IndexOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
  454     doIndexOffAddrOp   Nothing (bWord platform) res args
  455   IndexOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
  456     doIndexOffAddrOp   Nothing b8  res args
  457   IndexOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
  458     doIndexOffAddrOp   Nothing b16 res args
  459   IndexOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
  460     doIndexOffAddrOp   Nothing b32 res args
  461   IndexOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
  462     doIndexOffAddrOp   Nothing b64 res args
  463   IndexOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
  464     doIndexOffAddrOp   Nothing b8  res args
  465   IndexOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
  466     doIndexOffAddrOp   Nothing b16 res args
  467   IndexOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
  468     doIndexOffAddrOp   Nothing b32 res args
  469   IndexOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
  470     doIndexOffAddrOp   Nothing b64 res args
  471 
  472 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
  473 
  474   ReadOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
  475     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8 res args
  476   ReadOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
  477     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
  478   ReadOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
  479     doIndexOffAddrOp   Nothing (bWord platform) res args
  480   ReadOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
  481     doIndexOffAddrOp   Nothing (bWord platform) res args
  482   ReadOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
  483     doIndexOffAddrOp   Nothing (bWord platform) res args
  484   ReadOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
  485     doIndexOffAddrOp   Nothing f32 res args
  486   ReadOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
  487     doIndexOffAddrOp   Nothing f64 res args
  488   ReadOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
  489     doIndexOffAddrOp   Nothing (bWord platform) res args
  490   ReadOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
  491     doIndexOffAddrOp   Nothing b8  res args
  492   ReadOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
  493     doIndexOffAddrOp   Nothing b16 res args
  494   ReadOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
  495     doIndexOffAddrOp   Nothing b32 res args
  496   ReadOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
  497     doIndexOffAddrOp   Nothing b64 res args
  498   ReadOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
  499     doIndexOffAddrOp   Nothing b8  res args
  500   ReadOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
  501     doIndexOffAddrOp   Nothing b16 res args
  502   ReadOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
  503     doIndexOffAddrOp   Nothing b32 res args
  504   ReadOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
  505     doIndexOffAddrOp   Nothing b64 res args
  506 
  507 -- IndexXXXArray
  508 
  509   IndexByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
  510     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8 res args
  511   IndexByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
  512     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32 res args
  513   IndexByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
  514     doIndexByteArrayOp   Nothing (bWord platform) res args
  515   IndexByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
  516     doIndexByteArrayOp   Nothing (bWord platform) res args
  517   IndexByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
  518     doIndexByteArrayOp   Nothing (bWord platform) res args
  519   IndexByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
  520     doIndexByteArrayOp   Nothing f32 res args
  521   IndexByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
  522     doIndexByteArrayOp   Nothing f64 res args
  523   IndexByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
  524     doIndexByteArrayOp   Nothing (bWord platform) res args
  525   IndexByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
  526     doIndexByteArrayOp   Nothing b8  res args
  527   IndexByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
  528     doIndexByteArrayOp   Nothing b16  res args
  529   IndexByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
  530     doIndexByteArrayOp   Nothing b32  res args
  531   IndexByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
  532     doIndexByteArrayOp   Nothing b64  res args
  533   IndexByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
  534     doIndexByteArrayOp   Nothing b8  res args
  535   IndexByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
  536     doIndexByteArrayOp   Nothing b16  res args
  537   IndexByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
  538     doIndexByteArrayOp   Nothing b32  res args
  539   IndexByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
  540     doIndexByteArrayOp   Nothing b64  res args
  541 
  542 -- ReadXXXArray, identical to IndexXXXArray.
  543 
  544   ReadByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
  545     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8 res args
  546   ReadByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
  547     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32 res args
  548   ReadByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
  549     doIndexByteArrayOp   Nothing (bWord platform) res args
  550   ReadByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
  551     doIndexByteArrayOp   Nothing (bWord platform) res args
  552   ReadByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
  553     doIndexByteArrayOp   Nothing (bWord platform) res args
  554   ReadByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
  555     doIndexByteArrayOp   Nothing f32 res args
  556   ReadByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
  557     doIndexByteArrayOp   Nothing f64 res args
  558   ReadByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
  559     doIndexByteArrayOp   Nothing (bWord platform) res args
  560   ReadByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
  561     doIndexByteArrayOp   Nothing b8  res args
  562   ReadByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
  563     doIndexByteArrayOp   Nothing b16  res args
  564   ReadByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
  565     doIndexByteArrayOp   Nothing b32  res args
  566   ReadByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
  567     doIndexByteArrayOp   Nothing b64  res args
  568   ReadByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
  569     doIndexByteArrayOp   Nothing b8  res args
  570   ReadByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
  571     doIndexByteArrayOp   Nothing b16  res args
  572   ReadByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
  573     doIndexByteArrayOp   Nothing b32  res args
  574   ReadByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
  575     doIndexByteArrayOp   Nothing b64  res args
  576 
  577 -- IndexWord8ArrayAsXXX
  578 
  579   IndexByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
  580     doIndexByteArrayOpAs   (Just (mo_u_8ToWord platform)) b8 b8 res args
  581   IndexByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
  582     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
  583   IndexByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
  584     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  585   IndexByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
  586     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  587   IndexByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
  588     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  589   IndexByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
  590     doIndexByteArrayOpAs   Nothing f32 b8 res args
  591   IndexByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
  592     doIndexByteArrayOpAs   Nothing f64 b8 res args
  593   IndexByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
  594     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  595   IndexByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
  596     doIndexByteArrayOpAs   Nothing b16 b8 res args
  597   IndexByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
  598     doIndexByteArrayOpAs   Nothing b32 b8 res args
  599   IndexByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
  600     doIndexByteArrayOpAs   Nothing b64 b8 res args
  601   IndexByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
  602     doIndexByteArrayOpAs   Nothing b16 b8 res args
  603   IndexByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
  604     doIndexByteArrayOpAs   Nothing b32 b8 res args
  605   IndexByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
  606     doIndexByteArrayOpAs   Nothing b64 b8 res args
  607 
  608 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
  609 
  610   ReadByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
  611     doIndexByteArrayOpAs   (Just (mo_u_8ToWord platform)) b8 b8 res args
  612   ReadByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
  613     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
  614   ReadByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
  615     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  616   ReadByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
  617     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  618   ReadByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
  619     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  620   ReadByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
  621     doIndexByteArrayOpAs   Nothing f32 b8 res args
  622   ReadByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
  623     doIndexByteArrayOpAs   Nothing f64 b8 res args
  624   ReadByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
  625     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
  626   ReadByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
  627     doIndexByteArrayOpAs   Nothing b16 b8 res args
  628   ReadByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
  629     doIndexByteArrayOpAs   Nothing b32 b8 res args
  630   ReadByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
  631     doIndexByteArrayOpAs   Nothing b64 b8 res args
  632   ReadByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
  633     doIndexByteArrayOpAs   Nothing b16 b8 res args
  634   ReadByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
  635     doIndexByteArrayOpAs   Nothing b32 b8 res args
  636   ReadByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
  637     doIndexByteArrayOpAs   Nothing b64 b8 res args
  638 
  639 -- WriteXXXoffAddr
  640 
  641   WriteOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
  642     doWriteOffAddrOp (Just (mo_WordTo8 platform))  b8 res args
  643   WriteOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
  644     doWriteOffAddrOp (Just (mo_WordTo32 platform)) b32 res args
  645   WriteOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
  646     doWriteOffAddrOp Nothing (bWord platform) res args
  647   WriteOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
  648     doWriteOffAddrOp Nothing (bWord platform) res args
  649   WriteOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
  650     doWriteOffAddrOp Nothing (bWord platform) res args
  651   WriteOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
  652     doWriteOffAddrOp Nothing f32 res args
  653   WriteOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
  654     doWriteOffAddrOp Nothing f64 res args
  655   WriteOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
  656     doWriteOffAddrOp Nothing (bWord platform) res args
  657   WriteOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
  658     doWriteOffAddrOp Nothing b8 res args
  659   WriteOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
  660     doWriteOffAddrOp Nothing b16 res args
  661   WriteOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
  662     doWriteOffAddrOp Nothing b32 res args
  663   WriteOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
  664     doWriteOffAddrOp Nothing b64 res args
  665   WriteOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
  666     doWriteOffAddrOp Nothing b8 res args
  667   WriteOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
  668     doWriteOffAddrOp Nothing b16 res args
  669   WriteOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
  670     doWriteOffAddrOp Nothing b32 res args
  671   WriteOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
  672     doWriteOffAddrOp Nothing b64 res args
  673 
  674 -- WriteXXXArray
  675 
  676   WriteByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
  677     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8 res args
  678   WriteByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
  679     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b32 res args
  680   WriteByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
  681     doWriteByteArrayOp Nothing (bWord platform) res args
  682   WriteByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
  683     doWriteByteArrayOp Nothing (bWord platform) res args
  684   WriteByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
  685     doWriteByteArrayOp Nothing (bWord platform) res args
  686   WriteByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
  687     doWriteByteArrayOp Nothing f32 res args
  688   WriteByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
  689     doWriteByteArrayOp Nothing f64 res args
  690   WriteByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
  691     doWriteByteArrayOp Nothing (bWord platform) res args
  692   WriteByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
  693     doWriteByteArrayOp Nothing b8 res args
  694   WriteByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
  695     doWriteByteArrayOp Nothing b16 res args
  696   WriteByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
  697     doWriteByteArrayOp Nothing b32 res args
  698   WriteByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
  699     doWriteByteArrayOp Nothing b64 res args
  700   WriteByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
  701     doWriteByteArrayOp Nothing b8  res args
  702   WriteByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
  703     doWriteByteArrayOp Nothing b16 res args
  704   WriteByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
  705     doWriteByteArrayOp Nothing b32 res args
  706   WriteByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
  707     doWriteByteArrayOp Nothing b64 res args
  708 
  709 -- WriteInt8ArrayAsXXX
  710 
  711   WriteByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
  712     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8 res args
  713   WriteByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
  714     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b8 res args
  715   WriteByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
  716     doWriteByteArrayOp Nothing b8 res args
  717   WriteByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
  718     doWriteByteArrayOp Nothing b8 res args
  719   WriteByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
  720     doWriteByteArrayOp Nothing b8 res args
  721   WriteByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
  722     doWriteByteArrayOp Nothing b8 res args
  723   WriteByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
  724     doWriteByteArrayOp Nothing b8 res args
  725   WriteByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
  726     doWriteByteArrayOp Nothing b8 res args
  727   WriteByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
  728     doWriteByteArrayOp Nothing b8 res args
  729   WriteByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
  730     doWriteByteArrayOp Nothing b8 res args
  731   WriteByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
  732     doWriteByteArrayOp Nothing b8 res args
  733   WriteByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
  734     doWriteByteArrayOp Nothing b8 res args
  735   WriteByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
  736     doWriteByteArrayOp Nothing b8 res args
  737   WriteByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
  738     doWriteByteArrayOp Nothing b8 res args
  739 
  740 -- Copying and setting byte arrays
  741   CopyByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
  742     doCopyByteArrayOp src src_off dst dst_off n
  743   CopyMutableByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
  744     doCopyMutableByteArrayOp src src_off dst dst_off n
  745   CopyByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
  746     doCopyByteArrayToAddrOp src src_off dst n
  747   CopyMutableByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
  748     doCopyMutableByteArrayToAddrOp src src_off dst n
  749   CopyAddrToByteArrayOp -> \[src,dst,dst_off,n] -> opIntoRegs $ \[] ->
  750     doCopyAddrToByteArrayOp src dst dst_off n
  751   SetByteArrayOp -> \[ba,off,len,c] -> opIntoRegs $ \[] ->
  752     doSetByteArrayOp ba off len c
  753 
  754 -- Comparing byte arrays
  755   CompareByteArraysOp -> \[ba1,ba1_off,ba2,ba2_off,n] -> opIntoRegs $ \[res] ->
  756     doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
  757 
  758   BSwap16Op -> \[w] -> opIntoRegs $ \[res] ->
  759     emitBSwapCall res w W16
  760   BSwap32Op -> \[w] -> opIntoRegs $ \[res] ->
  761     emitBSwapCall res w W32
  762   BSwap64Op -> \[w] -> opIntoRegs $ \[res] ->
  763     emitBSwapCall res w W64
  764   BSwapOp -> \[w] -> opIntoRegs $ \[res] ->
  765     emitBSwapCall res w (wordWidth platform)
  766 
  767   BRev8Op -> \[w] -> opIntoRegs $ \[res] ->
  768     emitBRevCall res w W8
  769   BRev16Op -> \[w] -> opIntoRegs $ \[res] ->
  770     emitBRevCall res w W16
  771   BRev32Op -> \[w] -> opIntoRegs $ \[res] ->
  772     emitBRevCall res w W32
  773   BRev64Op -> \[w] -> opIntoRegs $ \[res] ->
  774     emitBRevCall res w W64
  775   BRevOp -> \[w] -> opIntoRegs $ \[res] ->
  776     emitBRevCall res w (wordWidth platform)
  777 
  778 -- Population count
  779   PopCnt8Op -> \[w] -> opIntoRegs $ \[res] ->
  780     emitPopCntCall res w W8
  781   PopCnt16Op -> \[w] -> opIntoRegs $ \[res] ->
  782     emitPopCntCall res w W16
  783   PopCnt32Op -> \[w] -> opIntoRegs $ \[res] ->
  784     emitPopCntCall res w W32
  785   PopCnt64Op -> \[w] -> opIntoRegs $ \[res] ->
  786     emitPopCntCall res w W64
  787   PopCntOp -> \[w] -> opIntoRegs $ \[res] ->
  788     emitPopCntCall res w (wordWidth platform)
  789 
  790 -- Parallel bit deposit
  791   Pdep8Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  792     emitPdepCall res src mask W8
  793   Pdep16Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  794     emitPdepCall res src mask W16
  795   Pdep32Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  796     emitPdepCall res src mask W32
  797   Pdep64Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  798     emitPdepCall res src mask W64
  799   PdepOp -> \[src, mask] -> opIntoRegs $ \[res] ->
  800     emitPdepCall res src mask (wordWidth platform)
  801 
  802 -- Parallel bit extract
  803   Pext8Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  804     emitPextCall res src mask W8
  805   Pext16Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  806     emitPextCall res src mask W16
  807   Pext32Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  808     emitPextCall res src mask W32
  809   Pext64Op -> \[src, mask] -> opIntoRegs $ \[res] ->
  810     emitPextCall res src mask W64
  811   PextOp -> \[src, mask] -> opIntoRegs $ \[res] ->
  812     emitPextCall res src mask (wordWidth platform)
  813 
  814 -- count leading zeros
  815   Clz8Op -> \[w] -> opIntoRegs $ \[res] ->
  816     emitClzCall res w W8
  817   Clz16Op -> \[w] -> opIntoRegs $ \[res] ->
  818     emitClzCall res w W16
  819   Clz32Op -> \[w] -> opIntoRegs $ \[res] ->
  820     emitClzCall res w W32
  821   Clz64Op -> \[w] -> opIntoRegs $ \[res] ->
  822     emitClzCall res w W64
  823   ClzOp -> \[w] -> opIntoRegs $ \[res] ->
  824     emitClzCall res w (wordWidth platform)
  825 
  826 -- count trailing zeros
  827   Ctz8Op -> \[w] -> opIntoRegs $ \[res] ->
  828     emitCtzCall res w W8
  829   Ctz16Op -> \[w] -> opIntoRegs $ \[res] ->
  830     emitCtzCall res w W16
  831   Ctz32Op -> \[w] -> opIntoRegs $ \[res] ->
  832     emitCtzCall res w W32
  833   Ctz64Op -> \[w] -> opIntoRegs $ \[res] ->
  834     emitCtzCall res w W64
  835   CtzOp -> \[w] -> opIntoRegs $ \[res] ->
  836     emitCtzCall res w (wordWidth platform)
  837 
  838 -- Unsigned int to floating point conversions
  839   WordToFloatOp -> \[w] -> opIntoRegs $ \[res] ->
  840     emitPrimCall [res] (MO_UF_Conv W32) [w]
  841   WordToDoubleOp -> \[w] -> opIntoRegs $ \[res] ->
  842     emitPrimCall [res] (MO_UF_Conv W64) [w]
  843 
  844 -- Atomic operations
  845   InterlockedExchange_Addr -> \[src, value] -> opIntoRegs $ \[res] ->
  846     emitPrimCall [res] (MO_Xchg (wordWidth platform)) [src, value]
  847   InterlockedExchange_Word -> \[src, value] -> opIntoRegs $ \[res] ->
  848     emitPrimCall [res] (MO_Xchg (wordWidth platform)) [src, value]
  849 
  850   FetchAddAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  851     doAtomicAddrRMW res AMO_Add addr (bWord platform) n
  852   FetchSubAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  853     doAtomicAddrRMW res AMO_Sub addr (bWord platform) n
  854   FetchAndAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  855     doAtomicAddrRMW res AMO_And addr (bWord platform) n
  856   FetchNandAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  857     doAtomicAddrRMW res AMO_Nand addr (bWord platform) n
  858   FetchOrAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  859     doAtomicAddrRMW res AMO_Or addr (bWord platform) n
  860   FetchXorAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
  861     doAtomicAddrRMW res AMO_Xor addr (bWord platform) n
  862 
  863   AtomicReadAddrOp_Word -> \[addr] -> opIntoRegs $ \[res] ->
  864     doAtomicReadAddr res addr (bWord platform)
  865   AtomicWriteAddrOp_Word -> \[addr, val] -> opIntoRegs $ \[] ->
  866     doAtomicWriteAddr addr (bWord platform) val
  867 
  868   CasAddrOp_Addr -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  869     emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
  870   CasAddrOp_Word -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  871     emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
  872   CasAddrOp_Word8 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  873     emitPrimCall [res] (MO_Cmpxchg W8) [dst, expected, new]
  874   CasAddrOp_Word16 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  875     emitPrimCall [res] (MO_Cmpxchg W16) [dst, expected, new]
  876   CasAddrOp_Word32 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  877     emitPrimCall [res] (MO_Cmpxchg W32) [dst, expected, new]
  878   CasAddrOp_Word64 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
  879     emitPrimCall [res] (MO_Cmpxchg W64) [dst, expected, new]
  880 
  881 -- SIMD primops
  882   (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do
  883     checkVecCompatibility dflags vcat n w
  884     doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res
  885    where
  886     zeros :: CmmExpr
  887     zeros = CmmLit $ CmmVec (replicate n zero)
  888 
  889     zero :: CmmLit
  890     zero = case vcat of
  891              IntVec   -> CmmInt 0 w
  892              WordVec  -> CmmInt 0 w
  893              FloatVec -> CmmFloat 0 w
  894 
  895     ty :: CmmType
  896     ty = vecVmmType vcat n w
  897 
  898   (VecPackOp vcat n w) -> \es -> opIntoRegs $ \[res] -> do
  899     checkVecCompatibility dflags vcat n w
  900     when (es `lengthIsNot` n) $
  901         panic "emitPrimOp: VecPackOp has wrong number of arguments"
  902     doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res
  903    where
  904     zeros :: CmmExpr
  905     zeros = CmmLit $ CmmVec (replicate n zero)
  906 
  907     zero :: CmmLit
  908     zero = case vcat of
  909              IntVec   -> CmmInt 0 w
  910              WordVec  -> CmmInt 0 w
  911              FloatVec -> CmmFloat 0 w
  912 
  913     ty :: CmmType
  914     ty = vecVmmType vcat n w
  915 
  916   (VecUnpackOp vcat n w) -> \[arg] -> opIntoRegs $ \res -> do
  917     checkVecCompatibility dflags vcat n w
  918     when (res `lengthIsNot` n) $
  919         panic "emitPrimOp: VecUnpackOp has wrong number of results"
  920     doVecUnpackOp (vecElemProjectCast platform vcat w) ty arg res
  921    where
  922     ty :: CmmType
  923     ty = vecVmmType vcat n w
  924 
  925   (VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do
  926     checkVecCompatibility dflags vcat n w
  927     doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res
  928    where
  929     ty :: CmmType
  930     ty = vecVmmType vcat n w
  931 
  932   (VecIndexByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  933     checkVecCompatibility dflags vcat n w
  934     doIndexByteArrayOp Nothing ty res0 args
  935    where
  936     ty :: CmmType
  937     ty = vecVmmType vcat n w
  938 
  939   (VecReadByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  940     checkVecCompatibility dflags vcat n w
  941     doIndexByteArrayOp Nothing ty res0 args
  942    where
  943     ty :: CmmType
  944     ty = vecVmmType vcat n w
  945 
  946   (VecWriteByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  947     checkVecCompatibility dflags vcat n w
  948     doWriteByteArrayOp Nothing ty res0 args
  949    where
  950     ty :: CmmType
  951     ty = vecVmmType vcat n w
  952 
  953   (VecIndexOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  954     checkVecCompatibility dflags vcat n w
  955     doIndexOffAddrOp Nothing ty res0 args
  956    where
  957     ty :: CmmType
  958     ty = vecVmmType vcat n w
  959 
  960   (VecReadOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  961     checkVecCompatibility dflags vcat n w
  962     doIndexOffAddrOp Nothing ty res0 args
  963    where
  964     ty :: CmmType
  965     ty = vecVmmType vcat n w
  966 
  967   (VecWriteOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  968     checkVecCompatibility dflags vcat n w
  969     doWriteOffAddrOp Nothing ty res0 args
  970    where
  971     ty :: CmmType
  972     ty = vecVmmType vcat n w
  973 
  974   (VecIndexScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  975     checkVecCompatibility dflags vcat n w
  976     doIndexByteArrayOpAs Nothing vecty ty res0 args
  977    where
  978     vecty :: CmmType
  979     vecty = vecVmmType vcat n w
  980 
  981     ty :: CmmType
  982     ty = vecCmmCat vcat w
  983 
  984   (VecReadScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  985     checkVecCompatibility dflags vcat n w
  986     doIndexByteArrayOpAs Nothing vecty ty res0 args
  987    where
  988     vecty :: CmmType
  989     vecty = vecVmmType vcat n w
  990 
  991     ty :: CmmType
  992     ty = vecCmmCat vcat w
  993 
  994   (VecWriteScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
  995     checkVecCompatibility dflags vcat n w
  996     doWriteByteArrayOp Nothing ty res0 args
  997    where
  998     ty :: CmmType
  999     ty = vecCmmCat vcat w
 1000 
 1001   (VecIndexScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
 1002     checkVecCompatibility dflags vcat n w
 1003     doIndexOffAddrOpAs Nothing vecty ty res0 args
 1004    where
 1005     vecty :: CmmType
 1006     vecty = vecVmmType vcat n w
 1007 
 1008     ty :: CmmType
 1009     ty = vecCmmCat vcat w
 1010 
 1011   (VecReadScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
 1012     checkVecCompatibility dflags vcat n w
 1013     doIndexOffAddrOpAs Nothing vecty ty res0 args
 1014    where
 1015     vecty :: CmmType
 1016     vecty = vecVmmType vcat n w
 1017 
 1018     ty :: CmmType
 1019     ty = vecCmmCat vcat w
 1020 
 1021   (VecWriteScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
 1022     checkVecCompatibility dflags vcat n w
 1023     doWriteOffAddrOp Nothing ty res0 args
 1024    where
 1025     ty :: CmmType
 1026     ty = vecCmmCat vcat w
 1027 
 1028 -- Prefetch
 1029   PrefetchByteArrayOp3         -> \args -> opIntoRegs $ \[] ->
 1030     doPrefetchByteArrayOp 3  args
 1031   PrefetchMutableByteArrayOp3  -> \args -> opIntoRegs $ \[] ->
 1032     doPrefetchMutableByteArrayOp 3  args
 1033   PrefetchAddrOp3              -> \args -> opIntoRegs $ \[] ->
 1034     doPrefetchAddrOp  3  args
 1035   PrefetchValueOp3             -> \args -> opIntoRegs $ \[] ->
 1036     doPrefetchValueOp 3 args
 1037 
 1038   PrefetchByteArrayOp2         -> \args -> opIntoRegs $ \[] ->
 1039     doPrefetchByteArrayOp 2  args
 1040   PrefetchMutableByteArrayOp2  -> \args -> opIntoRegs $ \[] ->
 1041     doPrefetchMutableByteArrayOp 2  args
 1042   PrefetchAddrOp2              -> \args -> opIntoRegs $ \[] ->
 1043     doPrefetchAddrOp 2  args
 1044   PrefetchValueOp2             -> \args -> opIntoRegs $ \[] ->
 1045     doPrefetchValueOp 2 args
 1046   PrefetchByteArrayOp1         -> \args -> opIntoRegs $ \[] ->
 1047     doPrefetchByteArrayOp 1  args
 1048   PrefetchMutableByteArrayOp1  -> \args -> opIntoRegs $ \[] ->
 1049     doPrefetchMutableByteArrayOp 1  args
 1050   PrefetchAddrOp1              -> \args -> opIntoRegs $ \[] ->
 1051     doPrefetchAddrOp 1  args
 1052   PrefetchValueOp1             -> \args -> opIntoRegs $ \[] ->
 1053     doPrefetchValueOp 1 args
 1054 
 1055   PrefetchByteArrayOp0         -> \args -> opIntoRegs $ \[] ->
 1056     doPrefetchByteArrayOp 0  args
 1057   PrefetchMutableByteArrayOp0  -> \args -> opIntoRegs $ \[] ->
 1058     doPrefetchMutableByteArrayOp 0  args
 1059   PrefetchAddrOp0              -> \args -> opIntoRegs $ \[] ->
 1060     doPrefetchAddrOp 0  args
 1061   PrefetchValueOp0             -> \args -> opIntoRegs $ \[] ->
 1062     doPrefetchValueOp 0 args
 1063 
 1064 -- Atomic read-modify-write
 1065   FetchAddByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1066     doAtomicByteArrayRMW res AMO_Add mba ix (bWord platform) n
 1067   FetchSubByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1068     doAtomicByteArrayRMW res AMO_Sub mba ix (bWord platform) n
 1069   FetchAndByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1070     doAtomicByteArrayRMW res AMO_And mba ix (bWord platform) n
 1071   FetchNandByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1072     doAtomicByteArrayRMW res AMO_Nand mba ix (bWord platform) n
 1073   FetchOrByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1074     doAtomicByteArrayRMW res AMO_Or mba ix (bWord platform) n
 1075   FetchXorByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
 1076     doAtomicByteArrayRMW res AMO_Xor mba ix (bWord platform) n
 1077   AtomicReadByteArrayOp_Int -> \[mba, ix] -> opIntoRegs $ \[res] ->
 1078     doAtomicReadByteArray res mba ix (bWord platform)
 1079   AtomicWriteByteArrayOp_Int -> \[mba, ix, val] -> opIntoRegs $ \[] ->
 1080     doAtomicWriteByteArray mba ix (bWord platform) val
 1081   CasByteArrayOp_Int -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
 1082     doCasByteArray res mba ix (bWord platform) old new
 1083   CasByteArrayOp_Int8 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
 1084     doCasByteArray res mba ix b8 old new
 1085   CasByteArrayOp_Int16 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
 1086     doCasByteArray res mba ix b16 old new
 1087   CasByteArrayOp_Int32 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
 1088     doCasByteArray res mba ix b32 old new
 1089   CasByteArrayOp_Int64 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
 1090     doCasByteArray res mba ix b64 old new
 1091 
 1092 -- The rest just translate straightforwardly
 1093 
 1094   Int8ToWord8Op   -> \args -> opNop args
 1095   Word8ToInt8Op   -> \args -> opNop args
 1096   Int16ToWord16Op -> \args -> opNop args
 1097   Word16ToInt16Op -> \args -> opNop args
 1098   Int32ToWord32Op -> \args -> opNop args
 1099   Word32ToInt32Op -> \args -> opNop args
 1100   Int64ToWord64Op -> \args -> opNop args
 1101   Word64ToInt64Op -> \args -> opNop args
 1102   IntToWordOp     -> \args -> opNop args
 1103   WordToIntOp     -> \args -> opNop args
 1104   IntToAddrOp     -> \args -> opNop args
 1105   AddrToIntOp     -> \args -> opNop args
 1106   ChrOp           -> \args -> opNop args  -- Int# and Char# are rep'd the same
 1107   OrdOp           -> \args -> opNop args
 1108 
 1109   Narrow8IntOp   -> \args -> opNarrow args (MO_SS_Conv, W8)
 1110   Narrow16IntOp  -> \args -> opNarrow args (MO_SS_Conv, W16)
 1111   Narrow32IntOp  -> \args -> opNarrow args (MO_SS_Conv, W32)
 1112   Narrow8WordOp  -> \args -> opNarrow args (MO_UU_Conv, W8)
 1113   Narrow16WordOp -> \args -> opNarrow args (MO_UU_Conv, W16)
 1114   Narrow32WordOp -> \args -> opNarrow args (MO_UU_Conv, W32)
 1115 
 1116   DoublePowerOp  -> \args -> opCallish args MO_F64_Pwr
 1117   DoubleSinOp    -> \args -> opCallish args MO_F64_Sin
 1118   DoubleCosOp    -> \args -> opCallish args MO_F64_Cos
 1119   DoubleTanOp    -> \args -> opCallish args MO_F64_Tan
 1120   DoubleSinhOp   -> \args -> opCallish args MO_F64_Sinh
 1121   DoubleCoshOp   -> \args -> opCallish args MO_F64_Cosh
 1122   DoubleTanhOp   -> \args -> opCallish args MO_F64_Tanh
 1123   DoubleAsinOp   -> \args -> opCallish args MO_F64_Asin
 1124   DoubleAcosOp   -> \args -> opCallish args MO_F64_Acos
 1125   DoubleAtanOp   -> \args -> opCallish args MO_F64_Atan
 1126   DoubleAsinhOp  -> \args -> opCallish args MO_F64_Asinh
 1127   DoubleAcoshOp  -> \args -> opCallish args MO_F64_Acosh
 1128   DoubleAtanhOp  -> \args -> opCallish args MO_F64_Atanh
 1129   DoubleLogOp    -> \args -> opCallish args MO_F64_Log
 1130   DoubleLog1POp  -> \args -> opCallish args MO_F64_Log1P
 1131   DoubleExpOp    -> \args -> opCallish args MO_F64_Exp
 1132   DoubleExpM1Op  -> \args -> opCallish args MO_F64_ExpM1
 1133   DoubleSqrtOp   -> \args -> opCallish args MO_F64_Sqrt
 1134 
 1135   FloatPowerOp   -> \args -> opCallish args MO_F32_Pwr
 1136   FloatSinOp     -> \args -> opCallish args MO_F32_Sin
 1137   FloatCosOp     -> \args -> opCallish args MO_F32_Cos
 1138   FloatTanOp     -> \args -> opCallish args MO_F32_Tan
 1139   FloatSinhOp    -> \args -> opCallish args MO_F32_Sinh
 1140   FloatCoshOp    -> \args -> opCallish args MO_F32_Cosh
 1141   FloatTanhOp    -> \args -> opCallish args MO_F32_Tanh
 1142   FloatAsinOp    -> \args -> opCallish args MO_F32_Asin
 1143   FloatAcosOp    -> \args -> opCallish args MO_F32_Acos
 1144   FloatAtanOp    -> \args -> opCallish args MO_F32_Atan
 1145   FloatAsinhOp   -> \args -> opCallish args MO_F32_Asinh
 1146   FloatAcoshOp   -> \args -> opCallish args MO_F32_Acosh
 1147   FloatAtanhOp   -> \args -> opCallish args MO_F32_Atanh
 1148   FloatLogOp     -> \args -> opCallish args MO_F32_Log
 1149   FloatLog1POp   -> \args -> opCallish args MO_F32_Log1P
 1150   FloatExpOp     -> \args -> opCallish args MO_F32_Exp
 1151   FloatExpM1Op   -> \args -> opCallish args MO_F32_ExpM1
 1152   FloatSqrtOp    -> \args -> opCallish args MO_F32_Sqrt
 1153 
 1154 -- Native word signless ops
 1155 
 1156   IntAddOp       -> \args -> opTranslate args (mo_wordAdd platform)
 1157   IntSubOp       -> \args -> opTranslate args (mo_wordSub platform)
 1158   WordAddOp      -> \args -> opTranslate args (mo_wordAdd platform)
 1159   WordSubOp      -> \args -> opTranslate args (mo_wordSub platform)
 1160   AddrAddOp      -> \args -> opTranslate args (mo_wordAdd platform)
 1161   AddrSubOp      -> \args -> opTranslate args (mo_wordSub platform)
 1162 
 1163   IntEqOp        -> \args -> opTranslate args (mo_wordEq platform)
 1164   IntNeOp        -> \args -> opTranslate args (mo_wordNe platform)
 1165   WordEqOp       -> \args -> opTranslate args (mo_wordEq platform)
 1166   WordNeOp       -> \args -> opTranslate args (mo_wordNe platform)
 1167   AddrEqOp       -> \args -> opTranslate args (mo_wordEq platform)
 1168   AddrNeOp       -> \args -> opTranslate args (mo_wordNe platform)
 1169 
 1170   WordAndOp      -> \args -> opTranslate args (mo_wordAnd platform)
 1171   WordOrOp       -> \args -> opTranslate args (mo_wordOr platform)
 1172   WordXorOp      -> \args -> opTranslate args (mo_wordXor platform)
 1173   WordNotOp      -> \args -> opTranslate args (mo_wordNot platform)
 1174   WordSllOp      -> \args -> opTranslate args (mo_wordShl platform)
 1175   WordSrlOp      -> \args -> opTranslate args (mo_wordUShr platform)
 1176 
 1177   AddrRemOp      -> \args -> opTranslate args (mo_wordURem platform)
 1178 
 1179 -- Native word signed ops
 1180 
 1181   IntMulOp        -> \args -> opTranslate args (mo_wordMul platform)
 1182   IntMulMayOfloOp -> \args -> opTranslate args (MO_S_MulMayOflo (wordWidth platform))
 1183   IntQuotOp       -> \args -> opTranslate args (mo_wordSQuot platform)
 1184   IntRemOp        -> \args -> opTranslate args (mo_wordSRem platform)
 1185   IntNegOp        -> \args -> opTranslate args (mo_wordSNeg platform)
 1186 
 1187   IntGeOp        -> \args -> opTranslate args (mo_wordSGe platform)
 1188   IntLeOp        -> \args -> opTranslate args (mo_wordSLe platform)
 1189   IntGtOp        -> \args -> opTranslate args (mo_wordSGt platform)
 1190   IntLtOp        -> \args -> opTranslate args (mo_wordSLt platform)
 1191 
 1192   IntAndOp       -> \args -> opTranslate args (mo_wordAnd platform)
 1193   IntOrOp        -> \args -> opTranslate args (mo_wordOr platform)
 1194   IntXorOp       -> \args -> opTranslate args (mo_wordXor platform)
 1195   IntNotOp       -> \args -> opTranslate args (mo_wordNot platform)
 1196   IntSllOp       -> \args -> opTranslate args (mo_wordShl platform)
 1197   IntSraOp       -> \args -> opTranslate args (mo_wordSShr platform)
 1198   IntSrlOp       -> \args -> opTranslate args (mo_wordUShr platform)
 1199 
 1200 -- Native word unsigned ops
 1201 
 1202   WordGeOp       -> \args -> opTranslate args (mo_wordUGe platform)
 1203   WordLeOp       -> \args -> opTranslate args (mo_wordULe platform)
 1204   WordGtOp       -> \args -> opTranslate args (mo_wordUGt platform)
 1205   WordLtOp       -> \args -> opTranslate args (mo_wordULt platform)
 1206 
 1207   WordMulOp      -> \args -> opTranslate args (mo_wordMul platform)
 1208   WordQuotOp     -> \args -> opTranslate args (mo_wordUQuot platform)
 1209   WordRemOp      -> \args -> opTranslate args (mo_wordURem platform)
 1210 
 1211   AddrGeOp       -> \args -> opTranslate args (mo_wordUGe platform)
 1212   AddrLeOp       -> \args -> opTranslate args (mo_wordULe platform)
 1213   AddrGtOp       -> \args -> opTranslate args (mo_wordUGt platform)
 1214   AddrLtOp       -> \args -> opTranslate args (mo_wordULt platform)
 1215 
 1216 -- Int8# signed ops
 1217 
 1218   Int8ToIntOp    -> \args -> opTranslate args (MO_SS_Conv W8 (wordWidth platform))
 1219   IntToInt8Op    -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W8)
 1220   Int8NegOp      -> \args -> opTranslate args (MO_S_Neg W8)
 1221   Int8AddOp      -> \args -> opTranslate args (MO_Add W8)
 1222   Int8SubOp      -> \args -> opTranslate args (MO_Sub W8)
 1223   Int8MulOp      -> \args -> opTranslate args (MO_Mul W8)
 1224   Int8QuotOp     -> \args -> opTranslate args (MO_S_Quot W8)
 1225   Int8RemOp      -> \args -> opTranslate args (MO_S_Rem W8)
 1226 
 1227   Int8SllOp     -> \args -> opTranslate args (MO_Shl W8)
 1228   Int8SraOp     -> \args -> opTranslate args (MO_S_Shr W8)
 1229   Int8SrlOp     -> \args -> opTranslate args (MO_U_Shr W8)
 1230 
 1231   Int8EqOp       -> \args -> opTranslate args (MO_Eq W8)
 1232   Int8GeOp       -> \args -> opTranslate args (MO_S_Ge W8)
 1233   Int8GtOp       -> \args -> opTranslate args (MO_S_Gt W8)
 1234   Int8LeOp       -> \args -> opTranslate args (MO_S_Le W8)
 1235   Int8LtOp       -> \args -> opTranslate args (MO_S_Lt W8)
 1236   Int8NeOp       -> \args -> opTranslate args (MO_Ne W8)
 1237 
 1238 -- Word8# unsigned ops
 1239 
 1240   Word8ToWordOp  -> \args -> opTranslate args (MO_UU_Conv W8 (wordWidth platform))
 1241   WordToWord8Op  -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W8)
 1242   Word8AddOp     -> \args -> opTranslate args (MO_Add W8)
 1243   Word8SubOp     -> \args -> opTranslate args (MO_Sub W8)
 1244   Word8MulOp     -> \args -> opTranslate args (MO_Mul W8)
 1245   Word8QuotOp    -> \args -> opTranslate args (MO_U_Quot W8)
 1246   Word8RemOp     -> \args -> opTranslate args (MO_U_Rem W8)
 1247 
 1248   Word8AndOp    -> \args -> opTranslate args (MO_And W8)
 1249   Word8OrOp     -> \args -> opTranslate args (MO_Or W8)
 1250   Word8XorOp    -> \args -> opTranslate args (MO_Xor W8)
 1251   Word8NotOp    -> \args -> opTranslate args (MO_Not W8)
 1252   Word8SllOp    -> \args -> opTranslate args (MO_Shl W8)
 1253   Word8SrlOp    -> \args -> opTranslate args (MO_U_Shr W8)
 1254 
 1255   Word8EqOp      -> \args -> opTranslate args (MO_Eq W8)
 1256   Word8GeOp      -> \args -> opTranslate args (MO_U_Ge W8)
 1257   Word8GtOp      -> \args -> opTranslate args (MO_U_Gt W8)
 1258   Word8LeOp      -> \args -> opTranslate args (MO_U_Le W8)
 1259   Word8LtOp      -> \args -> opTranslate args (MO_U_Lt W8)
 1260   Word8NeOp      -> \args -> opTranslate args (MO_Ne W8)
 1261 
 1262 -- Int16# signed ops
 1263 
 1264   Int16ToIntOp   -> \args -> opTranslate args (MO_SS_Conv W16 (wordWidth platform))
 1265   IntToInt16Op   -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W16)
 1266   Int16NegOp     -> \args -> opTranslate args (MO_S_Neg W16)
 1267   Int16AddOp     -> \args -> opTranslate args (MO_Add W16)
 1268   Int16SubOp     -> \args -> opTranslate args (MO_Sub W16)
 1269   Int16MulOp     -> \args -> opTranslate args (MO_Mul W16)
 1270   Int16QuotOp    -> \args -> opTranslate args (MO_S_Quot W16)
 1271   Int16RemOp     -> \args -> opTranslate args (MO_S_Rem W16)
 1272 
 1273   Int16SllOp     -> \args -> opTranslate args (MO_Shl W16)
 1274   Int16SraOp     -> \args -> opTranslate args (MO_S_Shr W16)
 1275   Int16SrlOp     -> \args -> opTranslate args (MO_U_Shr W16)
 1276 
 1277   Int16EqOp      -> \args -> opTranslate args (MO_Eq W16)
 1278   Int16GeOp      -> \args -> opTranslate args (MO_S_Ge W16)
 1279   Int16GtOp      -> \args -> opTranslate args (MO_S_Gt W16)
 1280   Int16LeOp      -> \args -> opTranslate args (MO_S_Le W16)
 1281   Int16LtOp      -> \args -> opTranslate args (MO_S_Lt W16)
 1282   Int16NeOp      -> \args -> opTranslate args (MO_Ne W16)
 1283 
 1284 -- Word16# unsigned ops
 1285 
 1286   Word16ToWordOp -> \args -> opTranslate args (MO_UU_Conv W16 (wordWidth platform))
 1287   WordToWord16Op -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W16)
 1288   Word16AddOp    -> \args -> opTranslate args (MO_Add W16)
 1289   Word16SubOp    -> \args -> opTranslate args (MO_Sub W16)
 1290   Word16MulOp    -> \args -> opTranslate args (MO_Mul W16)
 1291   Word16QuotOp   -> \args -> opTranslate args (MO_U_Quot W16)
 1292   Word16RemOp    -> \args -> opTranslate args (MO_U_Rem W16)
 1293 
 1294   Word16AndOp    -> \args -> opTranslate args (MO_And W16)
 1295   Word16OrOp     -> \args -> opTranslate args (MO_Or W16)
 1296   Word16XorOp    -> \args -> opTranslate args (MO_Xor W16)
 1297   Word16NotOp    -> \args -> opTranslate args (MO_Not W16)
 1298   Word16SllOp    -> \args -> opTranslate args (MO_Shl W16)
 1299   Word16SrlOp    -> \args -> opTranslate args (MO_U_Shr W16)
 1300 
 1301   Word16EqOp     -> \args -> opTranslate args (MO_Eq W16)
 1302   Word16GeOp     -> \args -> opTranslate args (MO_U_Ge W16)
 1303   Word16GtOp     -> \args -> opTranslate args (MO_U_Gt W16)
 1304   Word16LeOp     -> \args -> opTranslate args (MO_U_Le W16)
 1305   Word16LtOp     -> \args -> opTranslate args (MO_U_Lt W16)
 1306   Word16NeOp     -> \args -> opTranslate args (MO_Ne W16)
 1307 
 1308 -- Int32# signed ops
 1309 
 1310   Int32ToIntOp   -> \args -> opTranslate args (MO_SS_Conv W32 (wordWidth platform))
 1311   IntToInt32Op   -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W32)
 1312   Int32NegOp     -> \args -> opTranslate args (MO_S_Neg W32)
 1313   Int32AddOp     -> \args -> opTranslate args (MO_Add W32)
 1314   Int32SubOp     -> \args -> opTranslate args (MO_Sub W32)
 1315   Int32MulOp     -> \args -> opTranslate args (MO_Mul W32)
 1316   Int32QuotOp    -> \args -> opTranslate args (MO_S_Quot W32)
 1317   Int32RemOp     -> \args -> opTranslate args (MO_S_Rem W32)
 1318 
 1319   Int32SllOp     -> \args -> opTranslate args (MO_Shl W32)
 1320   Int32SraOp     -> \args -> opTranslate args (MO_S_Shr W32)
 1321   Int32SrlOp     -> \args -> opTranslate args (MO_U_Shr W32)
 1322 
 1323   Int32EqOp      -> \args -> opTranslate args (MO_Eq W32)
 1324   Int32GeOp      -> \args -> opTranslate args (MO_S_Ge W32)
 1325   Int32GtOp      -> \args -> opTranslate args (MO_S_Gt W32)
 1326   Int32LeOp      -> \args -> opTranslate args (MO_S_Le W32)
 1327   Int32LtOp      -> \args -> opTranslate args (MO_S_Lt W32)
 1328   Int32NeOp      -> \args -> opTranslate args (MO_Ne W32)
 1329 
 1330 -- Word32# unsigned ops
 1331 
 1332   Word32ToWordOp -> \args -> opTranslate args (MO_UU_Conv W32 (wordWidth platform))
 1333   WordToWord32Op -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W32)
 1334   Word32AddOp    -> \args -> opTranslate args (MO_Add W32)
 1335   Word32SubOp    -> \args -> opTranslate args (MO_Sub W32)
 1336   Word32MulOp    -> \args -> opTranslate args (MO_Mul W32)
 1337   Word32QuotOp   -> \args -> opTranslate args (MO_U_Quot W32)
 1338   Word32RemOp    -> \args -> opTranslate args (MO_U_Rem W32)
 1339 
 1340   Word32AndOp    -> \args -> opTranslate args (MO_And W32)
 1341   Word32OrOp     -> \args -> opTranslate args (MO_Or W32)
 1342   Word32XorOp    -> \args -> opTranslate args (MO_Xor W32)
 1343   Word32NotOp    -> \args -> opTranslate args (MO_Not W32)
 1344   Word32SllOp    -> \args -> opTranslate args (MO_Shl W32)
 1345   Word32SrlOp    -> \args -> opTranslate args (MO_U_Shr W32)
 1346 
 1347   Word32EqOp     -> \args -> opTranslate args (MO_Eq W32)
 1348   Word32GeOp     -> \args -> opTranslate args (MO_U_Ge W32)
 1349   Word32GtOp     -> \args -> opTranslate args (MO_U_Gt W32)
 1350   Word32LeOp     -> \args -> opTranslate args (MO_U_Le W32)
 1351   Word32LtOp     -> \args -> opTranslate args (MO_U_Lt W32)
 1352   Word32NeOp     -> \args -> opTranslate args (MO_Ne W32)
 1353 
 1354 -- Int64# signed ops
 1355 
 1356   Int64ToIntOp   -> \args -> opTranslate64 args (\w -> MO_SS_Conv w (wordWidth platform)) MO_I64_ToI
 1357   IntToInt64Op   -> \args -> opTranslate64 args (\w -> MO_SS_Conv (wordWidth platform) w) MO_I64_FromI
 1358   Int64NegOp     -> \args -> opTranslate64 args MO_S_Neg  MO_x64_Neg
 1359   Int64AddOp     -> \args -> opTranslate64 args MO_Add    MO_x64_Add
 1360   Int64SubOp     -> \args -> opTranslate64 args MO_Sub    MO_x64_Sub
 1361   Int64MulOp     -> \args -> opTranslate64 args MO_Mul    MO_x64_Mul
 1362   Int64QuotOp    -> \args -> opTranslate64 args MO_S_Quot MO_I64_Quot
 1363   Int64RemOp     -> \args -> opTranslate64 args MO_S_Rem  MO_I64_Rem
 1364 
 1365   Int64SllOp     -> \args -> opTranslate64 args MO_Shl    MO_x64_Shl
 1366   Int64SraOp     -> \args -> opTranslate64 args MO_S_Shr  MO_I64_Shr
 1367   Int64SrlOp     -> \args -> opTranslate64 args MO_U_Shr  MO_W64_Shr
 1368 
 1369   Int64EqOp      -> \args -> opTranslate64 args MO_Eq     MO_x64_Eq
 1370   Int64GeOp      -> \args -> opTranslate64 args MO_S_Ge   MO_I64_Ge
 1371   Int64GtOp      -> \args -> opTranslate64 args MO_S_Gt   MO_I64_Gt
 1372   Int64LeOp      -> \args -> opTranslate64 args MO_S_Le   MO_I64_Le
 1373   Int64LtOp      -> \args -> opTranslate64 args MO_S_Lt   MO_I64_Lt
 1374   Int64NeOp      -> \args -> opTranslate64 args MO_Ne     MO_x64_Ne
 1375 
 1376 -- Word64# unsigned ops
 1377 
 1378   Word64ToWordOp -> \args -> opTranslate64 args (\w -> MO_UU_Conv w (wordWidth platform)) MO_W64_ToW
 1379   WordToWord64Op -> \args -> opTranslate64 args (\w -> MO_UU_Conv (wordWidth platform) w) MO_W64_FromW
 1380   Word64AddOp    -> \args -> opTranslate64 args MO_Add    MO_x64_Add
 1381   Word64SubOp    -> \args -> opTranslate64 args MO_Sub    MO_x64_Sub
 1382   Word64MulOp    -> \args -> opTranslate64 args MO_Mul    MO_x64_Mul
 1383   Word64QuotOp   -> \args -> opTranslate64 args MO_U_Quot MO_W64_Quot
 1384   Word64RemOp    -> \args -> opTranslate64 args MO_U_Rem  MO_W64_Rem
 1385 
 1386   Word64AndOp    -> \args -> opTranslate64 args MO_And    MO_x64_And
 1387   Word64OrOp     -> \args -> opTranslate64 args MO_Or     MO_x64_Or
 1388   Word64XorOp    -> \args -> opTranslate64 args MO_Xor    MO_x64_Xor
 1389   Word64NotOp    -> \args -> opTranslate64 args MO_Not    MO_x64_Not
 1390   Word64SllOp    -> \args -> opTranslate64 args MO_Shl    MO_x64_Shl
 1391   Word64SrlOp    -> \args -> opTranslate64 args MO_U_Shr  MO_W64_Shr
 1392 
 1393   Word64EqOp     -> \args -> opTranslate64 args MO_Eq     MO_x64_Eq
 1394   Word64GeOp     -> \args -> opTranslate64 args MO_U_Ge   MO_W64_Ge
 1395   Word64GtOp     -> \args -> opTranslate64 args MO_U_Gt   MO_W64_Gt
 1396   Word64LeOp     -> \args -> opTranslate64 args MO_U_Le   MO_W64_Le
 1397   Word64LtOp     -> \args -> opTranslate64 args MO_U_Lt   MO_W64_Lt
 1398   Word64NeOp     -> \args -> opTranslate64 args MO_Ne     MO_x64_Ne
 1399 
 1400 -- Char# ops
 1401 
 1402   CharEqOp       -> \args -> opTranslate args (MO_Eq (wordWidth platform))
 1403   CharNeOp       -> \args -> opTranslate args (MO_Ne (wordWidth platform))
 1404   CharGeOp       -> \args -> opTranslate args (MO_U_Ge (wordWidth platform))
 1405   CharLeOp       -> \args -> opTranslate args (MO_U_Le (wordWidth platform))
 1406   CharGtOp       -> \args -> opTranslate args (MO_U_Gt (wordWidth platform))
 1407   CharLtOp       -> \args -> opTranslate args (MO_U_Lt (wordWidth platform))
 1408 
 1409 -- Double ops
 1410 
 1411   DoubleEqOp     -> \args -> opTranslate args (MO_F_Eq W64)
 1412   DoubleNeOp     -> \args -> opTranslate args (MO_F_Ne W64)
 1413   DoubleGeOp     -> \args -> opTranslate args (MO_F_Ge W64)
 1414   DoubleLeOp     -> \args -> opTranslate args (MO_F_Le W64)
 1415   DoubleGtOp     -> \args -> opTranslate args (MO_F_Gt W64)
 1416   DoubleLtOp     -> \args -> opTranslate args (MO_F_Lt W64)
 1417 
 1418   DoubleAddOp    -> \args -> opTranslate args (MO_F_Add W64)
 1419   DoubleSubOp    -> \args -> opTranslate args (MO_F_Sub W64)
 1420   DoubleMulOp    -> \args -> opTranslate args (MO_F_Mul W64)
 1421   DoubleDivOp    -> \args -> opTranslate args (MO_F_Quot W64)
 1422   DoubleNegOp    -> \args -> opTranslate args (MO_F_Neg W64)
 1423 
 1424 -- Float ops
 1425 
 1426   FloatEqOp     -> \args -> opTranslate args (MO_F_Eq W32)
 1427   FloatNeOp     -> \args -> opTranslate args (MO_F_Ne W32)
 1428   FloatGeOp     -> \args -> opTranslate args (MO_F_Ge W32)
 1429   FloatLeOp     -> \args -> opTranslate args (MO_F_Le W32)
 1430   FloatGtOp     -> \args -> opTranslate args (MO_F_Gt W32)
 1431   FloatLtOp     -> \args -> opTranslate args (MO_F_Lt W32)
 1432 
 1433   FloatAddOp    -> \args -> opTranslate args (MO_F_Add  W32)
 1434   FloatSubOp    -> \args -> opTranslate args (MO_F_Sub  W32)
 1435   FloatMulOp    -> \args -> opTranslate args (MO_F_Mul  W32)
 1436   FloatDivOp    -> \args -> opTranslate args (MO_F_Quot W32)
 1437   FloatNegOp    -> \args -> opTranslate args (MO_F_Neg  W32)
 1438 
 1439 -- Vector ops
 1440 
 1441   (VecAddOp  FloatVec n w) -> \args -> opTranslate args (MO_VF_Add  n w)
 1442   (VecSubOp  FloatVec n w) -> \args -> opTranslate args (MO_VF_Sub  n w)
 1443   (VecMulOp  FloatVec n w) -> \args -> opTranslate args (MO_VF_Mul  n w)
 1444   (VecDivOp  FloatVec n w) -> \args -> opTranslate args (MO_VF_Quot n w)
 1445   (VecQuotOp FloatVec _ _) -> \_ -> panic "unsupported primop"
 1446   (VecRemOp  FloatVec _ _) -> \_ -> panic "unsupported primop"
 1447   (VecNegOp  FloatVec n w) -> \args -> opTranslate args (MO_VF_Neg  n w)
 1448 
 1449   (VecAddOp  IntVec n w) -> \args -> opTranslate args (MO_V_Add   n w)
 1450   (VecSubOp  IntVec n w) -> \args -> opTranslate args (MO_V_Sub   n w)
 1451   (VecMulOp  IntVec n w) -> \args -> opTranslate args (MO_V_Mul   n w)
 1452   (VecDivOp  IntVec _ _) -> \_ -> panic "unsupported primop"
 1453   (VecQuotOp IntVec n w) -> \args -> opTranslate args (MO_VS_Quot n w)
 1454   (VecRemOp  IntVec n w) -> \args -> opTranslate args (MO_VS_Rem  n w)
 1455   (VecNegOp  IntVec n w) -> \args -> opTranslate args (MO_VS_Neg  n w)
 1456 
 1457   (VecAddOp  WordVec n w) -> \args -> opTranslate args (MO_V_Add   n w)
 1458   (VecSubOp  WordVec n w) -> \args -> opTranslate args (MO_V_Sub   n w)
 1459   (VecMulOp  WordVec n w) -> \args -> opTranslate args (MO_V_Mul   n w)
 1460   (VecDivOp  WordVec _ _) -> \_ -> panic "unsupported primop"
 1461   (VecQuotOp WordVec n w) -> \args -> opTranslate args (MO_VU_Quot n w)
 1462   (VecRemOp  WordVec n w) -> \args -> opTranslate args (MO_VU_Rem  n w)
 1463   (VecNegOp  WordVec _ _) -> \_ -> panic "unsupported primop"
 1464 
 1465 -- Conversions
 1466 
 1467   IntToDoubleOp   -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W64)
 1468   DoubleToIntOp   -> \args -> opTranslate args (MO_FS_Conv W64 (wordWidth platform))
 1469 
 1470   IntToFloatOp    -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W32)
 1471   FloatToIntOp    -> \args -> opTranslate args (MO_FS_Conv W32 (wordWidth platform))
 1472 
 1473   FloatToDoubleOp -> \args -> opTranslate args (MO_FF_Conv W32 W64)
 1474   DoubleToFloatOp -> \args -> opTranslate args (MO_FF_Conv W64 W32)
 1475 
 1476   IntQuotRemOp -> \args -> opCallishHandledLater args $
 1477     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1478     then Left (MO_S_QuotRem  (wordWidth platform))
 1479     else Right (genericIntQuotRemOp (wordWidth platform))
 1480 
 1481   Int8QuotRemOp -> \args -> opCallishHandledLater args $
 1482     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1483     then Left (MO_S_QuotRem W8)
 1484     else Right (genericIntQuotRemOp W8)
 1485 
 1486   Int16QuotRemOp -> \args -> opCallishHandledLater args $
 1487     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1488     then Left (MO_S_QuotRem W16)
 1489     else Right (genericIntQuotRemOp W16)
 1490 
 1491   Int32QuotRemOp -> \args -> opCallishHandledLater args $
 1492     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1493     then Left (MO_S_QuotRem W32)
 1494     else Right (genericIntQuotRemOp W32)
 1495 
 1496   WordQuotRemOp -> \args -> opCallishHandledLater args $
 1497     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1498     then Left (MO_U_QuotRem  (wordWidth platform))
 1499     else Right (genericWordQuotRemOp (wordWidth platform))
 1500 
 1501   WordQuotRem2Op -> \args -> opCallishHandledLater args $
 1502     if (ncg && (x86ish || ppc)) || llvm
 1503     then Left (MO_U_QuotRem2 (wordWidth platform))
 1504     else Right (genericWordQuotRem2Op platform)
 1505 
 1506   Word8QuotRemOp -> \args -> opCallishHandledLater args $
 1507     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1508     then Left (MO_U_QuotRem W8)
 1509     else Right (genericWordQuotRemOp W8)
 1510 
 1511   Word16QuotRemOp -> \args -> opCallishHandledLater args $
 1512     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1513     then Left (MO_U_QuotRem W16)
 1514     else Right (genericWordQuotRemOp W16)
 1515 
 1516   Word32QuotRemOp -> \args -> opCallishHandledLater args $
 1517     if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
 1518     then Left (MO_U_QuotRem W32)
 1519     else Right (genericWordQuotRemOp W32)
 1520 
 1521   WordAdd2Op -> \args -> opCallishHandledLater args $
 1522     if (ncg && (x86ish || ppc)) || llvm
 1523     then Left (MO_Add2       (wordWidth platform))
 1524     else Right genericWordAdd2Op
 1525 
 1526   WordAddCOp -> \args -> opCallishHandledLater args $
 1527     if (ncg && (x86ish || ppc)) || llvm
 1528     then Left (MO_AddWordC   (wordWidth platform))
 1529     else Right genericWordAddCOp
 1530 
 1531   WordSubCOp -> \args -> opCallishHandledLater args $
 1532     if (ncg && (x86ish || ppc)) || llvm
 1533     then Left (MO_SubWordC   (wordWidth platform))
 1534     else Right genericWordSubCOp
 1535 
 1536   IntAddCOp -> \args -> opCallishHandledLater args $
 1537     if (ncg && (x86ish || ppc)) || llvm
 1538     then Left (MO_AddIntC    (wordWidth platform))
 1539     else Right genericIntAddCOp
 1540 
 1541   IntSubCOp -> \args -> opCallishHandledLater args $
 1542     if (ncg && (x86ish || ppc)) || llvm
 1543     then Left (MO_SubIntC    (wordWidth platform))
 1544     else Right genericIntSubCOp
 1545 
 1546   WordMul2Op -> \args -> opCallishHandledLater args $
 1547     if ncg && (x86ish || ppc) || llvm
 1548     then Left (MO_U_Mul2     (wordWidth platform))
 1549     else Right genericWordMul2Op
 1550 
 1551   IntMul2Op  -> \args -> opCallishHandledLater args $
 1552     if ncg && x86ish || llvm
 1553     then Left (MO_S_Mul2     (wordWidth platform))
 1554     else Right genericIntMul2Op
 1555 
 1556   FloatFabsOp -> \args -> opCallishHandledLater args $
 1557     if (ncg && (x86ish || ppc || aarch64)) || llvm
 1558     then Left MO_F32_Fabs
 1559     else Right $ genericFabsOp W32
 1560 
 1561   DoubleFabsOp -> \args -> opCallishHandledLater args $
 1562     if (ncg && (x86ish || ppc || aarch64)) || llvm
 1563     then Left MO_F64_Fabs
 1564     else Right $ genericFabsOp W64
 1565 
 1566   -- tagToEnum# is special: we need to pull the constructor
 1567   -- out of the table, and perform an appropriate return.
 1568   TagToEnumOp -> \[amode] -> PrimopCmmEmit_Internal $ \res_ty -> do
 1569     -- If you're reading this code in the attempt to figure
 1570     -- out why the compiler panic'ed here, it is probably because
 1571     -- you used tagToEnum# in a non-monomorphic setting, e.g.,
 1572     --         intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
 1573     -- That won't work.
 1574     let tycon = tyConAppTyCon res_ty
 1575     massert (isEnumerationTyCon tycon)
 1576     platform <- getPlatform
 1577     pure [tagToClosure platform tycon amode]
 1578 
 1579 -- Out of line primops.
 1580 -- TODO compiler need not know about these
 1581 
 1582   UnsafeThawArrayOp -> alwaysExternal
 1583   CasArrayOp -> alwaysExternal
 1584   UnsafeThawSmallArrayOp -> alwaysExternal
 1585   CasSmallArrayOp -> alwaysExternal
 1586   NewPinnedByteArrayOp_Char -> alwaysExternal
 1587   NewAlignedPinnedByteArrayOp_Char -> alwaysExternal
 1588   MutableByteArrayIsPinnedOp -> alwaysExternal
 1589   DoubleDecode_2IntOp -> alwaysExternal
 1590   DoubleDecode_Int64Op -> alwaysExternal
 1591   FloatDecode_IntOp -> alwaysExternal
 1592   ByteArrayIsPinnedOp -> alwaysExternal
 1593   ShrinkMutableByteArrayOp_Char -> alwaysExternal
 1594   ResizeMutableByteArrayOp_Char -> alwaysExternal
 1595   ShrinkSmallMutableArrayOp_Char -> alwaysExternal
 1596   NewArrayArrayOp -> alwaysExternal
 1597   NewMutVarOp -> alwaysExternal
 1598   AtomicModifyMutVar2Op -> alwaysExternal
 1599   AtomicModifyMutVar_Op -> alwaysExternal
 1600   CasMutVarOp -> alwaysExternal
 1601   CatchOp -> alwaysExternal
 1602   RaiseOp -> alwaysExternal
 1603   RaiseIOOp -> alwaysExternal
 1604   MaskAsyncExceptionsOp -> alwaysExternal
 1605   MaskUninterruptibleOp -> alwaysExternal
 1606   UnmaskAsyncExceptionsOp -> alwaysExternal
 1607   MaskStatus -> alwaysExternal
 1608   AtomicallyOp -> alwaysExternal
 1609   RetryOp -> alwaysExternal
 1610   CatchRetryOp -> alwaysExternal
 1611   CatchSTMOp -> alwaysExternal
 1612   NewTVarOp -> alwaysExternal
 1613   ReadTVarOp -> alwaysExternal
 1614   ReadTVarIOOp -> alwaysExternal
 1615   WriteTVarOp -> alwaysExternal
 1616   NewMVarOp -> alwaysExternal
 1617   TakeMVarOp -> alwaysExternal
 1618   TryTakeMVarOp -> alwaysExternal
 1619   PutMVarOp -> alwaysExternal
 1620   TryPutMVarOp -> alwaysExternal
 1621   ReadMVarOp -> alwaysExternal
 1622   TryReadMVarOp -> alwaysExternal
 1623   IsEmptyMVarOp -> alwaysExternal
 1624   NewIOPortrOp -> alwaysExternal
 1625   ReadIOPortOp -> alwaysExternal
 1626   WriteIOPortOp -> alwaysExternal
 1627   DelayOp -> alwaysExternal
 1628   WaitReadOp -> alwaysExternal
 1629   WaitWriteOp -> alwaysExternal
 1630   ForkOp -> alwaysExternal
 1631   ForkOnOp -> alwaysExternal
 1632   KillThreadOp -> alwaysExternal
 1633   YieldOp -> alwaysExternal
 1634   LabelThreadOp -> alwaysExternal
 1635   IsCurrentThreadBoundOp -> alwaysExternal
 1636   NoDuplicateOp -> alwaysExternal
 1637   ThreadStatusOp -> alwaysExternal
 1638   MkWeakOp -> alwaysExternal
 1639   MkWeakNoFinalizerOp -> alwaysExternal
 1640   AddCFinalizerToWeakOp -> alwaysExternal
 1641   DeRefWeakOp -> alwaysExternal
 1642   FinalizeWeakOp -> alwaysExternal
 1643   MakeStablePtrOp -> alwaysExternal
 1644   DeRefStablePtrOp -> alwaysExternal
 1645   MakeStableNameOp -> alwaysExternal
 1646   CompactNewOp -> alwaysExternal
 1647   CompactResizeOp -> alwaysExternal
 1648   CompactContainsOp -> alwaysExternal
 1649   CompactContainsAnyOp -> alwaysExternal
 1650   CompactGetFirstBlockOp -> alwaysExternal
 1651   CompactGetNextBlockOp -> alwaysExternal
 1652   CompactAllocateBlockOp -> alwaysExternal
 1653   CompactFixupPointersOp -> alwaysExternal
 1654   CompactAdd -> alwaysExternal
 1655   CompactAddWithSharing -> alwaysExternal
 1656   CompactSize -> alwaysExternal
 1657   SeqOp -> alwaysExternal
 1658   GetSparkOp -> alwaysExternal
 1659   NumSparks -> alwaysExternal
 1660   DataToTagOp -> alwaysExternal
 1661   MkApUpd0_Op -> alwaysExternal
 1662   NewBCOOp -> alwaysExternal
 1663   UnpackClosureOp -> alwaysExternal
 1664   ClosureSizeOp -> alwaysExternal
 1665   WhereFromOp   -> alwaysExternal
 1666   GetApStackValOp -> alwaysExternal
 1667   ClearCCSOp -> alwaysExternal
 1668   TraceEventOp -> alwaysExternal
 1669   TraceEventBinaryOp -> alwaysExternal
 1670   TraceMarkerOp -> alwaysExternal
 1671   SetThreadAllocationCounter -> alwaysExternal
 1672 
 1673   -- See Note [keepAlive# magic] in GHC.CoreToStg.Prep.
 1674   KeepAliveOp -> panic "keepAlive# should have been eliminated in CorePrep"
 1675 
 1676  where
 1677   profile = targetProfile dflags
 1678   platform = profilePlatform profile
 1679   result_info = getPrimOpResultInfo primop
 1680 
 1681   opNop :: [CmmExpr] -> PrimopCmmEmit
 1682   opNop args = opIntoRegs $ \[res] -> emitAssign (CmmLocal res) arg
 1683     where [arg] = args
 1684 
 1685   opNarrow
 1686     :: [CmmExpr]
 1687     -> (Width -> Width -> MachOp, Width)
 1688     -> PrimopCmmEmit
 1689   opNarrow args (mop, rep) = opIntoRegs $ \[res] -> emitAssign (CmmLocal res) $
 1690     CmmMachOp (mop rep (wordWidth platform)) [CmmMachOp (mop (wordWidth platform) rep) [arg]]
 1691     where [arg] = args
 1692 
 1693   -- | These primops are implemented by CallishMachOps, because they sometimes
 1694   -- turn into foreign calls depending on the backend.
 1695   opCallish :: [CmmExpr] -> CallishMachOp -> PrimopCmmEmit
 1696   opCallish args prim = opIntoRegs $ \[res] -> emitPrimCall [res] prim args
 1697 
 1698   opTranslate :: [CmmExpr] -> MachOp -> PrimopCmmEmit
 1699   opTranslate args mop = opIntoRegs $ \[res] -> do
 1700     let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args)
 1701     emit stmt
 1702 
 1703   opTranslate64
 1704     :: [CmmExpr]
 1705     -> (Width -> MachOp)
 1706     -> CallishMachOp
 1707     -> PrimopCmmEmit
 1708   opTranslate64 args mkMop callish =
 1709     case platformWordSize platform of
 1710       -- LLVM and C `can handle larger than native size arithmetic natively.
 1711       _ | not ncg -> opTranslate args $ mkMop W64
 1712       PW4 -> opCallish args callish
 1713       PW8 -> opTranslate args $ mkMop W64
 1714 
 1715   -- | Basically a "manual" case, rather than one of the common repetitive forms
 1716   -- above. The results are a parameter to the returned function so we know the
 1717   -- choice of variant never depends on them.
 1718   opCallishHandledLater
 1719     :: [CmmExpr]
 1720     -> Either CallishMachOp GenericOp
 1721     -> PrimopCmmEmit
 1722   opCallishHandledLater args callOrNot = opIntoRegs $ \res0 -> case callOrNot of
 1723     Left op   -> emit $ mkUnsafeCall (PrimTarget op) res0 args
 1724     Right gen -> gen res0 args
 1725 
 1726   opIntoRegs
 1727     :: ([LocalReg] -- where to put the results
 1728         -> FCode ())
 1729     -> PrimopCmmEmit
 1730   opIntoRegs f = PrimopCmmEmit_Internal $ \res_ty -> do
 1731     regs <- case result_info of
 1732       ReturnsPrim VoidRep -> pure []
 1733       ReturnsPrim rep
 1734         -> do reg <- newTemp (primRepCmmType platform rep)
 1735               pure [reg]
 1736 
 1737       ReturnsAlg tycon | isUnboxedTupleTyCon tycon
 1738         -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
 1739               pure regs
 1740 
 1741       _ -> panic "cgOpApp"
 1742     f regs
 1743     pure $ map (CmmReg . CmmLocal) regs
 1744 
 1745   alwaysExternal = \_ -> PrimopCmmEmit_External
 1746   -- Note [QuotRem optimization]
 1747   -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1748   --
 1749   -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
 1750   -- (shift, .&.).
 1751   --
 1752   -- Currently we only support optimization (performed in GHC.Cmm.Opt) when the
 1753   -- constant is a power of 2. #9041 tracks the implementation of the general
 1754   -- optimization.
 1755   --
 1756   -- `quotRem` can be optimized in the same way. However as it returns two values,
 1757   -- it is implemented as a "callish" primop which is harder to match and
 1758   -- to transform later on. For simplicity, the current implementation detects cases
 1759   -- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
 1760   -- primop into two CMM quot and rem primops.
 1761   quotRemCanBeOptimized = \case
 1762     [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
 1763     _                         -> False
 1764 
 1765   ncg  = backend dflags == NCG
 1766   llvm = backend dflags == LLVM
 1767   x86ish = case platformArch platform of
 1768              ArchX86    -> True
 1769              ArchX86_64 -> True
 1770              _          -> False
 1771   ppc = case platformArch platform of
 1772           ArchPPC      -> True
 1773           ArchPPC_64 _ -> True
 1774           _            -> False
 1775   aarch64 = platformArch platform == ArchAArch64
 1776 
 1777 data PrimopCmmEmit
 1778   -- | Out of line fake primop that's actually just a foreign call to other
 1779   -- (presumably) C--.
 1780   = PrimopCmmEmit_External
 1781   -- | Real primop turned into inline C--.
 1782   | PrimopCmmEmit_Internal (Type -- the return type, some primops are specialized to it
 1783                             -> FCode [CmmExpr]) -- just for TagToEnum for now
 1784 
 1785 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
 1786 
 1787 genericIntQuotRemOp :: Width -> GenericOp
 1788 genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
 1789    = emit $ mkAssign (CmmLocal res_q)
 1790               (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
 1791             mkAssign (CmmLocal res_r)
 1792               (CmmMachOp (MO_S_Rem  width) [arg_x, arg_y])
 1793 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
 1794 
 1795 genericWordQuotRemOp :: Width -> GenericOp
 1796 genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
 1797     = emit $ mkAssign (CmmLocal res_q)
 1798                (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
 1799              mkAssign (CmmLocal res_r)
 1800                (CmmMachOp (MO_U_Rem  width) [arg_x, arg_y])
 1801 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
 1802 
 1803 genericWordQuotRem2Op :: Platform -> GenericOp
 1804 genericWordQuotRem2Op platform [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
 1805     = emit =<< f (widthInBits (wordWidth platform)) zero arg_x_high arg_x_low
 1806     where    ty = cmmExprType platform arg_x_high
 1807              shl   x i = CmmMachOp (MO_Shl   (wordWidth platform)) [x, i]
 1808              shr   x i = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, i]
 1809              or    x y = CmmMachOp (MO_Or    (wordWidth platform)) [x, y]
 1810              ge    x y = CmmMachOp (MO_U_Ge  (wordWidth platform)) [x, y]
 1811              ne    x y = CmmMachOp (MO_Ne    (wordWidth platform)) [x, y]
 1812              minus x y = CmmMachOp (MO_Sub   (wordWidth platform)) [x, y]
 1813              times x y = CmmMachOp (MO_Mul   (wordWidth platform)) [x, y]
 1814              zero   = lit 0
 1815              one    = lit 1
 1816              negone = lit (fromIntegral (platformWordSizeInBits platform) - 1)
 1817              lit i = CmmLit (CmmInt i (wordWidth platform))
 1818 
 1819              f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
 1820              f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
 1821                                       mkAssign (CmmLocal res_r) high)
 1822              f i acc high low =
 1823                  do roverflowedBit <- newTemp ty
 1824                     rhigh'         <- newTemp ty
 1825                     rhigh''        <- newTemp ty
 1826                     rlow'          <- newTemp ty
 1827                     risge          <- newTemp ty
 1828                     racc'          <- newTemp ty
 1829                     let high'         = CmmReg (CmmLocal rhigh')
 1830                         isge          = CmmReg (CmmLocal risge)
 1831                         overflowedBit = CmmReg (CmmLocal roverflowedBit)
 1832                     let this = catAGraphs
 1833                                [mkAssign (CmmLocal roverflowedBit)
 1834                                           (shr high negone),
 1835                                 mkAssign (CmmLocal rhigh')
 1836                                           (or (shl high one) (shr low negone)),
 1837                                 mkAssign (CmmLocal rlow')
 1838                                           (shl low one),
 1839                                 mkAssign (CmmLocal risge)
 1840                                           (or (overflowedBit `ne` zero)
 1841                                               (high' `ge` arg_y)),
 1842                                 mkAssign (CmmLocal rhigh'')
 1843                                           (high' `minus` (arg_y `times` isge)),
 1844                                 mkAssign (CmmLocal racc')
 1845                                           (or (shl acc one) isge)]
 1846                     rest <- f (i - 1) (CmmReg (CmmLocal racc'))
 1847                                       (CmmReg (CmmLocal rhigh''))
 1848                                       (CmmReg (CmmLocal rlow'))
 1849                     return (this <*> rest)
 1850 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
 1851 
 1852 genericWordAdd2Op :: GenericOp
 1853 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
 1854   = do platform <- getPlatform
 1855        r1 <- newTemp (cmmExprType platform arg_x)
 1856        r2 <- newTemp (cmmExprType platform arg_x)
 1857        let topHalf x = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, hww]
 1858            toTopHalf x = CmmMachOp (MO_Shl (wordWidth platform)) [x, hww]
 1859            bottomHalf x = CmmMachOp (MO_And (wordWidth platform)) [x, hwm]
 1860            add x y = CmmMachOp (MO_Add (wordWidth platform)) [x, y]
 1861            or x y = CmmMachOp (MO_Or (wordWidth platform)) [x, y]
 1862            hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth platform)))
 1863                                 (wordWidth platform))
 1864            hwm = CmmLit (CmmInt (halfWordMask platform) (wordWidth platform))
 1865        emit $ catAGraphs
 1866           [mkAssign (CmmLocal r1)
 1867                (add (bottomHalf arg_x) (bottomHalf arg_y)),
 1868            mkAssign (CmmLocal r2)
 1869                (add (topHalf (CmmReg (CmmLocal r1)))
 1870                     (add (topHalf arg_x) (topHalf arg_y))),
 1871            mkAssign (CmmLocal res_h)
 1872                (topHalf (CmmReg (CmmLocal r2))),
 1873            mkAssign (CmmLocal res_l)
 1874                (or (toTopHalf (CmmReg (CmmLocal r2)))
 1875                    (bottomHalf (CmmReg (CmmLocal r1))))]
 1876 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
 1877 
 1878 -- | Implements branchless recovery of the carry flag @c@ by checking the
 1879 -- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
 1880 --
 1881 -- @
 1882 --    c = a&b | (a|b)&~r
 1883 -- @
 1884 --
 1885 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
 1886 genericWordAddCOp :: GenericOp
 1887 genericWordAddCOp [res_r, res_c] [aa, bb]
 1888  = do platform <- getPlatform
 1889       emit $ catAGraphs [
 1890         mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd platform) [aa,bb]),
 1891         mkAssign (CmmLocal res_c) $
 1892           CmmMachOp (mo_wordUShr platform) [
 1893             CmmMachOp (mo_wordOr platform) [
 1894               CmmMachOp (mo_wordAnd platform) [aa,bb],
 1895               CmmMachOp (mo_wordAnd platform) [
 1896                 CmmMachOp (mo_wordOr platform) [aa,bb],
 1897                 CmmMachOp (mo_wordNot platform) [CmmReg (CmmLocal res_r)]
 1898               ]
 1899             ],
 1900             mkIntExpr platform (platformWordSizeInBits platform - 1)
 1901           ]
 1902         ]
 1903 genericWordAddCOp _ _ = panic "genericWordAddCOp"
 1904 
 1905 -- | Implements branchless recovery of the carry flag @c@ by checking the
 1906 -- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
 1907 --
 1908 -- @
 1909 --    c = ~a&b | (~a|b)&r
 1910 -- @
 1911 --
 1912 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
 1913 genericWordSubCOp :: GenericOp
 1914 genericWordSubCOp [res_r, res_c] [aa, bb]
 1915  = do platform <- getPlatform
 1916       emit $ catAGraphs [
 1917         mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub platform) [aa,bb]),
 1918         mkAssign (CmmLocal res_c) $
 1919           CmmMachOp (mo_wordUShr platform) [
 1920             CmmMachOp (mo_wordOr platform) [
 1921               CmmMachOp (mo_wordAnd platform) [
 1922                 CmmMachOp (mo_wordNot platform) [aa],
 1923                 bb
 1924               ],
 1925               CmmMachOp (mo_wordAnd platform) [
 1926                 CmmMachOp (mo_wordOr platform) [
 1927                   CmmMachOp (mo_wordNot platform) [aa],
 1928                   bb
 1929                 ],
 1930                 CmmReg (CmmLocal res_r)
 1931               ]
 1932             ],
 1933             mkIntExpr platform (platformWordSizeInBits platform - 1)
 1934           ]
 1935         ]
 1936 genericWordSubCOp _ _ = panic "genericWordSubCOp"
 1937 
 1938 genericIntAddCOp :: GenericOp
 1939 genericIntAddCOp [res_r, res_c] [aa, bb]
 1940 {-
 1941    With some bit-twiddling, we can define int{Add,Sub}Czh portably in
 1942    C, and without needing any comparisons.  This may not be the
 1943    fastest way to do it - if you have better code, please send it! --SDM
 1944 
 1945    Return : r = a + b,  c = 0 if no overflow, 1 on overflow.
 1946 
 1947    We currently don't make use of the r value if c is != 0 (i.e.
 1948    overflow), we just convert to big integers and try again.  This
 1949    could be improved by making r and c the correct values for
 1950    plugging into a new J#.
 1951 
 1952    { r = ((I_)(a)) + ((I_)(b));                                 \
 1953      c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r)))    \
 1954          >> (BITS_IN (I_) - 1);                                 \
 1955    }
 1956    Wading through the mass of bracketry, it seems to reduce to:
 1957    c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
 1958 
 1959 -}
 1960  = do platform <- getPlatform
 1961       emit $ catAGraphs [
 1962         mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd platform) [aa,bb]),
 1963         mkAssign (CmmLocal res_c) $
 1964           CmmMachOp (mo_wordUShr platform) [
 1965                 CmmMachOp (mo_wordAnd platform) [
 1966                     CmmMachOp (mo_wordNot platform) [CmmMachOp (mo_wordXor platform) [aa,bb]],
 1967                     CmmMachOp (mo_wordXor platform) [aa, CmmReg (CmmLocal res_r)]
 1968                 ],
 1969                 mkIntExpr platform (platformWordSizeInBits platform - 1)
 1970           ]
 1971         ]
 1972 genericIntAddCOp _ _ = panic "genericIntAddCOp"
 1973 
 1974 genericIntSubCOp :: GenericOp
 1975 genericIntSubCOp [res_r, res_c] [aa, bb]
 1976 {- Similarly:
 1977    #define subIntCzh(r,c,a,b)                                   \
 1978    { r = ((I_)(a)) - ((I_)(b));                                 \
 1979      c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r)))     \
 1980          >> (BITS_IN (I_) - 1);                                 \
 1981    }
 1982 
 1983    c =  ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
 1984 -}
 1985  = do platform <- getPlatform
 1986       emit $ catAGraphs [
 1987         mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub platform) [aa,bb]),
 1988         mkAssign (CmmLocal res_c) $
 1989           CmmMachOp (mo_wordUShr platform) [
 1990                 CmmMachOp (mo_wordAnd platform) [
 1991                     CmmMachOp (mo_wordXor platform) [aa,bb],
 1992                     CmmMachOp (mo_wordXor platform) [aa, CmmReg (CmmLocal res_r)]
 1993                 ],
 1994                 mkIntExpr platform (platformWordSizeInBits platform - 1)
 1995           ]
 1996         ]
 1997 genericIntSubCOp _ _ = panic "genericIntSubCOp"
 1998 
 1999 genericWordMul2Op :: GenericOp
 2000 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
 2001  = do platform <- getPlatform
 2002       let t = cmmExprType platform arg_x
 2003       xlyl <- liftM CmmLocal $ newTemp t
 2004       xlyh <- liftM CmmLocal $ newTemp t
 2005       xhyl <- liftM CmmLocal $ newTemp t
 2006       r    <- liftM CmmLocal $ newTemp t
 2007       -- This generic implementation is very simple and slow. We might
 2008       -- well be able to do better, but for now this at least works.
 2009       let topHalf x = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, hww]
 2010           toTopHalf x = CmmMachOp (MO_Shl (wordWidth platform)) [x, hww]
 2011           bottomHalf x = CmmMachOp (MO_And (wordWidth platform)) [x, hwm]
 2012           add x y = CmmMachOp (MO_Add (wordWidth platform)) [x, y]
 2013           sum = foldl1 add
 2014           mul x y = CmmMachOp (MO_Mul (wordWidth platform)) [x, y]
 2015           or x y = CmmMachOp (MO_Or (wordWidth platform)) [x, y]
 2016           hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth platform)))
 2017                                (wordWidth platform))
 2018           hwm = CmmLit (CmmInt (halfWordMask platform) (wordWidth platform))
 2019       emit $ catAGraphs
 2020              [mkAssign xlyl
 2021                   (mul (bottomHalf arg_x) (bottomHalf arg_y)),
 2022               mkAssign xlyh
 2023                   (mul (bottomHalf arg_x) (topHalf arg_y)),
 2024               mkAssign xhyl
 2025                   (mul (topHalf arg_x) (bottomHalf arg_y)),
 2026               mkAssign r
 2027                   (sum [topHalf    (CmmReg xlyl),
 2028                         bottomHalf (CmmReg xhyl),
 2029                         bottomHalf (CmmReg xlyh)]),
 2030               mkAssign (CmmLocal res_l)
 2031                   (or (bottomHalf (CmmReg xlyl))
 2032                       (toTopHalf (CmmReg r))),
 2033               mkAssign (CmmLocal res_h)
 2034                   (sum [mul (topHalf arg_x) (topHalf arg_y),
 2035                         topHalf (CmmReg xhyl),
 2036                         topHalf (CmmReg xlyh),
 2037                         topHalf (CmmReg r)])]
 2038 genericWordMul2Op _ _ = panic "genericWordMul2Op"
 2039 
 2040 genericIntMul2Op :: GenericOp
 2041 genericIntMul2Op [res_c, res_h, res_l] both_args@[arg_x, arg_y]
 2042  = do dflags <- getDynFlags
 2043       platform <- getPlatform
 2044       -- Implement algorithm from Hacker's Delight, 2nd edition, p.174
 2045       let t = cmmExprType platform arg_x
 2046       p   <- newTemp t
 2047       -- 1) compute the multiplication as if numbers were unsigned
 2048       _ <- withSequel (AssignTo [p, res_l] False) $
 2049              cmmPrimOpApp dflags WordMul2Op both_args Nothing
 2050       -- 2) correct the high bits of the unsigned result
 2051       let carryFill x = CmmMachOp (MO_S_Shr ww) [x, wwm1]
 2052           sub x y     = CmmMachOp (MO_Sub   ww) [x, y]
 2053           and x y     = CmmMachOp (MO_And   ww) [x, y]
 2054           neq x y     = CmmMachOp (MO_Ne    ww) [x, y]
 2055           f   x y     = (carryFill x) `and` y
 2056           wwm1        = CmmLit (CmmInt (fromIntegral (widthInBits ww - 1)) ww)
 2057           rl x        = CmmReg (CmmLocal x)
 2058           ww          = wordWidth platform
 2059       emit $ catAGraphs
 2060              [ mkAssign (CmmLocal res_h) (rl p `sub` f arg_x arg_y `sub` f arg_y arg_x)
 2061              , mkAssign (CmmLocal res_c) (rl res_h `neq` carryFill (rl res_l))
 2062              ]
 2063 genericIntMul2Op _ _ = panic "genericIntMul2Op"
 2064 
 2065 -- This replicates what we had in libraries/base/GHC/Float.hs:
 2066 --
 2067 --    abs x    | x == 0    = 0 -- handles (-0.0)
 2068 --             | x >  0    = x
 2069 --             | otherwise = negateFloat x
 2070 genericFabsOp :: Width -> GenericOp
 2071 genericFabsOp w [res_r] [aa]
 2072  = do platform <- getPlatform
 2073       let zero   = CmmLit (CmmFloat 0 w)
 2074 
 2075           eq x y = CmmMachOp (MO_F_Eq w) [x, y]
 2076           gt x y = CmmMachOp (MO_F_Gt w) [x, y]
 2077 
 2078           neg x  = CmmMachOp (MO_F_Neg w) [x]
 2079 
 2080           g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
 2081           g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
 2082 
 2083       res_t <- CmmLocal <$> newTemp (cmmExprType platform aa)
 2084       let g3 = catAGraphs [mkAssign res_t aa,
 2085                            mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
 2086 
 2087       g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
 2088 
 2089       emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
 2090 
 2091 genericFabsOp _ _ _ = panic "genericFabsOp"
 2092 
 2093 ------------------------------------------------------------------------------
 2094 -- Helpers for translating various minor variants of array indexing.
 2095 
 2096 doIndexOffAddrOp :: Maybe MachOp
 2097                  -> CmmType
 2098                  -> [LocalReg]
 2099                  -> [CmmExpr]
 2100                  -> FCode ()
 2101 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
 2102    = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
 2103 doIndexOffAddrOp _ _ _ _
 2104    = panic "GHC.StgToCmm.Prim: doIndexOffAddrOp"
 2105 
 2106 doIndexOffAddrOpAs :: Maybe MachOp
 2107                    -> CmmType
 2108                    -> CmmType
 2109                    -> [LocalReg]
 2110                    -> [CmmExpr]
 2111                    -> FCode ()
 2112 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
 2113    = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
 2114 doIndexOffAddrOpAs _ _ _ _ _
 2115    = panic "GHC.StgToCmm.Prim: doIndexOffAddrOpAs"
 2116 
 2117 doIndexByteArrayOp :: Maybe MachOp
 2118                    -> CmmType
 2119                    -> [LocalReg]
 2120                    -> [CmmExpr]
 2121                    -> FCode ()
 2122 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
 2123    = do profile <- getProfile
 2124         mkBasicIndexedRead (arrWordsHdrSize profile) maybe_post_read_cast rep res addr rep idx
 2125 doIndexByteArrayOp _ _ _ _
 2126    = panic "GHC.StgToCmm.Prim: doIndexByteArrayOp"
 2127 
 2128 doIndexByteArrayOpAs :: Maybe MachOp
 2129                     -> CmmType
 2130                     -> CmmType
 2131                     -> [LocalReg]
 2132                     -> [CmmExpr]
 2133                     -> FCode ()
 2134 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
 2135    = do profile <- getProfile
 2136         mkBasicIndexedRead (arrWordsHdrSize profile) maybe_post_read_cast rep res addr idx_rep idx
 2137 doIndexByteArrayOpAs _ _ _ _ _
 2138    = panic "GHC.StgToCmm.Prim: doIndexByteArrayOpAs"
 2139 
 2140 doReadPtrArrayOp :: LocalReg
 2141                  -> CmmExpr
 2142                  -> CmmExpr
 2143                  -> FCode ()
 2144 doReadPtrArrayOp res addr idx
 2145    = do profile <- getProfile
 2146         platform <- getPlatform
 2147         mkBasicIndexedRead (arrPtrsHdrSize profile) Nothing (gcWord platform) res addr (gcWord platform) idx
 2148 
 2149 doWriteOffAddrOp :: Maybe MachOp
 2150                  -> CmmType
 2151                  -> [LocalReg]
 2152                  -> [CmmExpr]
 2153                  -> FCode ()
 2154 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
 2155    = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
 2156 doWriteOffAddrOp _ _ _ _
 2157    = panic "GHC.StgToCmm.Prim: doWriteOffAddrOp"
 2158 
 2159 doWriteByteArrayOp :: Maybe MachOp
 2160                    -> CmmType
 2161                    -> [LocalReg]
 2162                    -> [CmmExpr]
 2163                    -> FCode ()
 2164 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
 2165    = do profile <- getProfile
 2166         mkBasicIndexedWrite (arrWordsHdrSize profile) maybe_pre_write_cast addr idx_ty idx val
 2167 doWriteByteArrayOp _ _ _ _
 2168    = panic "GHC.StgToCmm.Prim: doWriteByteArrayOp"
 2169 
 2170 doWritePtrArrayOp :: CmmExpr
 2171                   -> CmmExpr
 2172                   -> CmmExpr
 2173                   -> FCode ()
 2174 doWritePtrArrayOp addr idx val
 2175   = do profile  <- getProfile
 2176        platform <- getPlatform
 2177        let ty = cmmExprType platform val
 2178            hdr_size = arrPtrsHdrSize profile
 2179        -- Update remembered set for non-moving collector
 2180        whenUpdRemSetEnabled
 2181            $ emitUpdRemSetPush (cmmLoadIndexOffExpr platform hdr_size ty addr ty idx)
 2182        -- This write barrier is to ensure that the heap writes to the object
 2183        -- referred to by val have happened before we write val into the array.
 2184        -- See #12469 for details.
 2185        emitPrimCall [] MO_WriteBarrier []
 2186        mkBasicIndexedWrite hdr_size Nothing addr ty idx val
 2187        emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
 2188        -- the write barrier.  We must write a byte into the mark table:
 2189        -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
 2190        emit $ mkStore (
 2191          cmmOffsetExpr platform
 2192           (cmmOffsetExprW platform (cmmOffsetB platform addr hdr_size)
 2193                          (loadArrPtrsSize profile addr))
 2194           (CmmMachOp (mo_wordUShr platform) [idx,
 2195                                            mkIntExpr platform (pc_MUT_ARR_PTRS_CARD_BITS (platformConstants platform))])
 2196          ) (CmmLit (CmmInt 1 W8))
 2197 
 2198 loadArrPtrsSize :: Profile -> CmmExpr -> CmmExpr
 2199 loadArrPtrsSize profile addr = CmmLoad (cmmOffsetB platform addr off) (bWord platform)
 2200  where off = fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_ptrs (profileConstants profile)
 2201        platform = profilePlatform profile
 2202 
 2203 mkBasicIndexedRead :: ByteOff      -- Initial offset in bytes
 2204                    -> Maybe MachOp -- Optional result cast
 2205                    -> CmmType      -- Type of element we are accessing
 2206                    -> LocalReg     -- Destination
 2207                    -> CmmExpr      -- Base address
 2208                    -> CmmType      -- Type of element by which we are indexing
 2209                    -> CmmExpr      -- Index
 2210                    -> FCode ()
 2211 mkBasicIndexedRead off Nothing ty res base idx_ty idx
 2212    = do platform <- getPlatform
 2213         emitAssign (CmmLocal res) (cmmLoadIndexOffExpr platform off ty base idx_ty idx)
 2214 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
 2215    = do platform <- getPlatform
 2216         emitAssign (CmmLocal res) (CmmMachOp cast [
 2217                                    cmmLoadIndexOffExpr platform off ty base idx_ty idx])
 2218 
 2219 mkBasicIndexedWrite :: ByteOff      -- Initial offset in bytes
 2220                     -> Maybe MachOp -- Optional value cast
 2221                     -> CmmExpr      -- Base address
 2222                     -> CmmType      -- Type of element by which we are indexing
 2223                     -> CmmExpr      -- Index
 2224                     -> CmmExpr      -- Value to write
 2225                     -> FCode ()
 2226 mkBasicIndexedWrite off Nothing base idx_ty idx val
 2227    = do platform <- getPlatform
 2228         emitStore (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) val
 2229 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
 2230    = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
 2231 
 2232 -- ----------------------------------------------------------------------------
 2233 -- Misc utils
 2234 
 2235 cmmIndexOffExpr :: Platform
 2236                 -> ByteOff  -- Initial offset in bytes
 2237                 -> Width    -- Width of element by which we are indexing
 2238                 -> CmmExpr  -- Base address
 2239                 -> CmmExpr  -- Index
 2240                 -> CmmExpr
 2241 cmmIndexOffExpr platform off width base idx
 2242    = cmmIndexExpr platform width (cmmOffsetB platform base off) idx
 2243 
 2244 cmmLoadIndexOffExpr :: Platform
 2245                     -> ByteOff  -- Initial offset in bytes
 2246                     -> CmmType  -- Type of element we are accessing
 2247                     -> CmmExpr  -- Base address
 2248                     -> CmmType  -- Type of element by which we are indexing
 2249                     -> CmmExpr  -- Index
 2250                     -> CmmExpr
 2251 cmmLoadIndexOffExpr platform off ty base idx_ty idx
 2252    = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty
 2253 
 2254 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
 2255 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
 2256 
 2257 ------------------------------------------------------------------------------
 2258 -- Helpers for translating vector primops.
 2259 
 2260 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
 2261 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
 2262 
 2263 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
 2264 vecCmmCat IntVec   = cmmBits
 2265 vecCmmCat WordVec  = cmmBits
 2266 vecCmmCat FloatVec = cmmFloat
 2267 
 2268 vecElemInjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
 2269 vecElemInjectCast _        FloatVec _   =  Nothing
 2270 vecElemInjectCast platform   IntVec   W8  =  Just (mo_WordTo8  platform)
 2271 vecElemInjectCast platform   IntVec   W16 =  Just (mo_WordTo16 platform)
 2272 vecElemInjectCast platform   IntVec   W32 =  Just (mo_WordTo32 platform)
 2273 vecElemInjectCast _        IntVec   W64 =  Nothing
 2274 vecElemInjectCast platform   WordVec  W8  =  Just (mo_WordTo8  platform)
 2275 vecElemInjectCast platform   WordVec  W16 =  Just (mo_WordTo16 platform)
 2276 vecElemInjectCast platform   WordVec  W32 =  Just (mo_WordTo32 platform)
 2277 vecElemInjectCast _        WordVec  W64 =  Nothing
 2278 vecElemInjectCast _        _        _   =  Nothing
 2279 
 2280 vecElemProjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
 2281 vecElemProjectCast _        FloatVec _   =  Nothing
 2282 vecElemProjectCast platform   IntVec   W8  =  Just (mo_s_8ToWord  platform)
 2283 vecElemProjectCast platform   IntVec   W16 =  Just (mo_s_16ToWord platform)
 2284 vecElemProjectCast platform   IntVec   W32 =  Just (mo_s_32ToWord platform)
 2285 vecElemProjectCast _        IntVec   W64 =  Nothing
 2286 vecElemProjectCast platform   WordVec  W8  =  Just (mo_u_8ToWord  platform)
 2287 vecElemProjectCast platform   WordVec  W16 =  Just (mo_u_16ToWord platform)
 2288 vecElemProjectCast platform   WordVec  W32 =  Just (mo_u_32ToWord platform)
 2289 vecElemProjectCast _        WordVec  W64 =  Nothing
 2290 vecElemProjectCast _        _        _   =  Nothing
 2291 
 2292 
 2293 -- NOTE [SIMD Design for the future]
 2294 -- Check to make sure that we can generate code for the specified vector type
 2295 -- given the current set of dynamic flags.
 2296 -- Currently these checks are specific to x86 and x86_64 architecture.
 2297 -- This should be fixed!
 2298 -- In particular,
 2299 -- 1) Add better support for other architectures! (this may require a redesign)
 2300 -- 2) Decouple design choices from LLVM's pseudo SIMD model!
 2301 --   The high level LLVM naive rep makes per CPU family SIMD generation is own
 2302 --   optimization problem, and hides important differences in eg ARM vs x86_64 simd
 2303 -- 3) Depending on the architecture, the SIMD registers may also support general
 2304 --    computations on Float/Double/Word/Int scalars, but currently on
 2305 --    for example x86_64, we always put Word/Int (or sized) in GPR
 2306 --    (general purpose) registers. Would relaxing that allow for
 2307 --    useful optimization opportunities?
 2308 --      Phrased differently, it is worth experimenting with supporting
 2309 --    different register mapping strategies than we currently have, especially if
 2310 --    someday we want SIMD to be a first class denizen in GHC along with scalar
 2311 --    values!
 2312 --      The current design with respect to register mapping of scalars could
 2313 --    very well be the best,but exploring the  design space and doing careful
 2314 --    measurements is the only way to validate that.
 2315 --      In some next generation CPU ISAs, notably RISC V, the SIMD extension
 2316 --    includes  support for a sort of run time CPU dependent vectorization parameter,
 2317 --    where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
 2318 --    element chunk! Time will tell if that direction sees wide adoption,
 2319 --    but it is from that context that unifying our handling of simd and scalars
 2320 --    may benefit. It is not likely to benefit current architectures, though
 2321 --    it may very well be a design perspective that helps guide improving the NCG.
 2322 
 2323 
 2324 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
 2325 checkVecCompatibility dflags vcat l w = do
 2326     when (backend dflags /= LLVM) $
 2327         sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
 2328                          ,"Please use -fllvm."]
 2329     check vecWidth vcat l w
 2330   where
 2331     platform = targetPlatform dflags
 2332     check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
 2333     check W128 FloatVec 4 W32 | not (isSseEnabled platform) =
 2334         sorry $ "128-bit wide single-precision floating point " ++
 2335                 "SIMD vector instructions require at least -msse."
 2336     check W128 _ _ _ | not (isSse2Enabled platform) =
 2337         sorry $ "128-bit wide integer and double precision " ++
 2338                 "SIMD vector instructions require at least -msse2."
 2339     check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
 2340         sorry $ "256-bit wide floating point " ++
 2341                 "SIMD vector instructions require at least -mavx."
 2342     check W256 _ _ _ | not (isAvx2Enabled dflags) =
 2343         sorry $ "256-bit wide integer " ++
 2344                 "SIMD vector instructions require at least -mavx2."
 2345     check W512 _ _ _ | not (isAvx512fEnabled dflags) =
 2346         sorry $ "512-bit wide " ++
 2347                 "SIMD vector instructions require -mavx512f."
 2348     check _ _ _ _ = return ()
 2349 
 2350     vecWidth = typeWidth (vecVmmType vcat l w)
 2351 
 2352 ------------------------------------------------------------------------------
 2353 -- Helpers for translating vector packing and unpacking.
 2354 
 2355 doVecPackOp :: Maybe MachOp  -- Cast from element to vector component
 2356             -> CmmType       -- Type of vector
 2357             -> CmmExpr       -- Initial vector
 2358             -> [CmmExpr]     -- Elements
 2359             -> CmmFormal     -- Destination for result
 2360             -> FCode ()
 2361 doVecPackOp maybe_pre_write_cast ty z es res = do
 2362     dst <- newTemp ty
 2363     emitAssign (CmmLocal dst) z
 2364     vecPack dst es 0
 2365   where
 2366     vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
 2367     vecPack src [] _ =
 2368         emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
 2369 
 2370     vecPack src (e : es) i = do
 2371         dst <- newTemp ty
 2372         if isFloatType (vecElemType ty)
 2373           then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
 2374                                                     [CmmReg (CmmLocal src), cast e, iLit])
 2375           else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
 2376                                                     [CmmReg (CmmLocal src), cast e, iLit])
 2377         vecPack dst es (i + 1)
 2378       where
 2379         -- vector indices are always 32-bits
 2380         iLit = CmmLit (CmmInt (toInteger i) W32)
 2381 
 2382     cast :: CmmExpr -> CmmExpr
 2383     cast val = case maybe_pre_write_cast of
 2384                  Nothing   -> val
 2385                  Just cast -> CmmMachOp cast [val]
 2386 
 2387     len :: Length
 2388     len = vecLength ty
 2389 
 2390     wid :: Width
 2391     wid = typeWidth (vecElemType ty)
 2392 
 2393 doVecUnpackOp :: Maybe MachOp  -- Cast from vector component to element result
 2394               -> CmmType       -- Type of vector
 2395               -> CmmExpr       -- Vector
 2396               -> [CmmFormal]   -- Element results
 2397               -> FCode ()
 2398 doVecUnpackOp maybe_post_read_cast ty e res =
 2399     vecUnpack res 0
 2400   where
 2401     vecUnpack :: [CmmFormal] -> Int -> FCode ()
 2402     vecUnpack [] _ =
 2403         return ()
 2404 
 2405     vecUnpack (r : rs) i = do
 2406         if isFloatType (vecElemType ty)
 2407           then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
 2408                                              [e, iLit]))
 2409           else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
 2410                                              [e, iLit]))
 2411         vecUnpack rs (i + 1)
 2412       where
 2413         -- vector indices are always 32-bits
 2414         iLit = CmmLit (CmmInt (toInteger i) W32)
 2415 
 2416     cast :: CmmExpr -> CmmExpr
 2417     cast val = case maybe_post_read_cast of
 2418                  Nothing   -> val
 2419                  Just cast -> CmmMachOp cast [val]
 2420 
 2421     len :: Length
 2422     len = vecLength ty
 2423 
 2424     wid :: Width
 2425     wid = typeWidth (vecElemType ty)
 2426 
 2427 doVecInsertOp :: Maybe MachOp  -- Cast from element to vector component
 2428               -> CmmType       -- Vector type
 2429               -> CmmExpr       -- Source vector
 2430               -> CmmExpr       -- Element
 2431               -> CmmExpr       -- Index at which to insert element
 2432               -> CmmFormal     -- Destination for result
 2433               -> FCode ()
 2434 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
 2435     platform <- getPlatform
 2436     -- vector indices are always 32-bits
 2437     let idx' :: CmmExpr
 2438         idx' = CmmMachOp (MO_SS_Conv (wordWidth platform) W32) [idx]
 2439     if isFloatType (vecElemType ty)
 2440       then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
 2441       else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
 2442   where
 2443     cast :: CmmExpr -> CmmExpr
 2444     cast val = case maybe_pre_write_cast of
 2445                  Nothing   -> val
 2446                  Just cast -> CmmMachOp cast [val]
 2447 
 2448     len :: Length
 2449     len = vecLength ty
 2450 
 2451     wid :: Width
 2452     wid = typeWidth (vecElemType ty)
 2453 
 2454 ------------------------------------------------------------------------------
 2455 -- Helpers for translating prefetching.
 2456 
 2457 
 2458 -- | Translate byte array prefetch operations into proper primcalls.
 2459 doPrefetchByteArrayOp :: Int
 2460                       -> [CmmExpr]
 2461                       -> FCode ()
 2462 doPrefetchByteArrayOp locality  [addr,idx]
 2463    = do profile <- getProfile
 2464         mkBasicPrefetch locality (arrWordsHdrSize profile)  addr idx
 2465 doPrefetchByteArrayOp _ _
 2466    = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
 2467 
 2468 -- | Translate mutable byte array prefetch operations into proper primcalls.
 2469 doPrefetchMutableByteArrayOp :: Int
 2470                       -> [CmmExpr]
 2471                       -> FCode ()
 2472 doPrefetchMutableByteArrayOp locality  [addr,idx]
 2473    = do profile <- getProfile
 2474         mkBasicPrefetch locality (arrWordsHdrSize profile)  addr idx
 2475 doPrefetchMutableByteArrayOp _ _
 2476    = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
 2477 
 2478 -- | Translate address prefetch operations into proper primcalls.
 2479 doPrefetchAddrOp ::Int
 2480                  -> [CmmExpr]
 2481                  -> FCode ()
 2482 doPrefetchAddrOp locality   [addr,idx]
 2483    = mkBasicPrefetch locality 0  addr idx
 2484 doPrefetchAddrOp _ _
 2485    = panic "GHC.StgToCmm.Prim: doPrefetchAddrOp"
 2486 
 2487 -- | Translate value prefetch operations into proper primcalls.
 2488 doPrefetchValueOp :: Int
 2489                  -> [CmmExpr]
 2490                  -> FCode ()
 2491 doPrefetchValueOp  locality   [addr]
 2492   =  do platform <- getPlatform
 2493         mkBasicPrefetch locality 0 addr  (CmmLit (CmmInt 0 (wordWidth platform)))
 2494 doPrefetchValueOp _ _
 2495   = panic "GHC.StgToCmm.Prim: doPrefetchValueOp"
 2496 
 2497 -- | helper to generate prefetch primcalls
 2498 mkBasicPrefetch :: Int          -- Locality level 0-3
 2499                 -> ByteOff      -- Initial offset in bytes
 2500                 -> CmmExpr      -- Base address
 2501                 -> CmmExpr      -- Index
 2502                 -> FCode ()
 2503 mkBasicPrefetch locality off base idx
 2504    = do platform <- getPlatform
 2505         emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr platform W8 (cmmOffsetB platform base off) idx]
 2506         return ()
 2507 
 2508 -- ----------------------------------------------------------------------------
 2509 -- Allocating byte arrays
 2510 
 2511 -- | Takes a register to return the newly allocated array in and the
 2512 -- size of the new array in bytes. Allocates a new
 2513 -- 'MutableByteArray#'.
 2514 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
 2515 doNewByteArrayOp res_r n = do
 2516     profile <- getProfile
 2517     platform <- getPlatform
 2518 
 2519     let info_ptr = mkLblExpr mkArrWords_infoLabel
 2520         rep = arrWordsRep platform n
 2521 
 2522     tickyAllocPrim (mkIntExpr platform (arrWordsHdrSize profile))
 2523         (mkIntExpr platform (nonHdrSize platform rep))
 2524         (zeroExpr platform)
 2525 
 2526     let hdr_size = fixedHdrSize profile
 2527 
 2528     base <- allocHeapClosure rep info_ptr cccsExpr
 2529                      [ (mkIntExpr platform n,
 2530                         hdr_size + pc_OFFSET_StgArrBytes_bytes (platformConstants platform))
 2531                      ]
 2532 
 2533     emit $ mkAssign (CmmLocal res_r) base
 2534 
 2535 -- ----------------------------------------------------------------------------
 2536 -- Comparing byte arrays
 2537 
 2538 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2539                      -> FCode ()
 2540 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
 2541     profile <- getProfile
 2542     platform <- getPlatform
 2543     ba1_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba1 (arrWordsHdrSize profile)) ba1_off
 2544     ba2_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba2 (arrWordsHdrSize profile)) ba2_off
 2545 
 2546     -- short-cut in case of equal pointers avoiding a costly
 2547     -- subroutine call to the memcmp(3) routine; the Cmm logic below
 2548     -- results in assembly code being generated for
 2549     --
 2550     --   cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
 2551     --   cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
 2552     --
 2553     -- that looks like
 2554     --
 2555     --          leaq 16(%r14),%rax
 2556     --          leaq 16(%rsi),%rbx
 2557     --          xorl %ecx,%ecx
 2558     --          cmpq %rbx,%rax
 2559     --          je l_ptr_eq
 2560     --
 2561     --          ; NB: the common case (unequal pointers) falls-through
 2562     --          ; the conditional jump, and therefore matches the
 2563     --          ; usual static branch prediction convention of modern cpus
 2564     --
 2565     --          subq $8,%rsp
 2566     --          movq %rbx,%rsi
 2567     --          movq %rax,%rdi
 2568     --          movl $10,%edx
 2569     --          xorl %eax,%eax
 2570     --          call memcmp
 2571     --          addq $8,%rsp
 2572     --          movslq %eax,%rax
 2573     --          movq %rax,%rcx
 2574     --  l_ptr_eq:
 2575     --          movq %rcx,%rbx
 2576     --          jmp *(%rbp)
 2577 
 2578     l_ptr_eq <- newBlockId
 2579     l_ptr_ne <- newBlockId
 2580 
 2581     emit (mkAssign (CmmLocal res) (zeroExpr platform))
 2582     emit (mkCbranch (cmmEqWord platform ba1_p ba2_p)
 2583                     l_ptr_eq l_ptr_ne (Just False))
 2584 
 2585     emitLabel l_ptr_ne
 2586     emitMemcmpCall res ba1_p ba2_p n 1
 2587 
 2588     emitLabel l_ptr_eq
 2589 
 2590 -- ----------------------------------------------------------------------------
 2591 -- Copying byte arrays
 2592 
 2593 -- | Takes a source 'ByteArray#', an offset in the source array, a
 2594 -- destination 'MutableByteArray#', an offset into the destination
 2595 -- array, and the number of bytes to copy.  Copies the given number of
 2596 -- bytes from the source array to the destination array.
 2597 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2598                   -> FCode ()
 2599 doCopyByteArrayOp = emitCopyByteArray copy
 2600   where
 2601     -- Copy data (we assume the arrays aren't overlapping since
 2602     -- they're of different types)
 2603     copy _src _dst dst_p src_p bytes align =
 2604         emitMemcpyCall dst_p src_p bytes align
 2605 
 2606 -- | Takes a source 'MutableByteArray#', an offset in the source
 2607 -- array, a destination 'MutableByteArray#', an offset into the
 2608 -- destination array, and the number of bytes to copy.  Copies the
 2609 -- given number of bytes from the source array to the destination
 2610 -- array.
 2611 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2612                          -> FCode ()
 2613 doCopyMutableByteArrayOp = emitCopyByteArray copy
 2614   where
 2615     -- The only time the memory might overlap is when the two arrays
 2616     -- we were provided are the same array!
 2617     -- TODO: Optimize branch for common case of no aliasing.
 2618     copy src dst dst_p src_p bytes align = do
 2619         platform <- getPlatform
 2620         (moveCall, cpyCall) <- forkAltPair
 2621             (getCode $ emitMemmoveCall dst_p src_p bytes align)
 2622             (getCode $ emitMemcpyCall  dst_p src_p bytes align)
 2623         emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
 2624 
 2625 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2626                       -> Alignment -> FCode ())
 2627                   -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2628                   -> FCode ()
 2629 emitCopyByteArray copy src src_off dst dst_off n = do
 2630     profile <- getProfile
 2631     platform <- getPlatform
 2632     let byteArrayAlignment = wordAlignment platform
 2633         srcOffAlignment = cmmExprAlignment src_off
 2634         dstOffAlignment = cmmExprAlignment dst_off
 2635         align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
 2636     dst_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform dst (arrWordsHdrSize profile)) dst_off
 2637     src_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform src (arrWordsHdrSize profile)) src_off
 2638     copy src dst dst_p src_p n align
 2639 
 2640 -- | Takes a source 'ByteArray#', an offset in the source array, a
 2641 -- destination 'Addr#', and the number of bytes to copy.  Copies the given
 2642 -- number of bytes from the source array to the destination memory region.
 2643 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
 2644 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
 2645     -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
 2646     profile <- getProfile
 2647     platform <- getPlatform
 2648     src_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform src (arrWordsHdrSize profile)) src_off
 2649     emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
 2650 
 2651 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
 2652 -- destination 'Addr#', and the number of bytes to copy.  Copies the given
 2653 -- number of bytes from the source array to the destination memory region.
 2654 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2655                                -> FCode ()
 2656 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
 2657 
 2658 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
 2659 -- the destination array, and the number of bytes to copy.  Copies the given
 2660 -- number of bytes from the source memory region to the destination array.
 2661 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
 2662 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
 2663     -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
 2664     profile <- getProfile
 2665     platform <- getPlatform
 2666     dst_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform dst (arrWordsHdrSize profile)) dst_off
 2667     emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
 2668 
 2669 
 2670 -- ----------------------------------------------------------------------------
 2671 -- Setting byte arrays
 2672 
 2673 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
 2674 -- and a byte, and sets each of the selected bytes in the array to the
 2675 -- character.
 2676 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
 2677                  -> FCode ()
 2678 doSetByteArrayOp ba off len c = do
 2679     profile <- getProfile
 2680     platform <- getPlatform
 2681 
 2682     let byteArrayAlignment = wordAlignment platform -- known since BA is allocated on heap
 2683         offsetAlignment = cmmExprAlignment off
 2684         align = min byteArrayAlignment offsetAlignment
 2685 
 2686     p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba (arrWordsHdrSize profile)) off
 2687     emitMemsetCall p c len align
 2688 
 2689 -- ----------------------------------------------------------------------------
 2690 -- Allocating arrays
 2691 
 2692 -- | Allocate a new array.
 2693 doNewArrayOp :: CmmFormal             -- ^ return register
 2694              -> SMRep                 -- ^ representation of the array
 2695              -> CLabel                -- ^ info pointer
 2696              -> [(CmmExpr, ByteOff)]  -- ^ header payload
 2697              -> WordOff               -- ^ array size
 2698              -> CmmExpr               -- ^ initial element
 2699              -> FCode ()
 2700 doNewArrayOp res_r rep info payload n init = do
 2701     profile <- getProfile
 2702     platform <- getPlatform
 2703 
 2704     let info_ptr = mkLblExpr info
 2705 
 2706     tickyAllocPrim (mkIntExpr platform (hdrSize profile rep))
 2707         (mkIntExpr platform (nonHdrSize platform rep))
 2708         (zeroExpr platform)
 2709 
 2710     base <- allocHeapClosure rep info_ptr cccsExpr payload
 2711 
 2712     arr <- CmmLocal `fmap` newTemp (bWord platform)
 2713     emit $ mkAssign arr base
 2714 
 2715     -- Initialise all elements of the array
 2716     let mkOff off = cmmOffsetW platform (CmmReg arr) (hdrSizeW profile rep + off)
 2717         initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
 2718     emit (catAGraphs initialization)
 2719 
 2720     emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
 2721 
 2722 -- ----------------------------------------------------------------------------
 2723 -- Copying pointer arrays
 2724 
 2725 -- EZY: This code has an unusually high amount of assignTemp calls, seen
 2726 -- nowhere else in the code generator.  This is mostly because these
 2727 -- "primitive" ops result in a surprisingly large amount of code.  It
 2728 -- will likely be worthwhile to optimize what is emitted here, so that
 2729 -- our optimization passes don't waste time repeatedly optimizing the
 2730 -- same bits of code.
 2731 
 2732 -- More closely imitates 'assignTemp' from the old code generator, which
 2733 -- returns a CmmExpr rather than a LocalReg.
 2734 assignTempE :: CmmExpr -> FCode CmmExpr
 2735 assignTempE e = do
 2736     t <- assignTemp e
 2737     return (CmmReg (CmmLocal t))
 2738 
 2739 -- | Takes a source 'Array#', an offset in the source array, a
 2740 -- destination 'MutableArray#', an offset into the destination array,
 2741 -- and the number of elements to copy.  Copies the given number of
 2742 -- elements from the source array to the destination array.
 2743 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
 2744               -> FCode ()
 2745 doCopyArrayOp = emitCopyArray copy
 2746   where
 2747     -- Copy data (we assume the arrays aren't overlapping since
 2748     -- they're of different types)
 2749     copy _src _dst dst_p src_p bytes =
 2750         do platform <- getPlatform
 2751            emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
 2752                (wordAlignment platform)
 2753 
 2754 
 2755 -- | Takes a source 'MutableArray#', an offset in the source array, a
 2756 -- destination 'MutableArray#', an offset into the destination array,
 2757 -- and the number of elements to copy.  Copies the given number of
 2758 -- elements from the source array to the destination array.
 2759 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
 2760                      -> FCode ()
 2761 doCopyMutableArrayOp = emitCopyArray copy
 2762   where
 2763     -- The only time the memory might overlap is when the two arrays
 2764     -- we were provided are the same array!
 2765     -- TODO: Optimize branch for common case of no aliasing.
 2766     copy src dst dst_p src_p bytes = do
 2767         platform <- getPlatform
 2768         (moveCall, cpyCall) <- forkAltPair
 2769             (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr platform bytes)
 2770              (wordAlignment platform))
 2771             (getCode $ emitMemcpyCall  dst_p src_p (mkIntExpr platform bytes)
 2772              (wordAlignment platform))
 2773         emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
 2774 
 2775 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
 2776                   -> FCode ())  -- ^ copy function
 2777               -> CmmExpr        -- ^ source array
 2778               -> CmmExpr        -- ^ offset in source array
 2779               -> CmmExpr        -- ^ destination array
 2780               -> CmmExpr        -- ^ offset in destination array
 2781               -> WordOff        -- ^ number of elements to copy
 2782               -> FCode ()
 2783 emitCopyArray copy src0 src_off dst0 dst_off0 n =
 2784     when (n /= 0) $ do
 2785         profile <- getProfile
 2786         platform <- getPlatform
 2787 
 2788         -- Passed as arguments (be careful)
 2789         src     <- assignTempE src0
 2790         dst     <- assignTempE dst0
 2791         dst_off <- assignTempE dst_off0
 2792 
 2793         -- Nonmoving collector write barrier
 2794         emitCopyUpdRemSetPush platform (arrPtrsHdrSize profile) dst dst_off n
 2795 
 2796         -- Set the dirty bit in the header.
 2797         emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
 2798 
 2799         dst_elems_p <- assignTempE $ cmmOffsetB platform dst
 2800                        (arrPtrsHdrSize profile)
 2801         dst_p <- assignTempE $ cmmOffsetExprW platform dst_elems_p dst_off
 2802         src_p <- assignTempE $ cmmOffsetExprW platform
 2803                  (cmmOffsetB platform src (arrPtrsHdrSize profile)) src_off
 2804         let bytes = wordsToBytes platform n
 2805 
 2806         copy src dst dst_p src_p bytes
 2807 
 2808         -- The base address of the destination card table
 2809         dst_cards_p <- assignTempE $ cmmOffsetExprW platform dst_elems_p
 2810                        (loadArrPtrsSize profile dst)
 2811 
 2812         emitSetCards dst_off dst_cards_p n
 2813 
 2814 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
 2815                    -> FCode ()
 2816 doCopySmallArrayOp = emitCopySmallArray copy
 2817   where
 2818     -- Copy data (we assume the arrays aren't overlapping since
 2819     -- they're of different types)
 2820     copy _src _dst dst_p src_p bytes =
 2821         do platform <- getPlatform
 2822            emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
 2823                (wordAlignment platform)
 2824 
 2825 
 2826 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
 2827                           -> FCode ()
 2828 doCopySmallMutableArrayOp = emitCopySmallArray copy
 2829   where
 2830     -- The only time the memory might overlap is when the two arrays
 2831     -- we were provided are the same array!
 2832     -- TODO: Optimize branch for common case of no aliasing.
 2833     copy src dst dst_p src_p bytes = do
 2834         platform <- getPlatform
 2835         (moveCall, cpyCall) <- forkAltPair
 2836             (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr platform bytes)
 2837              (wordAlignment platform))
 2838             (getCode $ emitMemcpyCall  dst_p src_p (mkIntExpr platform bytes)
 2839              (wordAlignment platform))
 2840         emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
 2841 
 2842 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
 2843                        -> FCode ())  -- ^ copy function
 2844                    -> CmmExpr        -- ^ source array
 2845                    -> CmmExpr        -- ^ offset in source array
 2846                    -> CmmExpr        -- ^ destination array
 2847                    -> CmmExpr        -- ^ offset in destination array
 2848                    -> WordOff        -- ^ number of elements to copy
 2849                    -> FCode ()
 2850 emitCopySmallArray copy src0 src_off dst0 dst_off n =
 2851     when (n /= 0) $ do
 2852         profile <- getProfile
 2853         platform <- getPlatform
 2854 
 2855         -- Passed as arguments (be careful)
 2856         src     <- assignTempE src0
 2857         dst     <- assignTempE dst0
 2858 
 2859         -- Nonmoving collector write barrier
 2860         emitCopyUpdRemSetPush platform (smallArrPtrsHdrSize profile) dst dst_off n
 2861 
 2862         -- Set the dirty bit in the header.
 2863         emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
 2864 
 2865         dst_p <- assignTempE $ cmmOffsetExprW platform
 2866                  (cmmOffsetB platform dst (smallArrPtrsHdrSize profile)) dst_off
 2867         src_p <- assignTempE $ cmmOffsetExprW platform
 2868                  (cmmOffsetB platform src (smallArrPtrsHdrSize profile)) src_off
 2869         let bytes = wordsToBytes platform n
 2870 
 2871         copy src dst dst_p src_p bytes
 2872 
 2873 -- | Takes an info table label, a register to return the newly
 2874 -- allocated array in, a source array, an offset in the source array,
 2875 -- and the number of elements to copy. Allocates a new array and
 2876 -- initializes it from the source array.
 2877 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
 2878                -> FCode ()
 2879 emitCloneArray info_p res_r src src_off n = do
 2880     profile <- getProfile
 2881     platform <- getPlatform
 2882 
 2883     let info_ptr = mkLblExpr info_p
 2884         rep = arrPtrsRep platform n
 2885 
 2886     tickyAllocPrim (mkIntExpr platform (arrPtrsHdrSize profile))
 2887         (mkIntExpr platform (nonHdrSize platform rep))
 2888         (zeroExpr platform)
 2889 
 2890     let hdr_size = fixedHdrSize profile
 2891         constants = platformConstants platform
 2892 
 2893     base <- allocHeapClosure rep info_ptr cccsExpr
 2894                      [ (mkIntExpr platform n,
 2895                         hdr_size + pc_OFFSET_StgMutArrPtrs_ptrs constants)
 2896                      , (mkIntExpr platform (nonHdrSizeW rep),
 2897                         hdr_size + pc_OFFSET_StgMutArrPtrs_size constants)
 2898                      ]
 2899 
 2900     arr <- CmmLocal `fmap` newTemp (bWord platform)
 2901     emit $ mkAssign arr base
 2902 
 2903     dst_p <- assignTempE $ cmmOffsetB platform (CmmReg arr)
 2904              (arrPtrsHdrSize profile)
 2905     src_p <- assignTempE $ cmmOffsetExprW platform src
 2906              (cmmAddWord platform
 2907               (mkIntExpr platform (arrPtrsHdrSizeW profile)) src_off)
 2908 
 2909     emitMemcpyCall dst_p src_p (mkIntExpr platform (wordsToBytes platform n))
 2910         (wordAlignment platform)
 2911 
 2912     emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
 2913 
 2914 -- | Takes an info table label, a register to return the newly
 2915 -- allocated array in, a source array, an offset in the source array,
 2916 -- and the number of elements to copy. Allocates a new array and
 2917 -- initializes it from the source array.
 2918 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
 2919                     -> FCode ()
 2920 emitCloneSmallArray info_p res_r src src_off n = do
 2921     profile  <- getProfile
 2922     platform <- getPlatform
 2923 
 2924     let info_ptr = mkLblExpr info_p
 2925         rep = smallArrPtrsRep n
 2926 
 2927     tickyAllocPrim (mkIntExpr platform (smallArrPtrsHdrSize profile))
 2928         (mkIntExpr platform (nonHdrSize platform rep))
 2929         (zeroExpr platform)
 2930 
 2931     let hdr_size = fixedHdrSize profile
 2932 
 2933     base <- allocHeapClosure rep info_ptr cccsExpr
 2934                      [ (mkIntExpr platform n,
 2935                         hdr_size + pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform))
 2936                      ]
 2937 
 2938     arr <- CmmLocal `fmap` newTemp (bWord platform)
 2939     emit $ mkAssign arr base
 2940 
 2941     dst_p <- assignTempE $ cmmOffsetB platform (CmmReg arr)
 2942              (smallArrPtrsHdrSize profile)
 2943     src_p <- assignTempE $ cmmOffsetExprW platform src
 2944              (cmmAddWord platform
 2945               (mkIntExpr platform (smallArrPtrsHdrSizeW profile)) src_off)
 2946 
 2947     emitMemcpyCall dst_p src_p (mkIntExpr platform (wordsToBytes platform n))
 2948         (wordAlignment platform)
 2949 
 2950     emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
 2951 
 2952 -- | Takes and offset in the destination array, the base address of
 2953 -- the card table, and the number of elements affected (*not* the
 2954 -- number of cards). The number of elements may not be zero.
 2955 -- Marks the relevant cards as dirty.
 2956 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
 2957 emitSetCards dst_start dst_cards_start n = do
 2958     platform <- getPlatform
 2959     start_card <- assignTempE $ cardCmm platform dst_start
 2960     let end_card = cardCmm platform
 2961                    (cmmSubWord platform
 2962                     (cmmAddWord platform dst_start (mkIntExpr platform n))
 2963                     (mkIntExpr platform 1))
 2964     emitMemsetCall (cmmAddWord platform dst_cards_start start_card)
 2965         (mkIntExpr platform 1)
 2966         (cmmAddWord platform (cmmSubWord platform end_card start_card) (mkIntExpr platform 1))
 2967         (mkAlignment 1) -- no alignment (1 byte)
 2968 
 2969 -- Convert an element index to a card index
 2970 cardCmm :: Platform -> CmmExpr -> CmmExpr
 2971 cardCmm platform i =
 2972     cmmUShrWord platform i (mkIntExpr platform (pc_MUT_ARR_PTRS_CARD_BITS (platformConstants platform)))
 2973 
 2974 ------------------------------------------------------------------------------
 2975 -- SmallArray PrimOp implementations
 2976 
 2977 doReadSmallPtrArrayOp :: LocalReg
 2978                       -> CmmExpr
 2979                       -> CmmExpr
 2980                       -> FCode ()
 2981 doReadSmallPtrArrayOp res addr idx = do
 2982     profile <- getProfile
 2983     platform <- getPlatform
 2984     mkBasicIndexedRead (smallArrPtrsHdrSize profile) Nothing (gcWord platform) res addr
 2985         (gcWord platform) idx
 2986 
 2987 doWriteSmallPtrArrayOp :: CmmExpr
 2988                        -> CmmExpr
 2989                        -> CmmExpr
 2990                        -> FCode ()
 2991 doWriteSmallPtrArrayOp addr idx val = do
 2992     profile <- getProfile
 2993     platform <- getPlatform
 2994     let ty = cmmExprType platform val
 2995 
 2996     -- Update remembered set for non-moving collector
 2997     tmp <- newTemp ty
 2998     mkBasicIndexedRead (smallArrPtrsHdrSize profile) Nothing ty tmp addr ty idx
 2999     whenUpdRemSetEnabled $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
 3000 
 3001     emitPrimCall [] MO_WriteBarrier [] -- #12469
 3002     mkBasicIndexedWrite (smallArrPtrsHdrSize profile) Nothing addr ty idx val
 3003     emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
 3004 
 3005 ------------------------------------------------------------------------------
 3006 -- Atomic read-modify-write
 3007 
 3008 -- | Emit an atomic modification to a byte array element. The result
 3009 -- reg contains that previous value of the element. Implies a full
 3010 -- memory barrier.
 3011 doAtomicByteArrayRMW
 3012             :: LocalReg      -- ^ Result reg
 3013             -> AtomicMachOp  -- ^ Atomic op (e.g. add)
 3014             -> CmmExpr       -- ^ MutableByteArray#
 3015             -> CmmExpr       -- ^ Index
 3016             -> CmmType       -- ^ Type of element by which we are indexing
 3017             -> CmmExpr       -- ^ Op argument (e.g. amount to add)
 3018             -> FCode ()
 3019 doAtomicByteArrayRMW res amop mba idx idx_ty n = do
 3020     profile <- getProfile
 3021     platform <- getPlatform
 3022     let width = typeWidth idx_ty
 3023         addr  = cmmIndexOffExpr platform (arrWordsHdrSize profile)
 3024                 width mba idx
 3025     doAtomicAddrRMW res amop addr idx_ty n
 3026 
 3027 doAtomicAddrRMW
 3028             :: LocalReg      -- ^ Result reg
 3029             -> AtomicMachOp  -- ^ Atomic op (e.g. add)
 3030             -> CmmExpr       -- ^ Addr#
 3031             -> CmmType       -- ^ Pointed value type
 3032             -> CmmExpr       -- ^ Op argument (e.g. amount to add)
 3033             -> FCode ()
 3034 doAtomicAddrRMW res amop addr ty n =
 3035     emitPrimCall
 3036         [ res ]
 3037         (MO_AtomicRMW (typeWidth ty) amop)
 3038         [ addr, n ]
 3039 
 3040 -- | Emit an atomic read to a byte array that acts as a memory barrier.
 3041 doAtomicReadByteArray
 3042     :: LocalReg  -- ^ Result reg
 3043     -> CmmExpr   -- ^ MutableByteArray#
 3044     -> CmmExpr   -- ^ Index
 3045     -> CmmType   -- ^ Type of element by which we are indexing
 3046     -> FCode ()
 3047 doAtomicReadByteArray res mba idx idx_ty = do
 3048     profile <- getProfile
 3049     platform <- getPlatform
 3050     let width = typeWidth idx_ty
 3051         addr  = cmmIndexOffExpr platform (arrWordsHdrSize profile)
 3052                 width mba idx
 3053     doAtomicReadAddr res addr idx_ty
 3054 
 3055 -- | Emit an atomic read to an address that acts as a memory barrier.
 3056 doAtomicReadAddr
 3057     :: LocalReg  -- ^ Result reg
 3058     -> CmmExpr   -- ^ Addr#
 3059     -> CmmType   -- ^ Type of element by which we are indexing
 3060     -> FCode ()
 3061 doAtomicReadAddr res addr ty =
 3062     emitPrimCall
 3063         [ res ]
 3064         (MO_AtomicRead (typeWidth ty))
 3065         [ addr ]
 3066 
 3067 -- | Emit an atomic write to a byte array that acts as a memory barrier.
 3068 doAtomicWriteByteArray
 3069     :: CmmExpr   -- ^ MutableByteArray#
 3070     -> CmmExpr   -- ^ Index
 3071     -> CmmType   -- ^ Type of element by which we are indexing
 3072     -> CmmExpr   -- ^ Value to write
 3073     -> FCode ()
 3074 doAtomicWriteByteArray mba idx idx_ty val = do
 3075     profile <- getProfile
 3076     platform <- getPlatform
 3077     let width = typeWidth idx_ty
 3078         addr  = cmmIndexOffExpr platform (arrWordsHdrSize profile)
 3079                 width mba idx
 3080     doAtomicWriteAddr addr idx_ty val
 3081 
 3082 -- | Emit an atomic write to an address that acts as a memory barrier.
 3083 doAtomicWriteAddr
 3084     :: CmmExpr   -- ^ Addr#
 3085     -> CmmType   -- ^ Type of element by which we are indexing
 3086     -> CmmExpr   -- ^ Value to write
 3087     -> FCode ()
 3088 doAtomicWriteAddr addr ty val =
 3089     emitPrimCall
 3090         [ {- no results -} ]
 3091         (MO_AtomicWrite (typeWidth ty))
 3092         [ addr, val ]
 3093 
 3094 doCasByteArray
 3095     :: LocalReg  -- ^ Result reg
 3096     -> CmmExpr   -- ^ MutableByteArray#
 3097     -> CmmExpr   -- ^ Index
 3098     -> CmmType   -- ^ Type of element by which we are indexing
 3099     -> CmmExpr   -- ^ Old value
 3100     -> CmmExpr   -- ^ New value
 3101     -> FCode ()
 3102 doCasByteArray res mba idx idx_ty old new = do
 3103     profile <- getProfile
 3104     platform <- getPlatform
 3105     let width = typeWidth idx_ty
 3106         addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
 3107                width mba idx
 3108     emitPrimCall
 3109         [ res ]
 3110         (MO_Cmpxchg width)
 3111         [ addr, old, new ]
 3112 
 3113 ------------------------------------------------------------------------------
 3114 -- Helpers for emitting function calls
 3115 
 3116 -- | Emit a call to @memcpy@.
 3117 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 3118 emitMemcpyCall dst src n align =
 3119     emitPrimCall
 3120         [ {-no results-} ]
 3121         (MO_Memcpy (alignmentBytes align))
 3122         [ dst, src, n ]
 3123 
 3124 -- | Emit a call to @memmove@.
 3125 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 3126 emitMemmoveCall dst src n align =
 3127     emitPrimCall
 3128         [ {- no results -} ]
 3129         (MO_Memmove (alignmentBytes align))
 3130         [ dst, src, n ]
 3131 
 3132 -- | Emit a call to @memset@.  The second argument must fit inside an
 3133 -- unsigned char.
 3134 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 3135 emitMemsetCall dst c n align =
 3136     emitPrimCall
 3137         [ {- no results -} ]
 3138         (MO_Memset (alignmentBytes align))
 3139         [ dst, c, n ]
 3140 
 3141 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
 3142 emitMemcmpCall res ptr1 ptr2 n align = do
 3143     -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
 3144     -- code-gens currently call out to the @memcmp(3)@ C function.
 3145     -- This was easier than moving the sign-extensions into
 3146     -- all the code-gens.
 3147     platform <- getPlatform
 3148     let is32Bit = typeWidth (localRegType res) == W32
 3149 
 3150     cres <- if is32Bit
 3151               then return res
 3152               else newTemp b32
 3153 
 3154     emitPrimCall
 3155         [ cres ]
 3156         (MO_Memcmp align)
 3157         [ ptr1, ptr2, n ]
 3158 
 3159     unless is32Bit $
 3160       emit $ mkAssign (CmmLocal res)
 3161                       (CmmMachOp
 3162                          (mo_s_32ToWord platform)
 3163                          [(CmmReg (CmmLocal cres))])
 3164 
 3165 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 3166 emitBSwapCall res x width =
 3167     emitPrimCall
 3168         [ res ]
 3169         (MO_BSwap width)
 3170         [ x ]
 3171 
 3172 emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 3173 emitBRevCall res x width =
 3174     emitPrimCall
 3175         [ res ]
 3176         (MO_BRev width)
 3177         [ x ]
 3178 
 3179 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 3180 emitPopCntCall res x width =
 3181     emitPrimCall
 3182         [ res ]
 3183         (MO_PopCnt width)
 3184         [ x ]
 3185 
 3186 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
 3187 emitPdepCall res x y width =
 3188     emitPrimCall
 3189         [ res ]
 3190         (MO_Pdep width)
 3191         [ x, y ]
 3192 
 3193 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
 3194 emitPextCall res x y width =
 3195     emitPrimCall
 3196         [ res ]
 3197         (MO_Pext width)
 3198         [ x, y ]
 3199 
 3200 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 3201 emitClzCall res x width =
 3202     emitPrimCall
 3203         [ res ]
 3204         (MO_Clz width)
 3205         [ x ]
 3206 
 3207 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 3208 emitCtzCall res x width =
 3209     emitPrimCall
 3210         [ res ]
 3211         (MO_Ctz width)
 3212         [ x ]
 3213 
 3214 ---------------------------------------------------------------------------
 3215 -- Pushing to the update remembered set
 3216 ---------------------------------------------------------------------------
 3217 
 3218 -- | Push a range of pointer-array elements that are about to be copied over to
 3219 -- the update remembered set.
 3220 emitCopyUpdRemSetPush :: Platform
 3221                       -> ByteOff    -- ^ array header size (in bytes)
 3222                       -> CmmExpr    -- ^ destination array
 3223                       -> CmmExpr    -- ^ offset in destination array (in words)
 3224                       -> Int        -- ^ number of elements to copy
 3225                       -> FCode ()
 3226 emitCopyUpdRemSetPush _platform _hdr_size _dst _dst_off 0 = return ()
 3227 emitCopyUpdRemSetPush platform hdr_size dst dst_off n =
 3228     whenUpdRemSetEnabled $ do
 3229         updfr_off <- getUpdFrameOff
 3230         graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off []
 3231         emit graph
 3232   where
 3233     lbl = mkLblExpr $ mkPrimCallLabel
 3234           $ PrimCall (fsLit "stg_copyArray_barrier") rtsUnit
 3235     args =
 3236       [ mkIntExpr platform hdr_size
 3237       , dst
 3238       , dst_off
 3239       , mkIntExpr platform n
 3240       ]