never executed always true always false
1 {-# LANGUAGE LambdaCase #-}
2
3 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
4
5 ----------------------------------------------------------------------------
6 --
7 -- Stg to C--: primitive operations
8 --
9 -- (c) The University of Glasgow 2004-2006
10 --
11 -----------------------------------------------------------------------------
12
13 module GHC.StgToCmm.Prim (
14 cgOpApp,
15 shouldInlinePrimOp
16 ) where
17
18 import GHC.Prelude hiding ((<*>))
19
20 import GHC.Platform
21 import GHC.Platform.Profile
22
23 import GHC.StgToCmm.Layout
24 import GHC.StgToCmm.Foreign
25 import GHC.StgToCmm.Monad
26 import GHC.StgToCmm.Utils
27 import GHC.StgToCmm.Ticky
28 import GHC.StgToCmm.Heap
29 import GHC.StgToCmm.Prof ( costCentreFrom )
30
31 import GHC.Driver.Session
32 import GHC.Driver.Backend
33 import GHC.Types.Basic
34 import GHC.Cmm.BlockId
35 import GHC.Cmm.Graph
36 import GHC.Stg.Syntax
37 import GHC.Cmm
38 import GHC.Unit ( rtsUnit )
39 import GHC.Core.Type ( Type, tyConAppTyCon )
40 import GHC.Core.TyCon
41 import GHC.Cmm.CLabel
42 import GHC.Cmm.Utils
43 import GHC.Builtin.PrimOps
44 import GHC.Runtime.Heap.Layout
45 import GHC.Data.FastString
46 import GHC.Utils.Misc
47 import GHC.Utils.Panic
48 import GHC.Utils.Panic.Plain
49 import Data.Maybe
50
51 import Control.Monad (liftM, when, unless)
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall ty) stg_args res_ty
75 = cgForeignCall fcall ty stg_args res_ty
76 -- Note [Foreign call results]
77
78 cgOpApp (StgPrimOp primop) args res_ty = do
79 dflags <- getDynFlags
80 cmm_args <- getNonVoidArgAmodes args
81 cmmPrimOpApp dflags primop cmm_args (Just res_ty)
82
83 cgOpApp (StgPrimCallOp primcall) args _res_ty
84 = do { cmm_args <- getNonVoidArgAmodes args
85 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
86 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
87
88 cmmPrimOpApp :: DynFlags -> PrimOp -> [CmmExpr] -> Maybe Type -> FCode ReturnKind
89 cmmPrimOpApp dflags primop cmm_args mres_ty =
90 case emitPrimOp dflags primop cmm_args of
91 PrimopCmmEmit_Internal f ->
92 let
93 -- if the result type isn't explicitly given, we directly use the
94 -- result type of the primop.
95 res_ty = fromMaybe (primOpResultType primop) mres_ty
96 in emitReturn =<< f res_ty
97 PrimopCmmEmit_External -> do
98 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
99 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
100
101
102 -- | Interpret the argument as an unsigned value, assuming the value
103 -- is given in two-complement form in the given width.
104 --
105 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
106 --
107 -- This function is used to work around the fact that many array
108 -- primops take Int# arguments, but we interpret them as unsigned
109 -- quantities in the code gen. This means that we have to be careful
110 -- every time we work on e.g. a CmmInt literal that corresponds to the
111 -- array size, as it might contain a negative Integer value if the
112 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
113 -- literal.
114 asUnsigned :: Width -> Integer -> Integer
115 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
116
117 ------------------------------------------------------------------------
118 -- Emitting code for a primop
119 ------------------------------------------------------------------------
120
121 shouldInlinePrimOp :: DynFlags -> PrimOp -> [CmmExpr] -> Bool
122 shouldInlinePrimOp dflags op args = case emitPrimOp dflags op args of
123 PrimopCmmEmit_External -> False
124 PrimopCmmEmit_Internal _ -> True
125
126 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
127 -- ByteOff (or some other fixed width signed type) to represent
128 -- array sizes or indices. This means that these will overflow for
129 -- large enough sizes.
130
131 -- TODO: Several primops, such as 'copyArray#', only have an inline
132 -- implementation (below) but could possibly have both an inline
133 -- implementation and an out-of-line implementation, just like
134 -- 'newArray#'. This would lower the amount of code generated,
135 -- hopefully without a performance impact (needs to be measured).
136
137 -- | The big function handling all the primops.
138 --
139 -- In the simple case, there is just one implementation, and we emit that.
140 --
141 -- In more complex cases, there is a foreign call (out of line) fallback. This
142 -- might happen e.g. if there's enough static information, such as statically
143 -- know arguments.
144 emitPrimOp
145 :: DynFlags
146 -> PrimOp -- ^ The primop
147 -> [CmmExpr] -- ^ The primop arguments
148 -> PrimopCmmEmit
149 emitPrimOp dflags primop = case primop of
150 NewByteArrayOp_Char -> \case
151 [(CmmLit (CmmInt n w))]
152 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags)
153 -> opIntoRegs $ \ [res] -> doNewByteArrayOp res (fromInteger n)
154 _ -> PrimopCmmEmit_External
155
156 NewArrayOp -> \case
157 [(CmmLit (CmmInt n w)), init]
158 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
159 -> opIntoRegs $ \[res] -> doNewArrayOp res (arrPtrsRep platform (fromInteger n)) mkMAP_DIRTY_infoLabel
160 [ (mkIntExpr platform (fromInteger n),
161 fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform))
162 , (mkIntExpr platform (nonHdrSizeW (arrPtrsRep platform (fromInteger n))),
163 fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_size (platformConstants platform))
164 ]
165 (fromInteger n) init
166 _ -> PrimopCmmEmit_External
167
168 CopyArrayOp -> \case
169 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
170 opIntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
171 _ -> PrimopCmmEmit_External
172
173 CopyMutableArrayOp -> \case
174 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
175 opIntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
176 _ -> PrimopCmmEmit_External
177
178 CopyArrayArrayOp -> \case
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
180 opIntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
181 _ -> PrimopCmmEmit_External
182
183 CopyMutableArrayArrayOp -> \case
184 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
185 opIntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
186 _ -> PrimopCmmEmit_External
187
188 CloneArrayOp -> \case
189 [src, src_off, (CmmLit (CmmInt n w))]
190 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
191 -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
192 _ -> PrimopCmmEmit_External
193
194 CloneMutableArrayOp -> \case
195 [src, src_off, (CmmLit (CmmInt n w))]
196 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
197 -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
198 _ -> PrimopCmmEmit_External
199
200 FreezeArrayOp -> \case
201 [src, src_off, (CmmLit (CmmInt n w))]
202 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
203 -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
204 _ -> PrimopCmmEmit_External
205
206 ThawArrayOp -> \case
207 [src, src_off, (CmmLit (CmmInt n w))]
208 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
209 -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
210 _ -> PrimopCmmEmit_External
211
212 NewSmallArrayOp -> \case
213 [(CmmLit (CmmInt n w)), init]
214 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
215 -> opIntoRegs $ \ [res] ->
216 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
217 [ (mkIntExpr platform (fromInteger n),
218 fixedHdrSize profile + pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform))
219 ]
220 (fromInteger n) init
221 _ -> PrimopCmmEmit_External
222
223 CopySmallArrayOp -> \case
224 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
225 opIntoRegs $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
226 _ -> PrimopCmmEmit_External
227
228 CopySmallMutableArrayOp -> \case
229 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
230 opIntoRegs $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
231 _ -> PrimopCmmEmit_External
232
233 CloneSmallArrayOp -> \case
234 [src, src_off, (CmmLit (CmmInt n w))]
235 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
236 -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
237 _ -> PrimopCmmEmit_External
238
239 CloneSmallMutableArrayOp -> \case
240 [src, src_off, (CmmLit (CmmInt n w))]
241 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
242 -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
243 _ -> PrimopCmmEmit_External
244
245 FreezeSmallArrayOp -> \case
246 [src, src_off, (CmmLit (CmmInt n w))]
247 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
248 -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
249 _ -> PrimopCmmEmit_External
250
251 ThawSmallArrayOp -> \case
252 [src, src_off, (CmmLit (CmmInt n w))]
253 | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
254 -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
255 _ -> PrimopCmmEmit_External
256
257 -- First we handle various awkward cases specially.
258
259 ParOp -> \[arg] -> opIntoRegs $ \[res] ->
260 -- for now, just implement this in a C function
261 -- later, we might want to inline it.
262 emitCCall
263 [(res,NoHint)]
264 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
265 [(baseExpr, AddrHint), (arg,AddrHint)]
266
267 SparkOp -> \[arg] -> opIntoRegs $ \[res] -> do
268 -- returns the value of arg in res. We're going to therefore
269 -- refer to arg twice (once to pass to newSpark(), and once to
270 -- assign to res), so put it in a temporary.
271 tmp <- assignTemp arg
272 tmp2 <- newTemp (bWord platform)
273 emitCCall
274 [(tmp2,NoHint)]
275 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
276 [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
277 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
278
279 GetCCSOfOp -> \[arg] -> opIntoRegs $ \[res] -> do
280 let
281 val
282 | profileIsProfiling profile = costCentreFrom platform (cmmUntag platform arg)
283 | otherwise = CmmLit (zeroCLit platform)
284 emitAssign (CmmLocal res) val
285
286 GetCurrentCCSOp -> \[_] -> opIntoRegs $ \[res] ->
287 emitAssign (CmmLocal res) cccsExpr
288
289 MyThreadIdOp -> \[] -> opIntoRegs $ \[res] ->
290 emitAssign (CmmLocal res) currentTSOExpr
291
292 ReadMutVarOp -> \[mutv] -> opIntoRegs $ \[res] ->
293 emitAssign (CmmLocal res) (cmmLoadIndexW platform mutv (fixedHdrSizeW profile) (gcWord platform))
294
295 WriteMutVarOp -> \[mutv, var] -> opIntoRegs $ \res@[] -> do
296 old_val <- CmmLocal <$> newTemp (cmmExprType platform var)
297 emitAssign old_val (cmmLoadIndexW platform mutv (fixedHdrSizeW profile) (gcWord platform))
298
299 -- Without this write barrier, other CPUs may see this pointer before
300 -- the writes for the closure it points to have occurred.
301 -- Note that this also must come after we read the old value to ensure
302 -- that the read of old_val comes before another core's write to the
303 -- MutVar's value.
304 emitPrimCall res MO_WriteBarrier []
305 emitStore (cmmOffsetW platform mutv (fixedHdrSizeW profile)) var
306 emitCCall
307 [{-no results-}]
308 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
309 [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
310
311 -- #define sizzeofByteArrayzh(r,a) \
312 -- r = ((StgArrBytes *)(a))->bytes
313 SizeofByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
314 emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
315
316 -- #define sizzeofMutableByteArrayzh(r,a) \
317 -- r = ((StgArrBytes *)(a))->bytes
318 SizeofMutableByteArrayOp -> emitPrimOp dflags SizeofByteArrayOp
319
320 -- #define getSizzeofMutableByteArrayzh(r,a) \
321 -- r = ((StgArrBytes *)(a))->bytes
322 GetSizeofMutableByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
323 emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
324
325
326 -- #define touchzh(o) /* nothing */
327 TouchOp -> \args@[_] -> opIntoRegs $ \res@[] ->
328 emitPrimCall res MO_Touch args
329
330 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
331 ByteArrayContents_Char -> \[arg] -> opIntoRegs $ \[res] ->
332 emitAssign (CmmLocal res) (cmmOffsetB platform arg (arrWordsHdrSize profile))
333
334 -- #define mutableByteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
335 MutableByteArrayContents_Char -> \[arg] -> opIntoRegs $ \[res] ->
336 emitAssign (CmmLocal res) (cmmOffsetB platform arg (arrWordsHdrSize profile))
337
338 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
339 StableNameToIntOp -> \[arg] -> opIntoRegs $ \[res] ->
340 emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile) (bWord platform))
341
342 EqStablePtrOp -> \args -> opTranslate args (mo_wordEq platform)
343
344 ReallyUnsafePtrEqualityOp -> \[arg1, arg2] -> opIntoRegs $ \[res] ->
345 emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq platform) [arg1,arg2])
346
347 -- #define addrToHValuezh(r,a) r=(P_)a
348 AddrToAnyOp -> \[arg] -> opIntoRegs $ \[res] ->
349 emitAssign (CmmLocal res) arg
350
351 -- #define hvalueToAddrzh(r, a) r=(W_)a
352 AnyToAddrOp -> \[arg] -> opIntoRegs $ \[res] ->
353 emitAssign (CmmLocal res) arg
354
355 {- Freezing arrays-of-ptrs requires changing an info table, for the
356 benefit of the generational collector. It needs to scavenge mutable
357 objects, even if they are in old space. When they become immutable,
358 they can be removed from this scavenge list. -}
359
360 -- #define unsafeFreezzeArrayzh(r,a)
361 -- {
362 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
363 -- r = a;
364 -- }
365 UnsafeFreezeArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
366 emit $ catAGraphs
367 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
368 mkAssign (CmmLocal res) arg ]
369 UnsafeFreezeArrayArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
370 emit $ catAGraphs
371 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
372 mkAssign (CmmLocal res) arg ]
373 UnsafeFreezeSmallArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
374 emit $ catAGraphs
375 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
376 mkAssign (CmmLocal res) arg ]
377
378 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
379 UnsafeFreezeByteArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
380 emitAssign (CmmLocal res) arg
381
382 -- Reading/writing pointer arrays
383
384 ReadArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
385 doReadPtrArrayOp res obj ix
386 IndexArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
387 doReadPtrArrayOp res obj ix
388 WriteArrayOp -> \[obj, ix, v] -> opIntoRegs $ \[] ->
389 doWritePtrArrayOp obj ix v
390
391 IndexArrayArrayOp_ByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
392 doReadPtrArrayOp res obj ix
393 IndexArrayArrayOp_ArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
394 doReadPtrArrayOp res obj ix
395 ReadArrayArrayOp_ByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
396 doReadPtrArrayOp res obj ix
397 ReadArrayArrayOp_MutableByteArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
398 doReadPtrArrayOp res obj ix
399 ReadArrayArrayOp_ArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
400 doReadPtrArrayOp res obj ix
401 ReadArrayArrayOp_MutableArrayArray -> \[obj, ix] -> opIntoRegs $ \[res] ->
402 doReadPtrArrayOp res obj ix
403 WriteArrayArrayOp_ByteArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
404 doWritePtrArrayOp obj ix v
405 WriteArrayArrayOp_MutableByteArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
406 doWritePtrArrayOp obj ix v
407 WriteArrayArrayOp_ArrayArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
408 doWritePtrArrayOp obj ix v
409 WriteArrayArrayOp_MutableArrayArray -> \[obj,ix,v] -> opIntoRegs $ \[] ->
410 doWritePtrArrayOp obj ix v
411
412 ReadSmallArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
413 doReadSmallPtrArrayOp res obj ix
414 IndexSmallArrayOp -> \[obj, ix] -> opIntoRegs $ \[res] ->
415 doReadSmallPtrArrayOp res obj ix
416 WriteSmallArrayOp -> \[obj,ix,v] -> opIntoRegs $ \[] ->
417 doWriteSmallPtrArrayOp obj ix v
418
419 -- Getting the size of pointer arrays
420
421 SizeofArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
422 emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg
423 (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform)))
424 (bWord platform))
425 SizeofMutableArrayOp -> emitPrimOp dflags SizeofArrayOp
426 SizeofArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
427 SizeofMutableArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
428 SizeofSmallArrayOp -> \[arg] -> opIntoRegs $ \[res] ->
429 emit $ mkAssign (CmmLocal res)
430 (cmmLoadIndexW platform arg
431 (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform)))
432 (bWord platform))
433
434 SizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp
435 GetSizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp
436
437 -- IndexXXXoffAddr
438
439 IndexOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
440 doIndexOffAddrOp (Just (mo_u_8ToWord platform)) b8 res args
441 IndexOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
442 doIndexOffAddrOp (Just (mo_u_32ToWord platform)) b32 res args
443 IndexOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
444 doIndexOffAddrOp Nothing (bWord platform) res args
445 IndexOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
446 doIndexOffAddrOp Nothing (bWord platform) res args
447 IndexOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
448 doIndexOffAddrOp Nothing (bWord platform) res args
449 IndexOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
450 doIndexOffAddrOp Nothing f32 res args
451 IndexOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
452 doIndexOffAddrOp Nothing f64 res args
453 IndexOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
454 doIndexOffAddrOp Nothing (bWord platform) res args
455 IndexOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
456 doIndexOffAddrOp Nothing b8 res args
457 IndexOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
458 doIndexOffAddrOp Nothing b16 res args
459 IndexOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
460 doIndexOffAddrOp Nothing b32 res args
461 IndexOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
462 doIndexOffAddrOp Nothing b64 res args
463 IndexOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
464 doIndexOffAddrOp Nothing b8 res args
465 IndexOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
466 doIndexOffAddrOp Nothing b16 res args
467 IndexOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
468 doIndexOffAddrOp Nothing b32 res args
469 IndexOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
470 doIndexOffAddrOp Nothing b64 res args
471
472 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
473
474 ReadOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
475 doIndexOffAddrOp (Just (mo_u_8ToWord platform)) b8 res args
476 ReadOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
477 doIndexOffAddrOp (Just (mo_u_32ToWord platform)) b32 res args
478 ReadOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
479 doIndexOffAddrOp Nothing (bWord platform) res args
480 ReadOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
481 doIndexOffAddrOp Nothing (bWord platform) res args
482 ReadOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
483 doIndexOffAddrOp Nothing (bWord platform) res args
484 ReadOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
485 doIndexOffAddrOp Nothing f32 res args
486 ReadOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
487 doIndexOffAddrOp Nothing f64 res args
488 ReadOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
489 doIndexOffAddrOp Nothing (bWord platform) res args
490 ReadOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
491 doIndexOffAddrOp Nothing b8 res args
492 ReadOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
493 doIndexOffAddrOp Nothing b16 res args
494 ReadOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
495 doIndexOffAddrOp Nothing b32 res args
496 ReadOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
497 doIndexOffAddrOp Nothing b64 res args
498 ReadOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
499 doIndexOffAddrOp Nothing b8 res args
500 ReadOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
501 doIndexOffAddrOp Nothing b16 res args
502 ReadOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
503 doIndexOffAddrOp Nothing b32 res args
504 ReadOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
505 doIndexOffAddrOp Nothing b64 res args
506
507 -- IndexXXXArray
508
509 IndexByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
510 doIndexByteArrayOp (Just (mo_u_8ToWord platform)) b8 res args
511 IndexByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
512 doIndexByteArrayOp (Just (mo_u_32ToWord platform)) b32 res args
513 IndexByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
514 doIndexByteArrayOp Nothing (bWord platform) res args
515 IndexByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
516 doIndexByteArrayOp Nothing (bWord platform) res args
517 IndexByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
518 doIndexByteArrayOp Nothing (bWord platform) res args
519 IndexByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
520 doIndexByteArrayOp Nothing f32 res args
521 IndexByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
522 doIndexByteArrayOp Nothing f64 res args
523 IndexByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
524 doIndexByteArrayOp Nothing (bWord platform) res args
525 IndexByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
526 doIndexByteArrayOp Nothing b8 res args
527 IndexByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
528 doIndexByteArrayOp Nothing b16 res args
529 IndexByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
530 doIndexByteArrayOp Nothing b32 res args
531 IndexByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
532 doIndexByteArrayOp Nothing b64 res args
533 IndexByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
534 doIndexByteArrayOp Nothing b8 res args
535 IndexByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
536 doIndexByteArrayOp Nothing b16 res args
537 IndexByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
538 doIndexByteArrayOp Nothing b32 res args
539 IndexByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
540 doIndexByteArrayOp Nothing b64 res args
541
542 -- ReadXXXArray, identical to IndexXXXArray.
543
544 ReadByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
545 doIndexByteArrayOp (Just (mo_u_8ToWord platform)) b8 res args
546 ReadByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
547 doIndexByteArrayOp (Just (mo_u_32ToWord platform)) b32 res args
548 ReadByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
549 doIndexByteArrayOp Nothing (bWord platform) res args
550 ReadByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
551 doIndexByteArrayOp Nothing (bWord platform) res args
552 ReadByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
553 doIndexByteArrayOp Nothing (bWord platform) res args
554 ReadByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
555 doIndexByteArrayOp Nothing f32 res args
556 ReadByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
557 doIndexByteArrayOp Nothing f64 res args
558 ReadByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
559 doIndexByteArrayOp Nothing (bWord platform) res args
560 ReadByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
561 doIndexByteArrayOp Nothing b8 res args
562 ReadByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
563 doIndexByteArrayOp Nothing b16 res args
564 ReadByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
565 doIndexByteArrayOp Nothing b32 res args
566 ReadByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
567 doIndexByteArrayOp Nothing b64 res args
568 ReadByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
569 doIndexByteArrayOp Nothing b8 res args
570 ReadByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
571 doIndexByteArrayOp Nothing b16 res args
572 ReadByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
573 doIndexByteArrayOp Nothing b32 res args
574 ReadByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
575 doIndexByteArrayOp Nothing b64 res args
576
577 -- IndexWord8ArrayAsXXX
578
579 IndexByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
580 doIndexByteArrayOpAs (Just (mo_u_8ToWord platform)) b8 b8 res args
581 IndexByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
582 doIndexByteArrayOpAs (Just (mo_u_32ToWord platform)) b32 b8 res args
583 IndexByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
584 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
585 IndexByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
586 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
587 IndexByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
588 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
589 IndexByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
590 doIndexByteArrayOpAs Nothing f32 b8 res args
591 IndexByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
592 doIndexByteArrayOpAs Nothing f64 b8 res args
593 IndexByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
594 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
595 IndexByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
596 doIndexByteArrayOpAs Nothing b16 b8 res args
597 IndexByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
598 doIndexByteArrayOpAs Nothing b32 b8 res args
599 IndexByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
600 doIndexByteArrayOpAs Nothing b64 b8 res args
601 IndexByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
602 doIndexByteArrayOpAs Nothing b16 b8 res args
603 IndexByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
604 doIndexByteArrayOpAs Nothing b32 b8 res args
605 IndexByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
606 doIndexByteArrayOpAs Nothing b64 b8 res args
607
608 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
609
610 ReadByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
611 doIndexByteArrayOpAs (Just (mo_u_8ToWord platform)) b8 b8 res args
612 ReadByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
613 doIndexByteArrayOpAs (Just (mo_u_32ToWord platform)) b32 b8 res args
614 ReadByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
615 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
616 ReadByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
617 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
618 ReadByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
619 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
620 ReadByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
621 doIndexByteArrayOpAs Nothing f32 b8 res args
622 ReadByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
623 doIndexByteArrayOpAs Nothing f64 b8 res args
624 ReadByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
625 doIndexByteArrayOpAs Nothing (bWord platform) b8 res args
626 ReadByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
627 doIndexByteArrayOpAs Nothing b16 b8 res args
628 ReadByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
629 doIndexByteArrayOpAs Nothing b32 b8 res args
630 ReadByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
631 doIndexByteArrayOpAs Nothing b64 b8 res args
632 ReadByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
633 doIndexByteArrayOpAs Nothing b16 b8 res args
634 ReadByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
635 doIndexByteArrayOpAs Nothing b32 b8 res args
636 ReadByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
637 doIndexByteArrayOpAs Nothing b64 b8 res args
638
639 -- WriteXXXoffAddr
640
641 WriteOffAddrOp_Char -> \args -> opIntoRegs $ \res ->
642 doWriteOffAddrOp (Just (mo_WordTo8 platform)) b8 res args
643 WriteOffAddrOp_WideChar -> \args -> opIntoRegs $ \res ->
644 doWriteOffAddrOp (Just (mo_WordTo32 platform)) b32 res args
645 WriteOffAddrOp_Int -> \args -> opIntoRegs $ \res ->
646 doWriteOffAddrOp Nothing (bWord platform) res args
647 WriteOffAddrOp_Word -> \args -> opIntoRegs $ \res ->
648 doWriteOffAddrOp Nothing (bWord platform) res args
649 WriteOffAddrOp_Addr -> \args -> opIntoRegs $ \res ->
650 doWriteOffAddrOp Nothing (bWord platform) res args
651 WriteOffAddrOp_Float -> \args -> opIntoRegs $ \res ->
652 doWriteOffAddrOp Nothing f32 res args
653 WriteOffAddrOp_Double -> \args -> opIntoRegs $ \res ->
654 doWriteOffAddrOp Nothing f64 res args
655 WriteOffAddrOp_StablePtr -> \args -> opIntoRegs $ \res ->
656 doWriteOffAddrOp Nothing (bWord platform) res args
657 WriteOffAddrOp_Int8 -> \args -> opIntoRegs $ \res ->
658 doWriteOffAddrOp Nothing b8 res args
659 WriteOffAddrOp_Int16 -> \args -> opIntoRegs $ \res ->
660 doWriteOffAddrOp Nothing b16 res args
661 WriteOffAddrOp_Int32 -> \args -> opIntoRegs $ \res ->
662 doWriteOffAddrOp Nothing b32 res args
663 WriteOffAddrOp_Int64 -> \args -> opIntoRegs $ \res ->
664 doWriteOffAddrOp Nothing b64 res args
665 WriteOffAddrOp_Word8 -> \args -> opIntoRegs $ \res ->
666 doWriteOffAddrOp Nothing b8 res args
667 WriteOffAddrOp_Word16 -> \args -> opIntoRegs $ \res ->
668 doWriteOffAddrOp Nothing b16 res args
669 WriteOffAddrOp_Word32 -> \args -> opIntoRegs $ \res ->
670 doWriteOffAddrOp Nothing b32 res args
671 WriteOffAddrOp_Word64 -> \args -> opIntoRegs $ \res ->
672 doWriteOffAddrOp Nothing b64 res args
673
674 -- WriteXXXArray
675
676 WriteByteArrayOp_Char -> \args -> opIntoRegs $ \res ->
677 doWriteByteArrayOp (Just (mo_WordTo8 platform)) b8 res args
678 WriteByteArrayOp_WideChar -> \args -> opIntoRegs $ \res ->
679 doWriteByteArrayOp (Just (mo_WordTo32 platform)) b32 res args
680 WriteByteArrayOp_Int -> \args -> opIntoRegs $ \res ->
681 doWriteByteArrayOp Nothing (bWord platform) res args
682 WriteByteArrayOp_Word -> \args -> opIntoRegs $ \res ->
683 doWriteByteArrayOp Nothing (bWord platform) res args
684 WriteByteArrayOp_Addr -> \args -> opIntoRegs $ \res ->
685 doWriteByteArrayOp Nothing (bWord platform) res args
686 WriteByteArrayOp_Float -> \args -> opIntoRegs $ \res ->
687 doWriteByteArrayOp Nothing f32 res args
688 WriteByteArrayOp_Double -> \args -> opIntoRegs $ \res ->
689 doWriteByteArrayOp Nothing f64 res args
690 WriteByteArrayOp_StablePtr -> \args -> opIntoRegs $ \res ->
691 doWriteByteArrayOp Nothing (bWord platform) res args
692 WriteByteArrayOp_Int8 -> \args -> opIntoRegs $ \res ->
693 doWriteByteArrayOp Nothing b8 res args
694 WriteByteArrayOp_Int16 -> \args -> opIntoRegs $ \res ->
695 doWriteByteArrayOp Nothing b16 res args
696 WriteByteArrayOp_Int32 -> \args -> opIntoRegs $ \res ->
697 doWriteByteArrayOp Nothing b32 res args
698 WriteByteArrayOp_Int64 -> \args -> opIntoRegs $ \res ->
699 doWriteByteArrayOp Nothing b64 res args
700 WriteByteArrayOp_Word8 -> \args -> opIntoRegs $ \res ->
701 doWriteByteArrayOp Nothing b8 res args
702 WriteByteArrayOp_Word16 -> \args -> opIntoRegs $ \res ->
703 doWriteByteArrayOp Nothing b16 res args
704 WriteByteArrayOp_Word32 -> \args -> opIntoRegs $ \res ->
705 doWriteByteArrayOp Nothing b32 res args
706 WriteByteArrayOp_Word64 -> \args -> opIntoRegs $ \res ->
707 doWriteByteArrayOp Nothing b64 res args
708
709 -- WriteInt8ArrayAsXXX
710
711 WriteByteArrayOp_Word8AsChar -> \args -> opIntoRegs $ \res ->
712 doWriteByteArrayOp (Just (mo_WordTo8 platform)) b8 res args
713 WriteByteArrayOp_Word8AsWideChar -> \args -> opIntoRegs $ \res ->
714 doWriteByteArrayOp (Just (mo_WordTo32 platform)) b8 res args
715 WriteByteArrayOp_Word8AsInt -> \args -> opIntoRegs $ \res ->
716 doWriteByteArrayOp Nothing b8 res args
717 WriteByteArrayOp_Word8AsWord -> \args -> opIntoRegs $ \res ->
718 doWriteByteArrayOp Nothing b8 res args
719 WriteByteArrayOp_Word8AsAddr -> \args -> opIntoRegs $ \res ->
720 doWriteByteArrayOp Nothing b8 res args
721 WriteByteArrayOp_Word8AsFloat -> \args -> opIntoRegs $ \res ->
722 doWriteByteArrayOp Nothing b8 res args
723 WriteByteArrayOp_Word8AsDouble -> \args -> opIntoRegs $ \res ->
724 doWriteByteArrayOp Nothing b8 res args
725 WriteByteArrayOp_Word8AsStablePtr -> \args -> opIntoRegs $ \res ->
726 doWriteByteArrayOp Nothing b8 res args
727 WriteByteArrayOp_Word8AsInt16 -> \args -> opIntoRegs $ \res ->
728 doWriteByteArrayOp Nothing b8 res args
729 WriteByteArrayOp_Word8AsInt32 -> \args -> opIntoRegs $ \res ->
730 doWriteByteArrayOp Nothing b8 res args
731 WriteByteArrayOp_Word8AsInt64 -> \args -> opIntoRegs $ \res ->
732 doWriteByteArrayOp Nothing b8 res args
733 WriteByteArrayOp_Word8AsWord16 -> \args -> opIntoRegs $ \res ->
734 doWriteByteArrayOp Nothing b8 res args
735 WriteByteArrayOp_Word8AsWord32 -> \args -> opIntoRegs $ \res ->
736 doWriteByteArrayOp Nothing b8 res args
737 WriteByteArrayOp_Word8AsWord64 -> \args -> opIntoRegs $ \res ->
738 doWriteByteArrayOp Nothing b8 res args
739
740 -- Copying and setting byte arrays
741 CopyByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
742 doCopyByteArrayOp src src_off dst dst_off n
743 CopyMutableByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
744 doCopyMutableByteArrayOp src src_off dst dst_off n
745 CopyByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
746 doCopyByteArrayToAddrOp src src_off dst n
747 CopyMutableByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
748 doCopyMutableByteArrayToAddrOp src src_off dst n
749 CopyAddrToByteArrayOp -> \[src,dst,dst_off,n] -> opIntoRegs $ \[] ->
750 doCopyAddrToByteArrayOp src dst dst_off n
751 SetByteArrayOp -> \[ba,off,len,c] -> opIntoRegs $ \[] ->
752 doSetByteArrayOp ba off len c
753
754 -- Comparing byte arrays
755 CompareByteArraysOp -> \[ba1,ba1_off,ba2,ba2_off,n] -> opIntoRegs $ \[res] ->
756 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
757
758 BSwap16Op -> \[w] -> opIntoRegs $ \[res] ->
759 emitBSwapCall res w W16
760 BSwap32Op -> \[w] -> opIntoRegs $ \[res] ->
761 emitBSwapCall res w W32
762 BSwap64Op -> \[w] -> opIntoRegs $ \[res] ->
763 emitBSwapCall res w W64
764 BSwapOp -> \[w] -> opIntoRegs $ \[res] ->
765 emitBSwapCall res w (wordWidth platform)
766
767 BRev8Op -> \[w] -> opIntoRegs $ \[res] ->
768 emitBRevCall res w W8
769 BRev16Op -> \[w] -> opIntoRegs $ \[res] ->
770 emitBRevCall res w W16
771 BRev32Op -> \[w] -> opIntoRegs $ \[res] ->
772 emitBRevCall res w W32
773 BRev64Op -> \[w] -> opIntoRegs $ \[res] ->
774 emitBRevCall res w W64
775 BRevOp -> \[w] -> opIntoRegs $ \[res] ->
776 emitBRevCall res w (wordWidth platform)
777
778 -- Population count
779 PopCnt8Op -> \[w] -> opIntoRegs $ \[res] ->
780 emitPopCntCall res w W8
781 PopCnt16Op -> \[w] -> opIntoRegs $ \[res] ->
782 emitPopCntCall res w W16
783 PopCnt32Op -> \[w] -> opIntoRegs $ \[res] ->
784 emitPopCntCall res w W32
785 PopCnt64Op -> \[w] -> opIntoRegs $ \[res] ->
786 emitPopCntCall res w W64
787 PopCntOp -> \[w] -> opIntoRegs $ \[res] ->
788 emitPopCntCall res w (wordWidth platform)
789
790 -- Parallel bit deposit
791 Pdep8Op -> \[src, mask] -> opIntoRegs $ \[res] ->
792 emitPdepCall res src mask W8
793 Pdep16Op -> \[src, mask] -> opIntoRegs $ \[res] ->
794 emitPdepCall res src mask W16
795 Pdep32Op -> \[src, mask] -> opIntoRegs $ \[res] ->
796 emitPdepCall res src mask W32
797 Pdep64Op -> \[src, mask] -> opIntoRegs $ \[res] ->
798 emitPdepCall res src mask W64
799 PdepOp -> \[src, mask] -> opIntoRegs $ \[res] ->
800 emitPdepCall res src mask (wordWidth platform)
801
802 -- Parallel bit extract
803 Pext8Op -> \[src, mask] -> opIntoRegs $ \[res] ->
804 emitPextCall res src mask W8
805 Pext16Op -> \[src, mask] -> opIntoRegs $ \[res] ->
806 emitPextCall res src mask W16
807 Pext32Op -> \[src, mask] -> opIntoRegs $ \[res] ->
808 emitPextCall res src mask W32
809 Pext64Op -> \[src, mask] -> opIntoRegs $ \[res] ->
810 emitPextCall res src mask W64
811 PextOp -> \[src, mask] -> opIntoRegs $ \[res] ->
812 emitPextCall res src mask (wordWidth platform)
813
814 -- count leading zeros
815 Clz8Op -> \[w] -> opIntoRegs $ \[res] ->
816 emitClzCall res w W8
817 Clz16Op -> \[w] -> opIntoRegs $ \[res] ->
818 emitClzCall res w W16
819 Clz32Op -> \[w] -> opIntoRegs $ \[res] ->
820 emitClzCall res w W32
821 Clz64Op -> \[w] -> opIntoRegs $ \[res] ->
822 emitClzCall res w W64
823 ClzOp -> \[w] -> opIntoRegs $ \[res] ->
824 emitClzCall res w (wordWidth platform)
825
826 -- count trailing zeros
827 Ctz8Op -> \[w] -> opIntoRegs $ \[res] ->
828 emitCtzCall res w W8
829 Ctz16Op -> \[w] -> opIntoRegs $ \[res] ->
830 emitCtzCall res w W16
831 Ctz32Op -> \[w] -> opIntoRegs $ \[res] ->
832 emitCtzCall res w W32
833 Ctz64Op -> \[w] -> opIntoRegs $ \[res] ->
834 emitCtzCall res w W64
835 CtzOp -> \[w] -> opIntoRegs $ \[res] ->
836 emitCtzCall res w (wordWidth platform)
837
838 -- Unsigned int to floating point conversions
839 WordToFloatOp -> \[w] -> opIntoRegs $ \[res] ->
840 emitPrimCall [res] (MO_UF_Conv W32) [w]
841 WordToDoubleOp -> \[w] -> opIntoRegs $ \[res] ->
842 emitPrimCall [res] (MO_UF_Conv W64) [w]
843
844 -- Atomic operations
845 InterlockedExchange_Addr -> \[src, value] -> opIntoRegs $ \[res] ->
846 emitPrimCall [res] (MO_Xchg (wordWidth platform)) [src, value]
847 InterlockedExchange_Word -> \[src, value] -> opIntoRegs $ \[res] ->
848 emitPrimCall [res] (MO_Xchg (wordWidth platform)) [src, value]
849
850 FetchAddAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
851 doAtomicAddrRMW res AMO_Add addr (bWord platform) n
852 FetchSubAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
853 doAtomicAddrRMW res AMO_Sub addr (bWord platform) n
854 FetchAndAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
855 doAtomicAddrRMW res AMO_And addr (bWord platform) n
856 FetchNandAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
857 doAtomicAddrRMW res AMO_Nand addr (bWord platform) n
858 FetchOrAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
859 doAtomicAddrRMW res AMO_Or addr (bWord platform) n
860 FetchXorAddrOp_Word -> \[addr, n] -> opIntoRegs $ \[res] ->
861 doAtomicAddrRMW res AMO_Xor addr (bWord platform) n
862
863 AtomicReadAddrOp_Word -> \[addr] -> opIntoRegs $ \[res] ->
864 doAtomicReadAddr res addr (bWord platform)
865 AtomicWriteAddrOp_Word -> \[addr, val] -> opIntoRegs $ \[] ->
866 doAtomicWriteAddr addr (bWord platform) val
867
868 CasAddrOp_Addr -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
869 emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
870 CasAddrOp_Word -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
871 emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
872 CasAddrOp_Word8 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
873 emitPrimCall [res] (MO_Cmpxchg W8) [dst, expected, new]
874 CasAddrOp_Word16 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
875 emitPrimCall [res] (MO_Cmpxchg W16) [dst, expected, new]
876 CasAddrOp_Word32 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
877 emitPrimCall [res] (MO_Cmpxchg W32) [dst, expected, new]
878 CasAddrOp_Word64 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
879 emitPrimCall [res] (MO_Cmpxchg W64) [dst, expected, new]
880
881 -- SIMD primops
882 (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do
883 checkVecCompatibility dflags vcat n w
884 doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res
885 where
886 zeros :: CmmExpr
887 zeros = CmmLit $ CmmVec (replicate n zero)
888
889 zero :: CmmLit
890 zero = case vcat of
891 IntVec -> CmmInt 0 w
892 WordVec -> CmmInt 0 w
893 FloatVec -> CmmFloat 0 w
894
895 ty :: CmmType
896 ty = vecVmmType vcat n w
897
898 (VecPackOp vcat n w) -> \es -> opIntoRegs $ \[res] -> do
899 checkVecCompatibility dflags vcat n w
900 when (es `lengthIsNot` n) $
901 panic "emitPrimOp: VecPackOp has wrong number of arguments"
902 doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res
903 where
904 zeros :: CmmExpr
905 zeros = CmmLit $ CmmVec (replicate n zero)
906
907 zero :: CmmLit
908 zero = case vcat of
909 IntVec -> CmmInt 0 w
910 WordVec -> CmmInt 0 w
911 FloatVec -> CmmFloat 0 w
912
913 ty :: CmmType
914 ty = vecVmmType vcat n w
915
916 (VecUnpackOp vcat n w) -> \[arg] -> opIntoRegs $ \res -> do
917 checkVecCompatibility dflags vcat n w
918 when (res `lengthIsNot` n) $
919 panic "emitPrimOp: VecUnpackOp has wrong number of results"
920 doVecUnpackOp (vecElemProjectCast platform vcat w) ty arg res
921 where
922 ty :: CmmType
923 ty = vecVmmType vcat n w
924
925 (VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do
926 checkVecCompatibility dflags vcat n w
927 doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res
928 where
929 ty :: CmmType
930 ty = vecVmmType vcat n w
931
932 (VecIndexByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
933 checkVecCompatibility dflags vcat n w
934 doIndexByteArrayOp Nothing ty res0 args
935 where
936 ty :: CmmType
937 ty = vecVmmType vcat n w
938
939 (VecReadByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
940 checkVecCompatibility dflags vcat n w
941 doIndexByteArrayOp Nothing ty res0 args
942 where
943 ty :: CmmType
944 ty = vecVmmType vcat n w
945
946 (VecWriteByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
947 checkVecCompatibility dflags vcat n w
948 doWriteByteArrayOp Nothing ty res0 args
949 where
950 ty :: CmmType
951 ty = vecVmmType vcat n w
952
953 (VecIndexOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
954 checkVecCompatibility dflags vcat n w
955 doIndexOffAddrOp Nothing ty res0 args
956 where
957 ty :: CmmType
958 ty = vecVmmType vcat n w
959
960 (VecReadOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
961 checkVecCompatibility dflags vcat n w
962 doIndexOffAddrOp Nothing ty res0 args
963 where
964 ty :: CmmType
965 ty = vecVmmType vcat n w
966
967 (VecWriteOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
968 checkVecCompatibility dflags vcat n w
969 doWriteOffAddrOp Nothing ty res0 args
970 where
971 ty :: CmmType
972 ty = vecVmmType vcat n w
973
974 (VecIndexScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
975 checkVecCompatibility dflags vcat n w
976 doIndexByteArrayOpAs Nothing vecty ty res0 args
977 where
978 vecty :: CmmType
979 vecty = vecVmmType vcat n w
980
981 ty :: CmmType
982 ty = vecCmmCat vcat w
983
984 (VecReadScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
985 checkVecCompatibility dflags vcat n w
986 doIndexByteArrayOpAs Nothing vecty ty res0 args
987 where
988 vecty :: CmmType
989 vecty = vecVmmType vcat n w
990
991 ty :: CmmType
992 ty = vecCmmCat vcat w
993
994 (VecWriteScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
995 checkVecCompatibility dflags vcat n w
996 doWriteByteArrayOp Nothing ty res0 args
997 where
998 ty :: CmmType
999 ty = vecCmmCat vcat w
1000
1001 (VecIndexScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
1002 checkVecCompatibility dflags vcat n w
1003 doIndexOffAddrOpAs Nothing vecty ty res0 args
1004 where
1005 vecty :: CmmType
1006 vecty = vecVmmType vcat n w
1007
1008 ty :: CmmType
1009 ty = vecCmmCat vcat w
1010
1011 (VecReadScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
1012 checkVecCompatibility dflags vcat n w
1013 doIndexOffAddrOpAs Nothing vecty ty res0 args
1014 where
1015 vecty :: CmmType
1016 vecty = vecVmmType vcat n w
1017
1018 ty :: CmmType
1019 ty = vecCmmCat vcat w
1020
1021 (VecWriteScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do
1022 checkVecCompatibility dflags vcat n w
1023 doWriteOffAddrOp Nothing ty res0 args
1024 where
1025 ty :: CmmType
1026 ty = vecCmmCat vcat w
1027
1028 -- Prefetch
1029 PrefetchByteArrayOp3 -> \args -> opIntoRegs $ \[] ->
1030 doPrefetchByteArrayOp 3 args
1031 PrefetchMutableByteArrayOp3 -> \args -> opIntoRegs $ \[] ->
1032 doPrefetchMutableByteArrayOp 3 args
1033 PrefetchAddrOp3 -> \args -> opIntoRegs $ \[] ->
1034 doPrefetchAddrOp 3 args
1035 PrefetchValueOp3 -> \args -> opIntoRegs $ \[] ->
1036 doPrefetchValueOp 3 args
1037
1038 PrefetchByteArrayOp2 -> \args -> opIntoRegs $ \[] ->
1039 doPrefetchByteArrayOp 2 args
1040 PrefetchMutableByteArrayOp2 -> \args -> opIntoRegs $ \[] ->
1041 doPrefetchMutableByteArrayOp 2 args
1042 PrefetchAddrOp2 -> \args -> opIntoRegs $ \[] ->
1043 doPrefetchAddrOp 2 args
1044 PrefetchValueOp2 -> \args -> opIntoRegs $ \[] ->
1045 doPrefetchValueOp 2 args
1046 PrefetchByteArrayOp1 -> \args -> opIntoRegs $ \[] ->
1047 doPrefetchByteArrayOp 1 args
1048 PrefetchMutableByteArrayOp1 -> \args -> opIntoRegs $ \[] ->
1049 doPrefetchMutableByteArrayOp 1 args
1050 PrefetchAddrOp1 -> \args -> opIntoRegs $ \[] ->
1051 doPrefetchAddrOp 1 args
1052 PrefetchValueOp1 -> \args -> opIntoRegs $ \[] ->
1053 doPrefetchValueOp 1 args
1054
1055 PrefetchByteArrayOp0 -> \args -> opIntoRegs $ \[] ->
1056 doPrefetchByteArrayOp 0 args
1057 PrefetchMutableByteArrayOp0 -> \args -> opIntoRegs $ \[] ->
1058 doPrefetchMutableByteArrayOp 0 args
1059 PrefetchAddrOp0 -> \args -> opIntoRegs $ \[] ->
1060 doPrefetchAddrOp 0 args
1061 PrefetchValueOp0 -> \args -> opIntoRegs $ \[] ->
1062 doPrefetchValueOp 0 args
1063
1064 -- Atomic read-modify-write
1065 FetchAddByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1066 doAtomicByteArrayRMW res AMO_Add mba ix (bWord platform) n
1067 FetchSubByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1068 doAtomicByteArrayRMW res AMO_Sub mba ix (bWord platform) n
1069 FetchAndByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1070 doAtomicByteArrayRMW res AMO_And mba ix (bWord platform) n
1071 FetchNandByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1072 doAtomicByteArrayRMW res AMO_Nand mba ix (bWord platform) n
1073 FetchOrByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1074 doAtomicByteArrayRMW res AMO_Or mba ix (bWord platform) n
1075 FetchXorByteArrayOp_Int -> \[mba, ix, n] -> opIntoRegs $ \[res] ->
1076 doAtomicByteArrayRMW res AMO_Xor mba ix (bWord platform) n
1077 AtomicReadByteArrayOp_Int -> \[mba, ix] -> opIntoRegs $ \[res] ->
1078 doAtomicReadByteArray res mba ix (bWord platform)
1079 AtomicWriteByteArrayOp_Int -> \[mba, ix, val] -> opIntoRegs $ \[] ->
1080 doAtomicWriteByteArray mba ix (bWord platform) val
1081 CasByteArrayOp_Int -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
1082 doCasByteArray res mba ix (bWord platform) old new
1083 CasByteArrayOp_Int8 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
1084 doCasByteArray res mba ix b8 old new
1085 CasByteArrayOp_Int16 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
1086 doCasByteArray res mba ix b16 old new
1087 CasByteArrayOp_Int32 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
1088 doCasByteArray res mba ix b32 old new
1089 CasByteArrayOp_Int64 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
1090 doCasByteArray res mba ix b64 old new
1091
1092 -- The rest just translate straightforwardly
1093
1094 Int8ToWord8Op -> \args -> opNop args
1095 Word8ToInt8Op -> \args -> opNop args
1096 Int16ToWord16Op -> \args -> opNop args
1097 Word16ToInt16Op -> \args -> opNop args
1098 Int32ToWord32Op -> \args -> opNop args
1099 Word32ToInt32Op -> \args -> opNop args
1100 Int64ToWord64Op -> \args -> opNop args
1101 Word64ToInt64Op -> \args -> opNop args
1102 IntToWordOp -> \args -> opNop args
1103 WordToIntOp -> \args -> opNop args
1104 IntToAddrOp -> \args -> opNop args
1105 AddrToIntOp -> \args -> opNop args
1106 ChrOp -> \args -> opNop args -- Int# and Char# are rep'd the same
1107 OrdOp -> \args -> opNop args
1108
1109 Narrow8IntOp -> \args -> opNarrow args (MO_SS_Conv, W8)
1110 Narrow16IntOp -> \args -> opNarrow args (MO_SS_Conv, W16)
1111 Narrow32IntOp -> \args -> opNarrow args (MO_SS_Conv, W32)
1112 Narrow8WordOp -> \args -> opNarrow args (MO_UU_Conv, W8)
1113 Narrow16WordOp -> \args -> opNarrow args (MO_UU_Conv, W16)
1114 Narrow32WordOp -> \args -> opNarrow args (MO_UU_Conv, W32)
1115
1116 DoublePowerOp -> \args -> opCallish args MO_F64_Pwr
1117 DoubleSinOp -> \args -> opCallish args MO_F64_Sin
1118 DoubleCosOp -> \args -> opCallish args MO_F64_Cos
1119 DoubleTanOp -> \args -> opCallish args MO_F64_Tan
1120 DoubleSinhOp -> \args -> opCallish args MO_F64_Sinh
1121 DoubleCoshOp -> \args -> opCallish args MO_F64_Cosh
1122 DoubleTanhOp -> \args -> opCallish args MO_F64_Tanh
1123 DoubleAsinOp -> \args -> opCallish args MO_F64_Asin
1124 DoubleAcosOp -> \args -> opCallish args MO_F64_Acos
1125 DoubleAtanOp -> \args -> opCallish args MO_F64_Atan
1126 DoubleAsinhOp -> \args -> opCallish args MO_F64_Asinh
1127 DoubleAcoshOp -> \args -> opCallish args MO_F64_Acosh
1128 DoubleAtanhOp -> \args -> opCallish args MO_F64_Atanh
1129 DoubleLogOp -> \args -> opCallish args MO_F64_Log
1130 DoubleLog1POp -> \args -> opCallish args MO_F64_Log1P
1131 DoubleExpOp -> \args -> opCallish args MO_F64_Exp
1132 DoubleExpM1Op -> \args -> opCallish args MO_F64_ExpM1
1133 DoubleSqrtOp -> \args -> opCallish args MO_F64_Sqrt
1134
1135 FloatPowerOp -> \args -> opCallish args MO_F32_Pwr
1136 FloatSinOp -> \args -> opCallish args MO_F32_Sin
1137 FloatCosOp -> \args -> opCallish args MO_F32_Cos
1138 FloatTanOp -> \args -> opCallish args MO_F32_Tan
1139 FloatSinhOp -> \args -> opCallish args MO_F32_Sinh
1140 FloatCoshOp -> \args -> opCallish args MO_F32_Cosh
1141 FloatTanhOp -> \args -> opCallish args MO_F32_Tanh
1142 FloatAsinOp -> \args -> opCallish args MO_F32_Asin
1143 FloatAcosOp -> \args -> opCallish args MO_F32_Acos
1144 FloatAtanOp -> \args -> opCallish args MO_F32_Atan
1145 FloatAsinhOp -> \args -> opCallish args MO_F32_Asinh
1146 FloatAcoshOp -> \args -> opCallish args MO_F32_Acosh
1147 FloatAtanhOp -> \args -> opCallish args MO_F32_Atanh
1148 FloatLogOp -> \args -> opCallish args MO_F32_Log
1149 FloatLog1POp -> \args -> opCallish args MO_F32_Log1P
1150 FloatExpOp -> \args -> opCallish args MO_F32_Exp
1151 FloatExpM1Op -> \args -> opCallish args MO_F32_ExpM1
1152 FloatSqrtOp -> \args -> opCallish args MO_F32_Sqrt
1153
1154 -- Native word signless ops
1155
1156 IntAddOp -> \args -> opTranslate args (mo_wordAdd platform)
1157 IntSubOp -> \args -> opTranslate args (mo_wordSub platform)
1158 WordAddOp -> \args -> opTranslate args (mo_wordAdd platform)
1159 WordSubOp -> \args -> opTranslate args (mo_wordSub platform)
1160 AddrAddOp -> \args -> opTranslate args (mo_wordAdd platform)
1161 AddrSubOp -> \args -> opTranslate args (mo_wordSub platform)
1162
1163 IntEqOp -> \args -> opTranslate args (mo_wordEq platform)
1164 IntNeOp -> \args -> opTranslate args (mo_wordNe platform)
1165 WordEqOp -> \args -> opTranslate args (mo_wordEq platform)
1166 WordNeOp -> \args -> opTranslate args (mo_wordNe platform)
1167 AddrEqOp -> \args -> opTranslate args (mo_wordEq platform)
1168 AddrNeOp -> \args -> opTranslate args (mo_wordNe platform)
1169
1170 WordAndOp -> \args -> opTranslate args (mo_wordAnd platform)
1171 WordOrOp -> \args -> opTranslate args (mo_wordOr platform)
1172 WordXorOp -> \args -> opTranslate args (mo_wordXor platform)
1173 WordNotOp -> \args -> opTranslate args (mo_wordNot platform)
1174 WordSllOp -> \args -> opTranslate args (mo_wordShl platform)
1175 WordSrlOp -> \args -> opTranslate args (mo_wordUShr platform)
1176
1177 AddrRemOp -> \args -> opTranslate args (mo_wordURem platform)
1178
1179 -- Native word signed ops
1180
1181 IntMulOp -> \args -> opTranslate args (mo_wordMul platform)
1182 IntMulMayOfloOp -> \args -> opTranslate args (MO_S_MulMayOflo (wordWidth platform))
1183 IntQuotOp -> \args -> opTranslate args (mo_wordSQuot platform)
1184 IntRemOp -> \args -> opTranslate args (mo_wordSRem platform)
1185 IntNegOp -> \args -> opTranslate args (mo_wordSNeg platform)
1186
1187 IntGeOp -> \args -> opTranslate args (mo_wordSGe platform)
1188 IntLeOp -> \args -> opTranslate args (mo_wordSLe platform)
1189 IntGtOp -> \args -> opTranslate args (mo_wordSGt platform)
1190 IntLtOp -> \args -> opTranslate args (mo_wordSLt platform)
1191
1192 IntAndOp -> \args -> opTranslate args (mo_wordAnd platform)
1193 IntOrOp -> \args -> opTranslate args (mo_wordOr platform)
1194 IntXorOp -> \args -> opTranslate args (mo_wordXor platform)
1195 IntNotOp -> \args -> opTranslate args (mo_wordNot platform)
1196 IntSllOp -> \args -> opTranslate args (mo_wordShl platform)
1197 IntSraOp -> \args -> opTranslate args (mo_wordSShr platform)
1198 IntSrlOp -> \args -> opTranslate args (mo_wordUShr platform)
1199
1200 -- Native word unsigned ops
1201
1202 WordGeOp -> \args -> opTranslate args (mo_wordUGe platform)
1203 WordLeOp -> \args -> opTranslate args (mo_wordULe platform)
1204 WordGtOp -> \args -> opTranslate args (mo_wordUGt platform)
1205 WordLtOp -> \args -> opTranslate args (mo_wordULt platform)
1206
1207 WordMulOp -> \args -> opTranslate args (mo_wordMul platform)
1208 WordQuotOp -> \args -> opTranslate args (mo_wordUQuot platform)
1209 WordRemOp -> \args -> opTranslate args (mo_wordURem platform)
1210
1211 AddrGeOp -> \args -> opTranslate args (mo_wordUGe platform)
1212 AddrLeOp -> \args -> opTranslate args (mo_wordULe platform)
1213 AddrGtOp -> \args -> opTranslate args (mo_wordUGt platform)
1214 AddrLtOp -> \args -> opTranslate args (mo_wordULt platform)
1215
1216 -- Int8# signed ops
1217
1218 Int8ToIntOp -> \args -> opTranslate args (MO_SS_Conv W8 (wordWidth platform))
1219 IntToInt8Op -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W8)
1220 Int8NegOp -> \args -> opTranslate args (MO_S_Neg W8)
1221 Int8AddOp -> \args -> opTranslate args (MO_Add W8)
1222 Int8SubOp -> \args -> opTranslate args (MO_Sub W8)
1223 Int8MulOp -> \args -> opTranslate args (MO_Mul W8)
1224 Int8QuotOp -> \args -> opTranslate args (MO_S_Quot W8)
1225 Int8RemOp -> \args -> opTranslate args (MO_S_Rem W8)
1226
1227 Int8SllOp -> \args -> opTranslate args (MO_Shl W8)
1228 Int8SraOp -> \args -> opTranslate args (MO_S_Shr W8)
1229 Int8SrlOp -> \args -> opTranslate args (MO_U_Shr W8)
1230
1231 Int8EqOp -> \args -> opTranslate args (MO_Eq W8)
1232 Int8GeOp -> \args -> opTranslate args (MO_S_Ge W8)
1233 Int8GtOp -> \args -> opTranslate args (MO_S_Gt W8)
1234 Int8LeOp -> \args -> opTranslate args (MO_S_Le W8)
1235 Int8LtOp -> \args -> opTranslate args (MO_S_Lt W8)
1236 Int8NeOp -> \args -> opTranslate args (MO_Ne W8)
1237
1238 -- Word8# unsigned ops
1239
1240 Word8ToWordOp -> \args -> opTranslate args (MO_UU_Conv W8 (wordWidth platform))
1241 WordToWord8Op -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W8)
1242 Word8AddOp -> \args -> opTranslate args (MO_Add W8)
1243 Word8SubOp -> \args -> opTranslate args (MO_Sub W8)
1244 Word8MulOp -> \args -> opTranslate args (MO_Mul W8)
1245 Word8QuotOp -> \args -> opTranslate args (MO_U_Quot W8)
1246 Word8RemOp -> \args -> opTranslate args (MO_U_Rem W8)
1247
1248 Word8AndOp -> \args -> opTranslate args (MO_And W8)
1249 Word8OrOp -> \args -> opTranslate args (MO_Or W8)
1250 Word8XorOp -> \args -> opTranslate args (MO_Xor W8)
1251 Word8NotOp -> \args -> opTranslate args (MO_Not W8)
1252 Word8SllOp -> \args -> opTranslate args (MO_Shl W8)
1253 Word8SrlOp -> \args -> opTranslate args (MO_U_Shr W8)
1254
1255 Word8EqOp -> \args -> opTranslate args (MO_Eq W8)
1256 Word8GeOp -> \args -> opTranslate args (MO_U_Ge W8)
1257 Word8GtOp -> \args -> opTranslate args (MO_U_Gt W8)
1258 Word8LeOp -> \args -> opTranslate args (MO_U_Le W8)
1259 Word8LtOp -> \args -> opTranslate args (MO_U_Lt W8)
1260 Word8NeOp -> \args -> opTranslate args (MO_Ne W8)
1261
1262 -- Int16# signed ops
1263
1264 Int16ToIntOp -> \args -> opTranslate args (MO_SS_Conv W16 (wordWidth platform))
1265 IntToInt16Op -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W16)
1266 Int16NegOp -> \args -> opTranslate args (MO_S_Neg W16)
1267 Int16AddOp -> \args -> opTranslate args (MO_Add W16)
1268 Int16SubOp -> \args -> opTranslate args (MO_Sub W16)
1269 Int16MulOp -> \args -> opTranslate args (MO_Mul W16)
1270 Int16QuotOp -> \args -> opTranslate args (MO_S_Quot W16)
1271 Int16RemOp -> \args -> opTranslate args (MO_S_Rem W16)
1272
1273 Int16SllOp -> \args -> opTranslate args (MO_Shl W16)
1274 Int16SraOp -> \args -> opTranslate args (MO_S_Shr W16)
1275 Int16SrlOp -> \args -> opTranslate args (MO_U_Shr W16)
1276
1277 Int16EqOp -> \args -> opTranslate args (MO_Eq W16)
1278 Int16GeOp -> \args -> opTranslate args (MO_S_Ge W16)
1279 Int16GtOp -> \args -> opTranslate args (MO_S_Gt W16)
1280 Int16LeOp -> \args -> opTranslate args (MO_S_Le W16)
1281 Int16LtOp -> \args -> opTranslate args (MO_S_Lt W16)
1282 Int16NeOp -> \args -> opTranslate args (MO_Ne W16)
1283
1284 -- Word16# unsigned ops
1285
1286 Word16ToWordOp -> \args -> opTranslate args (MO_UU_Conv W16 (wordWidth platform))
1287 WordToWord16Op -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W16)
1288 Word16AddOp -> \args -> opTranslate args (MO_Add W16)
1289 Word16SubOp -> \args -> opTranslate args (MO_Sub W16)
1290 Word16MulOp -> \args -> opTranslate args (MO_Mul W16)
1291 Word16QuotOp -> \args -> opTranslate args (MO_U_Quot W16)
1292 Word16RemOp -> \args -> opTranslate args (MO_U_Rem W16)
1293
1294 Word16AndOp -> \args -> opTranslate args (MO_And W16)
1295 Word16OrOp -> \args -> opTranslate args (MO_Or W16)
1296 Word16XorOp -> \args -> opTranslate args (MO_Xor W16)
1297 Word16NotOp -> \args -> opTranslate args (MO_Not W16)
1298 Word16SllOp -> \args -> opTranslate args (MO_Shl W16)
1299 Word16SrlOp -> \args -> opTranslate args (MO_U_Shr W16)
1300
1301 Word16EqOp -> \args -> opTranslate args (MO_Eq W16)
1302 Word16GeOp -> \args -> opTranslate args (MO_U_Ge W16)
1303 Word16GtOp -> \args -> opTranslate args (MO_U_Gt W16)
1304 Word16LeOp -> \args -> opTranslate args (MO_U_Le W16)
1305 Word16LtOp -> \args -> opTranslate args (MO_U_Lt W16)
1306 Word16NeOp -> \args -> opTranslate args (MO_Ne W16)
1307
1308 -- Int32# signed ops
1309
1310 Int32ToIntOp -> \args -> opTranslate args (MO_SS_Conv W32 (wordWidth platform))
1311 IntToInt32Op -> \args -> opTranslate args (MO_SS_Conv (wordWidth platform) W32)
1312 Int32NegOp -> \args -> opTranslate args (MO_S_Neg W32)
1313 Int32AddOp -> \args -> opTranslate args (MO_Add W32)
1314 Int32SubOp -> \args -> opTranslate args (MO_Sub W32)
1315 Int32MulOp -> \args -> opTranslate args (MO_Mul W32)
1316 Int32QuotOp -> \args -> opTranslate args (MO_S_Quot W32)
1317 Int32RemOp -> \args -> opTranslate args (MO_S_Rem W32)
1318
1319 Int32SllOp -> \args -> opTranslate args (MO_Shl W32)
1320 Int32SraOp -> \args -> opTranslate args (MO_S_Shr W32)
1321 Int32SrlOp -> \args -> opTranslate args (MO_U_Shr W32)
1322
1323 Int32EqOp -> \args -> opTranslate args (MO_Eq W32)
1324 Int32GeOp -> \args -> opTranslate args (MO_S_Ge W32)
1325 Int32GtOp -> \args -> opTranslate args (MO_S_Gt W32)
1326 Int32LeOp -> \args -> opTranslate args (MO_S_Le W32)
1327 Int32LtOp -> \args -> opTranslate args (MO_S_Lt W32)
1328 Int32NeOp -> \args -> opTranslate args (MO_Ne W32)
1329
1330 -- Word32# unsigned ops
1331
1332 Word32ToWordOp -> \args -> opTranslate args (MO_UU_Conv W32 (wordWidth platform))
1333 WordToWord32Op -> \args -> opTranslate args (MO_UU_Conv (wordWidth platform) W32)
1334 Word32AddOp -> \args -> opTranslate args (MO_Add W32)
1335 Word32SubOp -> \args -> opTranslate args (MO_Sub W32)
1336 Word32MulOp -> \args -> opTranslate args (MO_Mul W32)
1337 Word32QuotOp -> \args -> opTranslate args (MO_U_Quot W32)
1338 Word32RemOp -> \args -> opTranslate args (MO_U_Rem W32)
1339
1340 Word32AndOp -> \args -> opTranslate args (MO_And W32)
1341 Word32OrOp -> \args -> opTranslate args (MO_Or W32)
1342 Word32XorOp -> \args -> opTranslate args (MO_Xor W32)
1343 Word32NotOp -> \args -> opTranslate args (MO_Not W32)
1344 Word32SllOp -> \args -> opTranslate args (MO_Shl W32)
1345 Word32SrlOp -> \args -> opTranslate args (MO_U_Shr W32)
1346
1347 Word32EqOp -> \args -> opTranslate args (MO_Eq W32)
1348 Word32GeOp -> \args -> opTranslate args (MO_U_Ge W32)
1349 Word32GtOp -> \args -> opTranslate args (MO_U_Gt W32)
1350 Word32LeOp -> \args -> opTranslate args (MO_U_Le W32)
1351 Word32LtOp -> \args -> opTranslate args (MO_U_Lt W32)
1352 Word32NeOp -> \args -> opTranslate args (MO_Ne W32)
1353
1354 -- Int64# signed ops
1355
1356 Int64ToIntOp -> \args -> opTranslate64 args (\w -> MO_SS_Conv w (wordWidth platform)) MO_I64_ToI
1357 IntToInt64Op -> \args -> opTranslate64 args (\w -> MO_SS_Conv (wordWidth platform) w) MO_I64_FromI
1358 Int64NegOp -> \args -> opTranslate64 args MO_S_Neg MO_x64_Neg
1359 Int64AddOp -> \args -> opTranslate64 args MO_Add MO_x64_Add
1360 Int64SubOp -> \args -> opTranslate64 args MO_Sub MO_x64_Sub
1361 Int64MulOp -> \args -> opTranslate64 args MO_Mul MO_x64_Mul
1362 Int64QuotOp -> \args -> opTranslate64 args MO_S_Quot MO_I64_Quot
1363 Int64RemOp -> \args -> opTranslate64 args MO_S_Rem MO_I64_Rem
1364
1365 Int64SllOp -> \args -> opTranslate64 args MO_Shl MO_x64_Shl
1366 Int64SraOp -> \args -> opTranslate64 args MO_S_Shr MO_I64_Shr
1367 Int64SrlOp -> \args -> opTranslate64 args MO_U_Shr MO_W64_Shr
1368
1369 Int64EqOp -> \args -> opTranslate64 args MO_Eq MO_x64_Eq
1370 Int64GeOp -> \args -> opTranslate64 args MO_S_Ge MO_I64_Ge
1371 Int64GtOp -> \args -> opTranslate64 args MO_S_Gt MO_I64_Gt
1372 Int64LeOp -> \args -> opTranslate64 args MO_S_Le MO_I64_Le
1373 Int64LtOp -> \args -> opTranslate64 args MO_S_Lt MO_I64_Lt
1374 Int64NeOp -> \args -> opTranslate64 args MO_Ne MO_x64_Ne
1375
1376 -- Word64# unsigned ops
1377
1378 Word64ToWordOp -> \args -> opTranslate64 args (\w -> MO_UU_Conv w (wordWidth platform)) MO_W64_ToW
1379 WordToWord64Op -> \args -> opTranslate64 args (\w -> MO_UU_Conv (wordWidth platform) w) MO_W64_FromW
1380 Word64AddOp -> \args -> opTranslate64 args MO_Add MO_x64_Add
1381 Word64SubOp -> \args -> opTranslate64 args MO_Sub MO_x64_Sub
1382 Word64MulOp -> \args -> opTranslate64 args MO_Mul MO_x64_Mul
1383 Word64QuotOp -> \args -> opTranslate64 args MO_U_Quot MO_W64_Quot
1384 Word64RemOp -> \args -> opTranslate64 args MO_U_Rem MO_W64_Rem
1385
1386 Word64AndOp -> \args -> opTranslate64 args MO_And MO_x64_And
1387 Word64OrOp -> \args -> opTranslate64 args MO_Or MO_x64_Or
1388 Word64XorOp -> \args -> opTranslate64 args MO_Xor MO_x64_Xor
1389 Word64NotOp -> \args -> opTranslate64 args MO_Not MO_x64_Not
1390 Word64SllOp -> \args -> opTranslate64 args MO_Shl MO_x64_Shl
1391 Word64SrlOp -> \args -> opTranslate64 args MO_U_Shr MO_W64_Shr
1392
1393 Word64EqOp -> \args -> opTranslate64 args MO_Eq MO_x64_Eq
1394 Word64GeOp -> \args -> opTranslate64 args MO_U_Ge MO_W64_Ge
1395 Word64GtOp -> \args -> opTranslate64 args MO_U_Gt MO_W64_Gt
1396 Word64LeOp -> \args -> opTranslate64 args MO_U_Le MO_W64_Le
1397 Word64LtOp -> \args -> opTranslate64 args MO_U_Lt MO_W64_Lt
1398 Word64NeOp -> \args -> opTranslate64 args MO_Ne MO_x64_Ne
1399
1400 -- Char# ops
1401
1402 CharEqOp -> \args -> opTranslate args (MO_Eq (wordWidth platform))
1403 CharNeOp -> \args -> opTranslate args (MO_Ne (wordWidth platform))
1404 CharGeOp -> \args -> opTranslate args (MO_U_Ge (wordWidth platform))
1405 CharLeOp -> \args -> opTranslate args (MO_U_Le (wordWidth platform))
1406 CharGtOp -> \args -> opTranslate args (MO_U_Gt (wordWidth platform))
1407 CharLtOp -> \args -> opTranslate args (MO_U_Lt (wordWidth platform))
1408
1409 -- Double ops
1410
1411 DoubleEqOp -> \args -> opTranslate args (MO_F_Eq W64)
1412 DoubleNeOp -> \args -> opTranslate args (MO_F_Ne W64)
1413 DoubleGeOp -> \args -> opTranslate args (MO_F_Ge W64)
1414 DoubleLeOp -> \args -> opTranslate args (MO_F_Le W64)
1415 DoubleGtOp -> \args -> opTranslate args (MO_F_Gt W64)
1416 DoubleLtOp -> \args -> opTranslate args (MO_F_Lt W64)
1417
1418 DoubleAddOp -> \args -> opTranslate args (MO_F_Add W64)
1419 DoubleSubOp -> \args -> opTranslate args (MO_F_Sub W64)
1420 DoubleMulOp -> \args -> opTranslate args (MO_F_Mul W64)
1421 DoubleDivOp -> \args -> opTranslate args (MO_F_Quot W64)
1422 DoubleNegOp -> \args -> opTranslate args (MO_F_Neg W64)
1423
1424 -- Float ops
1425
1426 FloatEqOp -> \args -> opTranslate args (MO_F_Eq W32)
1427 FloatNeOp -> \args -> opTranslate args (MO_F_Ne W32)
1428 FloatGeOp -> \args -> opTranslate args (MO_F_Ge W32)
1429 FloatLeOp -> \args -> opTranslate args (MO_F_Le W32)
1430 FloatGtOp -> \args -> opTranslate args (MO_F_Gt W32)
1431 FloatLtOp -> \args -> opTranslate args (MO_F_Lt W32)
1432
1433 FloatAddOp -> \args -> opTranslate args (MO_F_Add W32)
1434 FloatSubOp -> \args -> opTranslate args (MO_F_Sub W32)
1435 FloatMulOp -> \args -> opTranslate args (MO_F_Mul W32)
1436 FloatDivOp -> \args -> opTranslate args (MO_F_Quot W32)
1437 FloatNegOp -> \args -> opTranslate args (MO_F_Neg W32)
1438
1439 -- Vector ops
1440
1441 (VecAddOp FloatVec n w) -> \args -> opTranslate args (MO_VF_Add n w)
1442 (VecSubOp FloatVec n w) -> \args -> opTranslate args (MO_VF_Sub n w)
1443 (VecMulOp FloatVec n w) -> \args -> opTranslate args (MO_VF_Mul n w)
1444 (VecDivOp FloatVec n w) -> \args -> opTranslate args (MO_VF_Quot n w)
1445 (VecQuotOp FloatVec _ _) -> \_ -> panic "unsupported primop"
1446 (VecRemOp FloatVec _ _) -> \_ -> panic "unsupported primop"
1447 (VecNegOp FloatVec n w) -> \args -> opTranslate args (MO_VF_Neg n w)
1448
1449 (VecAddOp IntVec n w) -> \args -> opTranslate args (MO_V_Add n w)
1450 (VecSubOp IntVec n w) -> \args -> opTranslate args (MO_V_Sub n w)
1451 (VecMulOp IntVec n w) -> \args -> opTranslate args (MO_V_Mul n w)
1452 (VecDivOp IntVec _ _) -> \_ -> panic "unsupported primop"
1453 (VecQuotOp IntVec n w) -> \args -> opTranslate args (MO_VS_Quot n w)
1454 (VecRemOp IntVec n w) -> \args -> opTranslate args (MO_VS_Rem n w)
1455 (VecNegOp IntVec n w) -> \args -> opTranslate args (MO_VS_Neg n w)
1456
1457 (VecAddOp WordVec n w) -> \args -> opTranslate args (MO_V_Add n w)
1458 (VecSubOp WordVec n w) -> \args -> opTranslate args (MO_V_Sub n w)
1459 (VecMulOp WordVec n w) -> \args -> opTranslate args (MO_V_Mul n w)
1460 (VecDivOp WordVec _ _) -> \_ -> panic "unsupported primop"
1461 (VecQuotOp WordVec n w) -> \args -> opTranslate args (MO_VU_Quot n w)
1462 (VecRemOp WordVec n w) -> \args -> opTranslate args (MO_VU_Rem n w)
1463 (VecNegOp WordVec _ _) -> \_ -> panic "unsupported primop"
1464
1465 -- Conversions
1466
1467 IntToDoubleOp -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W64)
1468 DoubleToIntOp -> \args -> opTranslate args (MO_FS_Conv W64 (wordWidth platform))
1469
1470 IntToFloatOp -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W32)
1471 FloatToIntOp -> \args -> opTranslate args (MO_FS_Conv W32 (wordWidth platform))
1472
1473 FloatToDoubleOp -> \args -> opTranslate args (MO_FF_Conv W32 W64)
1474 DoubleToFloatOp -> \args -> opTranslate args (MO_FF_Conv W64 W32)
1475
1476 IntQuotRemOp -> \args -> opCallishHandledLater args $
1477 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1478 then Left (MO_S_QuotRem (wordWidth platform))
1479 else Right (genericIntQuotRemOp (wordWidth platform))
1480
1481 Int8QuotRemOp -> \args -> opCallishHandledLater args $
1482 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1483 then Left (MO_S_QuotRem W8)
1484 else Right (genericIntQuotRemOp W8)
1485
1486 Int16QuotRemOp -> \args -> opCallishHandledLater args $
1487 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1488 then Left (MO_S_QuotRem W16)
1489 else Right (genericIntQuotRemOp W16)
1490
1491 Int32QuotRemOp -> \args -> opCallishHandledLater args $
1492 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1493 then Left (MO_S_QuotRem W32)
1494 else Right (genericIntQuotRemOp W32)
1495
1496 WordQuotRemOp -> \args -> opCallishHandledLater args $
1497 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1498 then Left (MO_U_QuotRem (wordWidth platform))
1499 else Right (genericWordQuotRemOp (wordWidth platform))
1500
1501 WordQuotRem2Op -> \args -> opCallishHandledLater args $
1502 if (ncg && (x86ish || ppc)) || llvm
1503 then Left (MO_U_QuotRem2 (wordWidth platform))
1504 else Right (genericWordQuotRem2Op platform)
1505
1506 Word8QuotRemOp -> \args -> opCallishHandledLater args $
1507 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1508 then Left (MO_U_QuotRem W8)
1509 else Right (genericWordQuotRemOp W8)
1510
1511 Word16QuotRemOp -> \args -> opCallishHandledLater args $
1512 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1513 then Left (MO_U_QuotRem W16)
1514 else Right (genericWordQuotRemOp W16)
1515
1516 Word32QuotRemOp -> \args -> opCallishHandledLater args $
1517 if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args)
1518 then Left (MO_U_QuotRem W32)
1519 else Right (genericWordQuotRemOp W32)
1520
1521 WordAdd2Op -> \args -> opCallishHandledLater args $
1522 if (ncg && (x86ish || ppc)) || llvm
1523 then Left (MO_Add2 (wordWidth platform))
1524 else Right genericWordAdd2Op
1525
1526 WordAddCOp -> \args -> opCallishHandledLater args $
1527 if (ncg && (x86ish || ppc)) || llvm
1528 then Left (MO_AddWordC (wordWidth platform))
1529 else Right genericWordAddCOp
1530
1531 WordSubCOp -> \args -> opCallishHandledLater args $
1532 if (ncg && (x86ish || ppc)) || llvm
1533 then Left (MO_SubWordC (wordWidth platform))
1534 else Right genericWordSubCOp
1535
1536 IntAddCOp -> \args -> opCallishHandledLater args $
1537 if (ncg && (x86ish || ppc)) || llvm
1538 then Left (MO_AddIntC (wordWidth platform))
1539 else Right genericIntAddCOp
1540
1541 IntSubCOp -> \args -> opCallishHandledLater args $
1542 if (ncg && (x86ish || ppc)) || llvm
1543 then Left (MO_SubIntC (wordWidth platform))
1544 else Right genericIntSubCOp
1545
1546 WordMul2Op -> \args -> opCallishHandledLater args $
1547 if ncg && (x86ish || ppc) || llvm
1548 then Left (MO_U_Mul2 (wordWidth platform))
1549 else Right genericWordMul2Op
1550
1551 IntMul2Op -> \args -> opCallishHandledLater args $
1552 if ncg && x86ish || llvm
1553 then Left (MO_S_Mul2 (wordWidth platform))
1554 else Right genericIntMul2Op
1555
1556 FloatFabsOp -> \args -> opCallishHandledLater args $
1557 if (ncg && (x86ish || ppc || aarch64)) || llvm
1558 then Left MO_F32_Fabs
1559 else Right $ genericFabsOp W32
1560
1561 DoubleFabsOp -> \args -> opCallishHandledLater args $
1562 if (ncg && (x86ish || ppc || aarch64)) || llvm
1563 then Left MO_F64_Fabs
1564 else Right $ genericFabsOp W64
1565
1566 -- tagToEnum# is special: we need to pull the constructor
1567 -- out of the table, and perform an appropriate return.
1568 TagToEnumOp -> \[amode] -> PrimopCmmEmit_Internal $ \res_ty -> do
1569 -- If you're reading this code in the attempt to figure
1570 -- out why the compiler panic'ed here, it is probably because
1571 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
1572 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
1573 -- That won't work.
1574 let tycon = tyConAppTyCon res_ty
1575 massert (isEnumerationTyCon tycon)
1576 platform <- getPlatform
1577 pure [tagToClosure platform tycon amode]
1578
1579 -- Out of line primops.
1580 -- TODO compiler need not know about these
1581
1582 UnsafeThawArrayOp -> alwaysExternal
1583 CasArrayOp -> alwaysExternal
1584 UnsafeThawSmallArrayOp -> alwaysExternal
1585 CasSmallArrayOp -> alwaysExternal
1586 NewPinnedByteArrayOp_Char -> alwaysExternal
1587 NewAlignedPinnedByteArrayOp_Char -> alwaysExternal
1588 MutableByteArrayIsPinnedOp -> alwaysExternal
1589 DoubleDecode_2IntOp -> alwaysExternal
1590 DoubleDecode_Int64Op -> alwaysExternal
1591 FloatDecode_IntOp -> alwaysExternal
1592 ByteArrayIsPinnedOp -> alwaysExternal
1593 ShrinkMutableByteArrayOp_Char -> alwaysExternal
1594 ResizeMutableByteArrayOp_Char -> alwaysExternal
1595 ShrinkSmallMutableArrayOp_Char -> alwaysExternal
1596 NewArrayArrayOp -> alwaysExternal
1597 NewMutVarOp -> alwaysExternal
1598 AtomicModifyMutVar2Op -> alwaysExternal
1599 AtomicModifyMutVar_Op -> alwaysExternal
1600 CasMutVarOp -> alwaysExternal
1601 CatchOp -> alwaysExternal
1602 RaiseOp -> alwaysExternal
1603 RaiseIOOp -> alwaysExternal
1604 MaskAsyncExceptionsOp -> alwaysExternal
1605 MaskUninterruptibleOp -> alwaysExternal
1606 UnmaskAsyncExceptionsOp -> alwaysExternal
1607 MaskStatus -> alwaysExternal
1608 AtomicallyOp -> alwaysExternal
1609 RetryOp -> alwaysExternal
1610 CatchRetryOp -> alwaysExternal
1611 CatchSTMOp -> alwaysExternal
1612 NewTVarOp -> alwaysExternal
1613 ReadTVarOp -> alwaysExternal
1614 ReadTVarIOOp -> alwaysExternal
1615 WriteTVarOp -> alwaysExternal
1616 NewMVarOp -> alwaysExternal
1617 TakeMVarOp -> alwaysExternal
1618 TryTakeMVarOp -> alwaysExternal
1619 PutMVarOp -> alwaysExternal
1620 TryPutMVarOp -> alwaysExternal
1621 ReadMVarOp -> alwaysExternal
1622 TryReadMVarOp -> alwaysExternal
1623 IsEmptyMVarOp -> alwaysExternal
1624 NewIOPortrOp -> alwaysExternal
1625 ReadIOPortOp -> alwaysExternal
1626 WriteIOPortOp -> alwaysExternal
1627 DelayOp -> alwaysExternal
1628 WaitReadOp -> alwaysExternal
1629 WaitWriteOp -> alwaysExternal
1630 ForkOp -> alwaysExternal
1631 ForkOnOp -> alwaysExternal
1632 KillThreadOp -> alwaysExternal
1633 YieldOp -> alwaysExternal
1634 LabelThreadOp -> alwaysExternal
1635 IsCurrentThreadBoundOp -> alwaysExternal
1636 NoDuplicateOp -> alwaysExternal
1637 ThreadStatusOp -> alwaysExternal
1638 MkWeakOp -> alwaysExternal
1639 MkWeakNoFinalizerOp -> alwaysExternal
1640 AddCFinalizerToWeakOp -> alwaysExternal
1641 DeRefWeakOp -> alwaysExternal
1642 FinalizeWeakOp -> alwaysExternal
1643 MakeStablePtrOp -> alwaysExternal
1644 DeRefStablePtrOp -> alwaysExternal
1645 MakeStableNameOp -> alwaysExternal
1646 CompactNewOp -> alwaysExternal
1647 CompactResizeOp -> alwaysExternal
1648 CompactContainsOp -> alwaysExternal
1649 CompactContainsAnyOp -> alwaysExternal
1650 CompactGetFirstBlockOp -> alwaysExternal
1651 CompactGetNextBlockOp -> alwaysExternal
1652 CompactAllocateBlockOp -> alwaysExternal
1653 CompactFixupPointersOp -> alwaysExternal
1654 CompactAdd -> alwaysExternal
1655 CompactAddWithSharing -> alwaysExternal
1656 CompactSize -> alwaysExternal
1657 SeqOp -> alwaysExternal
1658 GetSparkOp -> alwaysExternal
1659 NumSparks -> alwaysExternal
1660 DataToTagOp -> alwaysExternal
1661 MkApUpd0_Op -> alwaysExternal
1662 NewBCOOp -> alwaysExternal
1663 UnpackClosureOp -> alwaysExternal
1664 ClosureSizeOp -> alwaysExternal
1665 WhereFromOp -> alwaysExternal
1666 GetApStackValOp -> alwaysExternal
1667 ClearCCSOp -> alwaysExternal
1668 TraceEventOp -> alwaysExternal
1669 TraceEventBinaryOp -> alwaysExternal
1670 TraceMarkerOp -> alwaysExternal
1671 SetThreadAllocationCounter -> alwaysExternal
1672
1673 -- See Note [keepAlive# magic] in GHC.CoreToStg.Prep.
1674 KeepAliveOp -> panic "keepAlive# should have been eliminated in CorePrep"
1675
1676 where
1677 profile = targetProfile dflags
1678 platform = profilePlatform profile
1679 result_info = getPrimOpResultInfo primop
1680
1681 opNop :: [CmmExpr] -> PrimopCmmEmit
1682 opNop args = opIntoRegs $ \[res] -> emitAssign (CmmLocal res) arg
1683 where [arg] = args
1684
1685 opNarrow
1686 :: [CmmExpr]
1687 -> (Width -> Width -> MachOp, Width)
1688 -> PrimopCmmEmit
1689 opNarrow args (mop, rep) = opIntoRegs $ \[res] -> emitAssign (CmmLocal res) $
1690 CmmMachOp (mop rep (wordWidth platform)) [CmmMachOp (mop (wordWidth platform) rep) [arg]]
1691 where [arg] = args
1692
1693 -- | These primops are implemented by CallishMachOps, because they sometimes
1694 -- turn into foreign calls depending on the backend.
1695 opCallish :: [CmmExpr] -> CallishMachOp -> PrimopCmmEmit
1696 opCallish args prim = opIntoRegs $ \[res] -> emitPrimCall [res] prim args
1697
1698 opTranslate :: [CmmExpr] -> MachOp -> PrimopCmmEmit
1699 opTranslate args mop = opIntoRegs $ \[res] -> do
1700 let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args)
1701 emit stmt
1702
1703 opTranslate64
1704 :: [CmmExpr]
1705 -> (Width -> MachOp)
1706 -> CallishMachOp
1707 -> PrimopCmmEmit
1708 opTranslate64 args mkMop callish =
1709 case platformWordSize platform of
1710 -- LLVM and C `can handle larger than native size arithmetic natively.
1711 _ | not ncg -> opTranslate args $ mkMop W64
1712 PW4 -> opCallish args callish
1713 PW8 -> opTranslate args $ mkMop W64
1714
1715 -- | Basically a "manual" case, rather than one of the common repetitive forms
1716 -- above. The results are a parameter to the returned function so we know the
1717 -- choice of variant never depends on them.
1718 opCallishHandledLater
1719 :: [CmmExpr]
1720 -> Either CallishMachOp GenericOp
1721 -> PrimopCmmEmit
1722 opCallishHandledLater args callOrNot = opIntoRegs $ \res0 -> case callOrNot of
1723 Left op -> emit $ mkUnsafeCall (PrimTarget op) res0 args
1724 Right gen -> gen res0 args
1725
1726 opIntoRegs
1727 :: ([LocalReg] -- where to put the results
1728 -> FCode ())
1729 -> PrimopCmmEmit
1730 opIntoRegs f = PrimopCmmEmit_Internal $ \res_ty -> do
1731 regs <- case result_info of
1732 ReturnsPrim VoidRep -> pure []
1733 ReturnsPrim rep
1734 -> do reg <- newTemp (primRepCmmType platform rep)
1735 pure [reg]
1736
1737 ReturnsAlg tycon | isUnboxedTupleTyCon tycon
1738 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
1739 pure regs
1740
1741 _ -> panic "cgOpApp"
1742 f regs
1743 pure $ map (CmmReg . CmmLocal) regs
1744
1745 alwaysExternal = \_ -> PrimopCmmEmit_External
1746 -- Note [QuotRem optimization]
1747 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1748 --
1749 -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
1750 -- (shift, .&.).
1751 --
1752 -- Currently we only support optimization (performed in GHC.Cmm.Opt) when the
1753 -- constant is a power of 2. #9041 tracks the implementation of the general
1754 -- optimization.
1755 --
1756 -- `quotRem` can be optimized in the same way. However as it returns two values,
1757 -- it is implemented as a "callish" primop which is harder to match and
1758 -- to transform later on. For simplicity, the current implementation detects cases
1759 -- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
1760 -- primop into two CMM quot and rem primops.
1761 quotRemCanBeOptimized = \case
1762 [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
1763 _ -> False
1764
1765 ncg = backend dflags == NCG
1766 llvm = backend dflags == LLVM
1767 x86ish = case platformArch platform of
1768 ArchX86 -> True
1769 ArchX86_64 -> True
1770 _ -> False
1771 ppc = case platformArch platform of
1772 ArchPPC -> True
1773 ArchPPC_64 _ -> True
1774 _ -> False
1775 aarch64 = platformArch platform == ArchAArch64
1776
1777 data PrimopCmmEmit
1778 -- | Out of line fake primop that's actually just a foreign call to other
1779 -- (presumably) C--.
1780 = PrimopCmmEmit_External
1781 -- | Real primop turned into inline C--.
1782 | PrimopCmmEmit_Internal (Type -- the return type, some primops are specialized to it
1783 -> FCode [CmmExpr]) -- just for TagToEnum for now
1784
1785 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
1786
1787 genericIntQuotRemOp :: Width -> GenericOp
1788 genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
1789 = emit $ mkAssign (CmmLocal res_q)
1790 (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
1791 mkAssign (CmmLocal res_r)
1792 (CmmMachOp (MO_S_Rem width) [arg_x, arg_y])
1793 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
1794
1795 genericWordQuotRemOp :: Width -> GenericOp
1796 genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
1797 = emit $ mkAssign (CmmLocal res_q)
1798 (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
1799 mkAssign (CmmLocal res_r)
1800 (CmmMachOp (MO_U_Rem width) [arg_x, arg_y])
1801 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
1802
1803 genericWordQuotRem2Op :: Platform -> GenericOp
1804 genericWordQuotRem2Op platform [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
1805 = emit =<< f (widthInBits (wordWidth platform)) zero arg_x_high arg_x_low
1806 where ty = cmmExprType platform arg_x_high
1807 shl x i = CmmMachOp (MO_Shl (wordWidth platform)) [x, i]
1808 shr x i = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, i]
1809 or x y = CmmMachOp (MO_Or (wordWidth platform)) [x, y]
1810 ge x y = CmmMachOp (MO_U_Ge (wordWidth platform)) [x, y]
1811 ne x y = CmmMachOp (MO_Ne (wordWidth platform)) [x, y]
1812 minus x y = CmmMachOp (MO_Sub (wordWidth platform)) [x, y]
1813 times x y = CmmMachOp (MO_Mul (wordWidth platform)) [x, y]
1814 zero = lit 0
1815 one = lit 1
1816 negone = lit (fromIntegral (platformWordSizeInBits platform) - 1)
1817 lit i = CmmLit (CmmInt i (wordWidth platform))
1818
1819 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
1820 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
1821 mkAssign (CmmLocal res_r) high)
1822 f i acc high low =
1823 do roverflowedBit <- newTemp ty
1824 rhigh' <- newTemp ty
1825 rhigh'' <- newTemp ty
1826 rlow' <- newTemp ty
1827 risge <- newTemp ty
1828 racc' <- newTemp ty
1829 let high' = CmmReg (CmmLocal rhigh')
1830 isge = CmmReg (CmmLocal risge)
1831 overflowedBit = CmmReg (CmmLocal roverflowedBit)
1832 let this = catAGraphs
1833 [mkAssign (CmmLocal roverflowedBit)
1834 (shr high negone),
1835 mkAssign (CmmLocal rhigh')
1836 (or (shl high one) (shr low negone)),
1837 mkAssign (CmmLocal rlow')
1838 (shl low one),
1839 mkAssign (CmmLocal risge)
1840 (or (overflowedBit `ne` zero)
1841 (high' `ge` arg_y)),
1842 mkAssign (CmmLocal rhigh'')
1843 (high' `minus` (arg_y `times` isge)),
1844 mkAssign (CmmLocal racc')
1845 (or (shl acc one) isge)]
1846 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
1847 (CmmReg (CmmLocal rhigh''))
1848 (CmmReg (CmmLocal rlow'))
1849 return (this <*> rest)
1850 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
1851
1852 genericWordAdd2Op :: GenericOp
1853 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
1854 = do platform <- getPlatform
1855 r1 <- newTemp (cmmExprType platform arg_x)
1856 r2 <- newTemp (cmmExprType platform arg_x)
1857 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, hww]
1858 toTopHalf x = CmmMachOp (MO_Shl (wordWidth platform)) [x, hww]
1859 bottomHalf x = CmmMachOp (MO_And (wordWidth platform)) [x, hwm]
1860 add x y = CmmMachOp (MO_Add (wordWidth platform)) [x, y]
1861 or x y = CmmMachOp (MO_Or (wordWidth platform)) [x, y]
1862 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth platform)))
1863 (wordWidth platform))
1864 hwm = CmmLit (CmmInt (halfWordMask platform) (wordWidth platform))
1865 emit $ catAGraphs
1866 [mkAssign (CmmLocal r1)
1867 (add (bottomHalf arg_x) (bottomHalf arg_y)),
1868 mkAssign (CmmLocal r2)
1869 (add (topHalf (CmmReg (CmmLocal r1)))
1870 (add (topHalf arg_x) (topHalf arg_y))),
1871 mkAssign (CmmLocal res_h)
1872 (topHalf (CmmReg (CmmLocal r2))),
1873 mkAssign (CmmLocal res_l)
1874 (or (toTopHalf (CmmReg (CmmLocal r2)))
1875 (bottomHalf (CmmReg (CmmLocal r1))))]
1876 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
1877
1878 -- | Implements branchless recovery of the carry flag @c@ by checking the
1879 -- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
1880 --
1881 -- @
1882 -- c = a&b | (a|b)&~r
1883 -- @
1884 --
1885 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1886 genericWordAddCOp :: GenericOp
1887 genericWordAddCOp [res_r, res_c] [aa, bb]
1888 = do platform <- getPlatform
1889 emit $ catAGraphs [
1890 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd platform) [aa,bb]),
1891 mkAssign (CmmLocal res_c) $
1892 CmmMachOp (mo_wordUShr platform) [
1893 CmmMachOp (mo_wordOr platform) [
1894 CmmMachOp (mo_wordAnd platform) [aa,bb],
1895 CmmMachOp (mo_wordAnd platform) [
1896 CmmMachOp (mo_wordOr platform) [aa,bb],
1897 CmmMachOp (mo_wordNot platform) [CmmReg (CmmLocal res_r)]
1898 ]
1899 ],
1900 mkIntExpr platform (platformWordSizeInBits platform - 1)
1901 ]
1902 ]
1903 genericWordAddCOp _ _ = panic "genericWordAddCOp"
1904
1905 -- | Implements branchless recovery of the carry flag @c@ by checking the
1906 -- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
1907 --
1908 -- @
1909 -- c = ~a&b | (~a|b)&r
1910 -- @
1911 --
1912 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1913 genericWordSubCOp :: GenericOp
1914 genericWordSubCOp [res_r, res_c] [aa, bb]
1915 = do platform <- getPlatform
1916 emit $ catAGraphs [
1917 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub platform) [aa,bb]),
1918 mkAssign (CmmLocal res_c) $
1919 CmmMachOp (mo_wordUShr platform) [
1920 CmmMachOp (mo_wordOr platform) [
1921 CmmMachOp (mo_wordAnd platform) [
1922 CmmMachOp (mo_wordNot platform) [aa],
1923 bb
1924 ],
1925 CmmMachOp (mo_wordAnd platform) [
1926 CmmMachOp (mo_wordOr platform) [
1927 CmmMachOp (mo_wordNot platform) [aa],
1928 bb
1929 ],
1930 CmmReg (CmmLocal res_r)
1931 ]
1932 ],
1933 mkIntExpr platform (platformWordSizeInBits platform - 1)
1934 ]
1935 ]
1936 genericWordSubCOp _ _ = panic "genericWordSubCOp"
1937
1938 genericIntAddCOp :: GenericOp
1939 genericIntAddCOp [res_r, res_c] [aa, bb]
1940 {-
1941 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
1942 C, and without needing any comparisons. This may not be the
1943 fastest way to do it - if you have better code, please send it! --SDM
1944
1945 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
1946
1947 We currently don't make use of the r value if c is != 0 (i.e.
1948 overflow), we just convert to big integers and try again. This
1949 could be improved by making r and c the correct values for
1950 plugging into a new J#.
1951
1952 { r = ((I_)(a)) + ((I_)(b)); \
1953 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1954 >> (BITS_IN (I_) - 1); \
1955 }
1956 Wading through the mass of bracketry, it seems to reduce to:
1957 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1958
1959 -}
1960 = do platform <- getPlatform
1961 emit $ catAGraphs [
1962 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd platform) [aa,bb]),
1963 mkAssign (CmmLocal res_c) $
1964 CmmMachOp (mo_wordUShr platform) [
1965 CmmMachOp (mo_wordAnd platform) [
1966 CmmMachOp (mo_wordNot platform) [CmmMachOp (mo_wordXor platform) [aa,bb]],
1967 CmmMachOp (mo_wordXor platform) [aa, CmmReg (CmmLocal res_r)]
1968 ],
1969 mkIntExpr platform (platformWordSizeInBits platform - 1)
1970 ]
1971 ]
1972 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1973
1974 genericIntSubCOp :: GenericOp
1975 genericIntSubCOp [res_r, res_c] [aa, bb]
1976 {- Similarly:
1977 #define subIntCzh(r,c,a,b) \
1978 { r = ((I_)(a)) - ((I_)(b)); \
1979 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1980 >> (BITS_IN (I_) - 1); \
1981 }
1982
1983 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1984 -}
1985 = do platform <- getPlatform
1986 emit $ catAGraphs [
1987 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub platform) [aa,bb]),
1988 mkAssign (CmmLocal res_c) $
1989 CmmMachOp (mo_wordUShr platform) [
1990 CmmMachOp (mo_wordAnd platform) [
1991 CmmMachOp (mo_wordXor platform) [aa,bb],
1992 CmmMachOp (mo_wordXor platform) [aa, CmmReg (CmmLocal res_r)]
1993 ],
1994 mkIntExpr platform (platformWordSizeInBits platform - 1)
1995 ]
1996 ]
1997 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1998
1999 genericWordMul2Op :: GenericOp
2000 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
2001 = do platform <- getPlatform
2002 let t = cmmExprType platform arg_x
2003 xlyl <- liftM CmmLocal $ newTemp t
2004 xlyh <- liftM CmmLocal $ newTemp t
2005 xhyl <- liftM CmmLocal $ newTemp t
2006 r <- liftM CmmLocal $ newTemp t
2007 -- This generic implementation is very simple and slow. We might
2008 -- well be able to do better, but for now this at least works.
2009 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth platform)) [x, hww]
2010 toTopHalf x = CmmMachOp (MO_Shl (wordWidth platform)) [x, hww]
2011 bottomHalf x = CmmMachOp (MO_And (wordWidth platform)) [x, hwm]
2012 add x y = CmmMachOp (MO_Add (wordWidth platform)) [x, y]
2013 sum = foldl1 add
2014 mul x y = CmmMachOp (MO_Mul (wordWidth platform)) [x, y]
2015 or x y = CmmMachOp (MO_Or (wordWidth platform)) [x, y]
2016 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth platform)))
2017 (wordWidth platform))
2018 hwm = CmmLit (CmmInt (halfWordMask platform) (wordWidth platform))
2019 emit $ catAGraphs
2020 [mkAssign xlyl
2021 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
2022 mkAssign xlyh
2023 (mul (bottomHalf arg_x) (topHalf arg_y)),
2024 mkAssign xhyl
2025 (mul (topHalf arg_x) (bottomHalf arg_y)),
2026 mkAssign r
2027 (sum [topHalf (CmmReg xlyl),
2028 bottomHalf (CmmReg xhyl),
2029 bottomHalf (CmmReg xlyh)]),
2030 mkAssign (CmmLocal res_l)
2031 (or (bottomHalf (CmmReg xlyl))
2032 (toTopHalf (CmmReg r))),
2033 mkAssign (CmmLocal res_h)
2034 (sum [mul (topHalf arg_x) (topHalf arg_y),
2035 topHalf (CmmReg xhyl),
2036 topHalf (CmmReg xlyh),
2037 topHalf (CmmReg r)])]
2038 genericWordMul2Op _ _ = panic "genericWordMul2Op"
2039
2040 genericIntMul2Op :: GenericOp
2041 genericIntMul2Op [res_c, res_h, res_l] both_args@[arg_x, arg_y]
2042 = do dflags <- getDynFlags
2043 platform <- getPlatform
2044 -- Implement algorithm from Hacker's Delight, 2nd edition, p.174
2045 let t = cmmExprType platform arg_x
2046 p <- newTemp t
2047 -- 1) compute the multiplication as if numbers were unsigned
2048 _ <- withSequel (AssignTo [p, res_l] False) $
2049 cmmPrimOpApp dflags WordMul2Op both_args Nothing
2050 -- 2) correct the high bits of the unsigned result
2051 let carryFill x = CmmMachOp (MO_S_Shr ww) [x, wwm1]
2052 sub x y = CmmMachOp (MO_Sub ww) [x, y]
2053 and x y = CmmMachOp (MO_And ww) [x, y]
2054 neq x y = CmmMachOp (MO_Ne ww) [x, y]
2055 f x y = (carryFill x) `and` y
2056 wwm1 = CmmLit (CmmInt (fromIntegral (widthInBits ww - 1)) ww)
2057 rl x = CmmReg (CmmLocal x)
2058 ww = wordWidth platform
2059 emit $ catAGraphs
2060 [ mkAssign (CmmLocal res_h) (rl p `sub` f arg_x arg_y `sub` f arg_y arg_x)
2061 , mkAssign (CmmLocal res_c) (rl res_h `neq` carryFill (rl res_l))
2062 ]
2063 genericIntMul2Op _ _ = panic "genericIntMul2Op"
2064
2065 -- This replicates what we had in libraries/base/GHC/Float.hs:
2066 --
2067 -- abs x | x == 0 = 0 -- handles (-0.0)
2068 -- | x > 0 = x
2069 -- | otherwise = negateFloat x
2070 genericFabsOp :: Width -> GenericOp
2071 genericFabsOp w [res_r] [aa]
2072 = do platform <- getPlatform
2073 let zero = CmmLit (CmmFloat 0 w)
2074
2075 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
2076 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
2077
2078 neg x = CmmMachOp (MO_F_Neg w) [x]
2079
2080 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
2081 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
2082
2083 res_t <- CmmLocal <$> newTemp (cmmExprType platform aa)
2084 let g3 = catAGraphs [mkAssign res_t aa,
2085 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
2086
2087 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
2088
2089 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
2090
2091 genericFabsOp _ _ _ = panic "genericFabsOp"
2092
2093 ------------------------------------------------------------------------------
2094 -- Helpers for translating various minor variants of array indexing.
2095
2096 doIndexOffAddrOp :: Maybe MachOp
2097 -> CmmType
2098 -> [LocalReg]
2099 -> [CmmExpr]
2100 -> FCode ()
2101 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
2102 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
2103 doIndexOffAddrOp _ _ _ _
2104 = panic "GHC.StgToCmm.Prim: doIndexOffAddrOp"
2105
2106 doIndexOffAddrOpAs :: Maybe MachOp
2107 -> CmmType
2108 -> CmmType
2109 -> [LocalReg]
2110 -> [CmmExpr]
2111 -> FCode ()
2112 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
2113 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
2114 doIndexOffAddrOpAs _ _ _ _ _
2115 = panic "GHC.StgToCmm.Prim: doIndexOffAddrOpAs"
2116
2117 doIndexByteArrayOp :: Maybe MachOp
2118 -> CmmType
2119 -> [LocalReg]
2120 -> [CmmExpr]
2121 -> FCode ()
2122 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
2123 = do profile <- getProfile
2124 mkBasicIndexedRead (arrWordsHdrSize profile) maybe_post_read_cast rep res addr rep idx
2125 doIndexByteArrayOp _ _ _ _
2126 = panic "GHC.StgToCmm.Prim: doIndexByteArrayOp"
2127
2128 doIndexByteArrayOpAs :: Maybe MachOp
2129 -> CmmType
2130 -> CmmType
2131 -> [LocalReg]
2132 -> [CmmExpr]
2133 -> FCode ()
2134 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
2135 = do profile <- getProfile
2136 mkBasicIndexedRead (arrWordsHdrSize profile) maybe_post_read_cast rep res addr idx_rep idx
2137 doIndexByteArrayOpAs _ _ _ _ _
2138 = panic "GHC.StgToCmm.Prim: doIndexByteArrayOpAs"
2139
2140 doReadPtrArrayOp :: LocalReg
2141 -> CmmExpr
2142 -> CmmExpr
2143 -> FCode ()
2144 doReadPtrArrayOp res addr idx
2145 = do profile <- getProfile
2146 platform <- getPlatform
2147 mkBasicIndexedRead (arrPtrsHdrSize profile) Nothing (gcWord platform) res addr (gcWord platform) idx
2148
2149 doWriteOffAddrOp :: Maybe MachOp
2150 -> CmmType
2151 -> [LocalReg]
2152 -> [CmmExpr]
2153 -> FCode ()
2154 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
2155 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
2156 doWriteOffAddrOp _ _ _ _
2157 = panic "GHC.StgToCmm.Prim: doWriteOffAddrOp"
2158
2159 doWriteByteArrayOp :: Maybe MachOp
2160 -> CmmType
2161 -> [LocalReg]
2162 -> [CmmExpr]
2163 -> FCode ()
2164 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
2165 = do profile <- getProfile
2166 mkBasicIndexedWrite (arrWordsHdrSize profile) maybe_pre_write_cast addr idx_ty idx val
2167 doWriteByteArrayOp _ _ _ _
2168 = panic "GHC.StgToCmm.Prim: doWriteByteArrayOp"
2169
2170 doWritePtrArrayOp :: CmmExpr
2171 -> CmmExpr
2172 -> CmmExpr
2173 -> FCode ()
2174 doWritePtrArrayOp addr idx val
2175 = do profile <- getProfile
2176 platform <- getPlatform
2177 let ty = cmmExprType platform val
2178 hdr_size = arrPtrsHdrSize profile
2179 -- Update remembered set for non-moving collector
2180 whenUpdRemSetEnabled
2181 $ emitUpdRemSetPush (cmmLoadIndexOffExpr platform hdr_size ty addr ty idx)
2182 -- This write barrier is to ensure that the heap writes to the object
2183 -- referred to by val have happened before we write val into the array.
2184 -- See #12469 for details.
2185 emitPrimCall [] MO_WriteBarrier []
2186 mkBasicIndexedWrite hdr_size Nothing addr ty idx val
2187 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
2188 -- the write barrier. We must write a byte into the mark table:
2189 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
2190 emit $ mkStore (
2191 cmmOffsetExpr platform
2192 (cmmOffsetExprW platform (cmmOffsetB platform addr hdr_size)
2193 (loadArrPtrsSize profile addr))
2194 (CmmMachOp (mo_wordUShr platform) [idx,
2195 mkIntExpr platform (pc_MUT_ARR_PTRS_CARD_BITS (platformConstants platform))])
2196 ) (CmmLit (CmmInt 1 W8))
2197
2198 loadArrPtrsSize :: Profile -> CmmExpr -> CmmExpr
2199 loadArrPtrsSize profile addr = CmmLoad (cmmOffsetB platform addr off) (bWord platform)
2200 where off = fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_ptrs (profileConstants profile)
2201 platform = profilePlatform profile
2202
2203 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
2204 -> Maybe MachOp -- Optional result cast
2205 -> CmmType -- Type of element we are accessing
2206 -> LocalReg -- Destination
2207 -> CmmExpr -- Base address
2208 -> CmmType -- Type of element by which we are indexing
2209 -> CmmExpr -- Index
2210 -> FCode ()
2211 mkBasicIndexedRead off Nothing ty res base idx_ty idx
2212 = do platform <- getPlatform
2213 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr platform off ty base idx_ty idx)
2214 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
2215 = do platform <- getPlatform
2216 emitAssign (CmmLocal res) (CmmMachOp cast [
2217 cmmLoadIndexOffExpr platform off ty base idx_ty idx])
2218
2219 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
2220 -> Maybe MachOp -- Optional value cast
2221 -> CmmExpr -- Base address
2222 -> CmmType -- Type of element by which we are indexing
2223 -> CmmExpr -- Index
2224 -> CmmExpr -- Value to write
2225 -> FCode ()
2226 mkBasicIndexedWrite off Nothing base idx_ty idx val
2227 = do platform <- getPlatform
2228 emitStore (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) val
2229 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
2230 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
2231
2232 -- ----------------------------------------------------------------------------
2233 -- Misc utils
2234
2235 cmmIndexOffExpr :: Platform
2236 -> ByteOff -- Initial offset in bytes
2237 -> Width -- Width of element by which we are indexing
2238 -> CmmExpr -- Base address
2239 -> CmmExpr -- Index
2240 -> CmmExpr
2241 cmmIndexOffExpr platform off width base idx
2242 = cmmIndexExpr platform width (cmmOffsetB platform base off) idx
2243
2244 cmmLoadIndexOffExpr :: Platform
2245 -> ByteOff -- Initial offset in bytes
2246 -> CmmType -- Type of element we are accessing
2247 -> CmmExpr -- Base address
2248 -> CmmType -- Type of element by which we are indexing
2249 -> CmmExpr -- Index
2250 -> CmmExpr
2251 cmmLoadIndexOffExpr platform off ty base idx_ty idx
2252 = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty
2253
2254 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
2255 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
2256
2257 ------------------------------------------------------------------------------
2258 -- Helpers for translating vector primops.
2259
2260 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
2261 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
2262
2263 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
2264 vecCmmCat IntVec = cmmBits
2265 vecCmmCat WordVec = cmmBits
2266 vecCmmCat FloatVec = cmmFloat
2267
2268 vecElemInjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
2269 vecElemInjectCast _ FloatVec _ = Nothing
2270 vecElemInjectCast platform IntVec W8 = Just (mo_WordTo8 platform)
2271 vecElemInjectCast platform IntVec W16 = Just (mo_WordTo16 platform)
2272 vecElemInjectCast platform IntVec W32 = Just (mo_WordTo32 platform)
2273 vecElemInjectCast _ IntVec W64 = Nothing
2274 vecElemInjectCast platform WordVec W8 = Just (mo_WordTo8 platform)
2275 vecElemInjectCast platform WordVec W16 = Just (mo_WordTo16 platform)
2276 vecElemInjectCast platform WordVec W32 = Just (mo_WordTo32 platform)
2277 vecElemInjectCast _ WordVec W64 = Nothing
2278 vecElemInjectCast _ _ _ = Nothing
2279
2280 vecElemProjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
2281 vecElemProjectCast _ FloatVec _ = Nothing
2282 vecElemProjectCast platform IntVec W8 = Just (mo_s_8ToWord platform)
2283 vecElemProjectCast platform IntVec W16 = Just (mo_s_16ToWord platform)
2284 vecElemProjectCast platform IntVec W32 = Just (mo_s_32ToWord platform)
2285 vecElemProjectCast _ IntVec W64 = Nothing
2286 vecElemProjectCast platform WordVec W8 = Just (mo_u_8ToWord platform)
2287 vecElemProjectCast platform WordVec W16 = Just (mo_u_16ToWord platform)
2288 vecElemProjectCast platform WordVec W32 = Just (mo_u_32ToWord platform)
2289 vecElemProjectCast _ WordVec W64 = Nothing
2290 vecElemProjectCast _ _ _ = Nothing
2291
2292
2293 -- NOTE [SIMD Design for the future]
2294 -- Check to make sure that we can generate code for the specified vector type
2295 -- given the current set of dynamic flags.
2296 -- Currently these checks are specific to x86 and x86_64 architecture.
2297 -- This should be fixed!
2298 -- In particular,
2299 -- 1) Add better support for other architectures! (this may require a redesign)
2300 -- 2) Decouple design choices from LLVM's pseudo SIMD model!
2301 -- The high level LLVM naive rep makes per CPU family SIMD generation is own
2302 -- optimization problem, and hides important differences in eg ARM vs x86_64 simd
2303 -- 3) Depending on the architecture, the SIMD registers may also support general
2304 -- computations on Float/Double/Word/Int scalars, but currently on
2305 -- for example x86_64, we always put Word/Int (or sized) in GPR
2306 -- (general purpose) registers. Would relaxing that allow for
2307 -- useful optimization opportunities?
2308 -- Phrased differently, it is worth experimenting with supporting
2309 -- different register mapping strategies than we currently have, especially if
2310 -- someday we want SIMD to be a first class denizen in GHC along with scalar
2311 -- values!
2312 -- The current design with respect to register mapping of scalars could
2313 -- very well be the best,but exploring the design space and doing careful
2314 -- measurements is the only way to validate that.
2315 -- In some next generation CPU ISAs, notably RISC V, the SIMD extension
2316 -- includes support for a sort of run time CPU dependent vectorization parameter,
2317 -- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
2318 -- element chunk! Time will tell if that direction sees wide adoption,
2319 -- but it is from that context that unifying our handling of simd and scalars
2320 -- may benefit. It is not likely to benefit current architectures, though
2321 -- it may very well be a design perspective that helps guide improving the NCG.
2322
2323
2324 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
2325 checkVecCompatibility dflags vcat l w = do
2326 when (backend dflags /= LLVM) $
2327 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
2328 ,"Please use -fllvm."]
2329 check vecWidth vcat l w
2330 where
2331 platform = targetPlatform dflags
2332 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
2333 check W128 FloatVec 4 W32 | not (isSseEnabled platform) =
2334 sorry $ "128-bit wide single-precision floating point " ++
2335 "SIMD vector instructions require at least -msse."
2336 check W128 _ _ _ | not (isSse2Enabled platform) =
2337 sorry $ "128-bit wide integer and double precision " ++
2338 "SIMD vector instructions require at least -msse2."
2339 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
2340 sorry $ "256-bit wide floating point " ++
2341 "SIMD vector instructions require at least -mavx."
2342 check W256 _ _ _ | not (isAvx2Enabled dflags) =
2343 sorry $ "256-bit wide integer " ++
2344 "SIMD vector instructions require at least -mavx2."
2345 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
2346 sorry $ "512-bit wide " ++
2347 "SIMD vector instructions require -mavx512f."
2348 check _ _ _ _ = return ()
2349
2350 vecWidth = typeWidth (vecVmmType vcat l w)
2351
2352 ------------------------------------------------------------------------------
2353 -- Helpers for translating vector packing and unpacking.
2354
2355 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
2356 -> CmmType -- Type of vector
2357 -> CmmExpr -- Initial vector
2358 -> [CmmExpr] -- Elements
2359 -> CmmFormal -- Destination for result
2360 -> FCode ()
2361 doVecPackOp maybe_pre_write_cast ty z es res = do
2362 dst <- newTemp ty
2363 emitAssign (CmmLocal dst) z
2364 vecPack dst es 0
2365 where
2366 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
2367 vecPack src [] _ =
2368 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
2369
2370 vecPack src (e : es) i = do
2371 dst <- newTemp ty
2372 if isFloatType (vecElemType ty)
2373 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
2374 [CmmReg (CmmLocal src), cast e, iLit])
2375 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
2376 [CmmReg (CmmLocal src), cast e, iLit])
2377 vecPack dst es (i + 1)
2378 where
2379 -- vector indices are always 32-bits
2380 iLit = CmmLit (CmmInt (toInteger i) W32)
2381
2382 cast :: CmmExpr -> CmmExpr
2383 cast val = case maybe_pre_write_cast of
2384 Nothing -> val
2385 Just cast -> CmmMachOp cast [val]
2386
2387 len :: Length
2388 len = vecLength ty
2389
2390 wid :: Width
2391 wid = typeWidth (vecElemType ty)
2392
2393 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
2394 -> CmmType -- Type of vector
2395 -> CmmExpr -- Vector
2396 -> [CmmFormal] -- Element results
2397 -> FCode ()
2398 doVecUnpackOp maybe_post_read_cast ty e res =
2399 vecUnpack res 0
2400 where
2401 vecUnpack :: [CmmFormal] -> Int -> FCode ()
2402 vecUnpack [] _ =
2403 return ()
2404
2405 vecUnpack (r : rs) i = do
2406 if isFloatType (vecElemType ty)
2407 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
2408 [e, iLit]))
2409 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
2410 [e, iLit]))
2411 vecUnpack rs (i + 1)
2412 where
2413 -- vector indices are always 32-bits
2414 iLit = CmmLit (CmmInt (toInteger i) W32)
2415
2416 cast :: CmmExpr -> CmmExpr
2417 cast val = case maybe_post_read_cast of
2418 Nothing -> val
2419 Just cast -> CmmMachOp cast [val]
2420
2421 len :: Length
2422 len = vecLength ty
2423
2424 wid :: Width
2425 wid = typeWidth (vecElemType ty)
2426
2427 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
2428 -> CmmType -- Vector type
2429 -> CmmExpr -- Source vector
2430 -> CmmExpr -- Element
2431 -> CmmExpr -- Index at which to insert element
2432 -> CmmFormal -- Destination for result
2433 -> FCode ()
2434 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
2435 platform <- getPlatform
2436 -- vector indices are always 32-bits
2437 let idx' :: CmmExpr
2438 idx' = CmmMachOp (MO_SS_Conv (wordWidth platform) W32) [idx]
2439 if isFloatType (vecElemType ty)
2440 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
2441 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
2442 where
2443 cast :: CmmExpr -> CmmExpr
2444 cast val = case maybe_pre_write_cast of
2445 Nothing -> val
2446 Just cast -> CmmMachOp cast [val]
2447
2448 len :: Length
2449 len = vecLength ty
2450
2451 wid :: Width
2452 wid = typeWidth (vecElemType ty)
2453
2454 ------------------------------------------------------------------------------
2455 -- Helpers for translating prefetching.
2456
2457
2458 -- | Translate byte array prefetch operations into proper primcalls.
2459 doPrefetchByteArrayOp :: Int
2460 -> [CmmExpr]
2461 -> FCode ()
2462 doPrefetchByteArrayOp locality [addr,idx]
2463 = do profile <- getProfile
2464 mkBasicPrefetch locality (arrWordsHdrSize profile) addr idx
2465 doPrefetchByteArrayOp _ _
2466 = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
2467
2468 -- | Translate mutable byte array prefetch operations into proper primcalls.
2469 doPrefetchMutableByteArrayOp :: Int
2470 -> [CmmExpr]
2471 -> FCode ()
2472 doPrefetchMutableByteArrayOp locality [addr,idx]
2473 = do profile <- getProfile
2474 mkBasicPrefetch locality (arrWordsHdrSize profile) addr idx
2475 doPrefetchMutableByteArrayOp _ _
2476 = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
2477
2478 -- | Translate address prefetch operations into proper primcalls.
2479 doPrefetchAddrOp ::Int
2480 -> [CmmExpr]
2481 -> FCode ()
2482 doPrefetchAddrOp locality [addr,idx]
2483 = mkBasicPrefetch locality 0 addr idx
2484 doPrefetchAddrOp _ _
2485 = panic "GHC.StgToCmm.Prim: doPrefetchAddrOp"
2486
2487 -- | Translate value prefetch operations into proper primcalls.
2488 doPrefetchValueOp :: Int
2489 -> [CmmExpr]
2490 -> FCode ()
2491 doPrefetchValueOp locality [addr]
2492 = do platform <- getPlatform
2493 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth platform)))
2494 doPrefetchValueOp _ _
2495 = panic "GHC.StgToCmm.Prim: doPrefetchValueOp"
2496
2497 -- | helper to generate prefetch primcalls
2498 mkBasicPrefetch :: Int -- Locality level 0-3
2499 -> ByteOff -- Initial offset in bytes
2500 -> CmmExpr -- Base address
2501 -> CmmExpr -- Index
2502 -> FCode ()
2503 mkBasicPrefetch locality off base idx
2504 = do platform <- getPlatform
2505 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr platform W8 (cmmOffsetB platform base off) idx]
2506 return ()
2507
2508 -- ----------------------------------------------------------------------------
2509 -- Allocating byte arrays
2510
2511 -- | Takes a register to return the newly allocated array in and the
2512 -- size of the new array in bytes. Allocates a new
2513 -- 'MutableByteArray#'.
2514 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
2515 doNewByteArrayOp res_r n = do
2516 profile <- getProfile
2517 platform <- getPlatform
2518
2519 let info_ptr = mkLblExpr mkArrWords_infoLabel
2520 rep = arrWordsRep platform n
2521
2522 tickyAllocPrim (mkIntExpr platform (arrWordsHdrSize profile))
2523 (mkIntExpr platform (nonHdrSize platform rep))
2524 (zeroExpr platform)
2525
2526 let hdr_size = fixedHdrSize profile
2527
2528 base <- allocHeapClosure rep info_ptr cccsExpr
2529 [ (mkIntExpr platform n,
2530 hdr_size + pc_OFFSET_StgArrBytes_bytes (platformConstants platform))
2531 ]
2532
2533 emit $ mkAssign (CmmLocal res_r) base
2534
2535 -- ----------------------------------------------------------------------------
2536 -- Comparing byte arrays
2537
2538 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2539 -> FCode ()
2540 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
2541 profile <- getProfile
2542 platform <- getPlatform
2543 ba1_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba1 (arrWordsHdrSize profile)) ba1_off
2544 ba2_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba2 (arrWordsHdrSize profile)) ba2_off
2545
2546 -- short-cut in case of equal pointers avoiding a costly
2547 -- subroutine call to the memcmp(3) routine; the Cmm logic below
2548 -- results in assembly code being generated for
2549 --
2550 -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
2551 -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
2552 --
2553 -- that looks like
2554 --
2555 -- leaq 16(%r14),%rax
2556 -- leaq 16(%rsi),%rbx
2557 -- xorl %ecx,%ecx
2558 -- cmpq %rbx,%rax
2559 -- je l_ptr_eq
2560 --
2561 -- ; NB: the common case (unequal pointers) falls-through
2562 -- ; the conditional jump, and therefore matches the
2563 -- ; usual static branch prediction convention of modern cpus
2564 --
2565 -- subq $8,%rsp
2566 -- movq %rbx,%rsi
2567 -- movq %rax,%rdi
2568 -- movl $10,%edx
2569 -- xorl %eax,%eax
2570 -- call memcmp
2571 -- addq $8,%rsp
2572 -- movslq %eax,%rax
2573 -- movq %rax,%rcx
2574 -- l_ptr_eq:
2575 -- movq %rcx,%rbx
2576 -- jmp *(%rbp)
2577
2578 l_ptr_eq <- newBlockId
2579 l_ptr_ne <- newBlockId
2580
2581 emit (mkAssign (CmmLocal res) (zeroExpr platform))
2582 emit (mkCbranch (cmmEqWord platform ba1_p ba2_p)
2583 l_ptr_eq l_ptr_ne (Just False))
2584
2585 emitLabel l_ptr_ne
2586 emitMemcmpCall res ba1_p ba2_p n 1
2587
2588 emitLabel l_ptr_eq
2589
2590 -- ----------------------------------------------------------------------------
2591 -- Copying byte arrays
2592
2593 -- | Takes a source 'ByteArray#', an offset in the source array, a
2594 -- destination 'MutableByteArray#', an offset into the destination
2595 -- array, and the number of bytes to copy. Copies the given number of
2596 -- bytes from the source array to the destination array.
2597 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2598 -> FCode ()
2599 doCopyByteArrayOp = emitCopyByteArray copy
2600 where
2601 -- Copy data (we assume the arrays aren't overlapping since
2602 -- they're of different types)
2603 copy _src _dst dst_p src_p bytes align =
2604 emitMemcpyCall dst_p src_p bytes align
2605
2606 -- | Takes a source 'MutableByteArray#', an offset in the source
2607 -- array, a destination 'MutableByteArray#', an offset into the
2608 -- destination array, and the number of bytes to copy. Copies the
2609 -- given number of bytes from the source array to the destination
2610 -- array.
2611 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2612 -> FCode ()
2613 doCopyMutableByteArrayOp = emitCopyByteArray copy
2614 where
2615 -- The only time the memory might overlap is when the two arrays
2616 -- we were provided are the same array!
2617 -- TODO: Optimize branch for common case of no aliasing.
2618 copy src dst dst_p src_p bytes align = do
2619 platform <- getPlatform
2620 (moveCall, cpyCall) <- forkAltPair
2621 (getCode $ emitMemmoveCall dst_p src_p bytes align)
2622 (getCode $ emitMemcpyCall dst_p src_p bytes align)
2623 emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
2624
2625 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2626 -> Alignment -> FCode ())
2627 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2628 -> FCode ()
2629 emitCopyByteArray copy src src_off dst dst_off n = do
2630 profile <- getProfile
2631 platform <- getPlatform
2632 let byteArrayAlignment = wordAlignment platform
2633 srcOffAlignment = cmmExprAlignment src_off
2634 dstOffAlignment = cmmExprAlignment dst_off
2635 align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
2636 dst_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform dst (arrWordsHdrSize profile)) dst_off
2637 src_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform src (arrWordsHdrSize profile)) src_off
2638 copy src dst dst_p src_p n align
2639
2640 -- | Takes a source 'ByteArray#', an offset in the source array, a
2641 -- destination 'Addr#', and the number of bytes to copy. Copies the given
2642 -- number of bytes from the source array to the destination memory region.
2643 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2644 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
2645 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
2646 profile <- getProfile
2647 platform <- getPlatform
2648 src_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform src (arrWordsHdrSize profile)) src_off
2649 emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
2650
2651 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
2652 -- destination 'Addr#', and the number of bytes to copy. Copies the given
2653 -- number of bytes from the source array to the destination memory region.
2654 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2655 -> FCode ()
2656 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
2657
2658 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
2659 -- the destination array, and the number of bytes to copy. Copies the given
2660 -- number of bytes from the source memory region to the destination array.
2661 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2662 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
2663 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
2664 profile <- getProfile
2665 platform <- getPlatform
2666 dst_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform dst (arrWordsHdrSize profile)) dst_off
2667 emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
2668
2669
2670 -- ----------------------------------------------------------------------------
2671 -- Setting byte arrays
2672
2673 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
2674 -- and a byte, and sets each of the selected bytes in the array to the
2675 -- character.
2676 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2677 -> FCode ()
2678 doSetByteArrayOp ba off len c = do
2679 profile <- getProfile
2680 platform <- getPlatform
2681
2682 let byteArrayAlignment = wordAlignment platform -- known since BA is allocated on heap
2683 offsetAlignment = cmmExprAlignment off
2684 align = min byteArrayAlignment offsetAlignment
2685
2686 p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba (arrWordsHdrSize profile)) off
2687 emitMemsetCall p c len align
2688
2689 -- ----------------------------------------------------------------------------
2690 -- Allocating arrays
2691
2692 -- | Allocate a new array.
2693 doNewArrayOp :: CmmFormal -- ^ return register
2694 -> SMRep -- ^ representation of the array
2695 -> CLabel -- ^ info pointer
2696 -> [(CmmExpr, ByteOff)] -- ^ header payload
2697 -> WordOff -- ^ array size
2698 -> CmmExpr -- ^ initial element
2699 -> FCode ()
2700 doNewArrayOp res_r rep info payload n init = do
2701 profile <- getProfile
2702 platform <- getPlatform
2703
2704 let info_ptr = mkLblExpr info
2705
2706 tickyAllocPrim (mkIntExpr platform (hdrSize profile rep))
2707 (mkIntExpr platform (nonHdrSize platform rep))
2708 (zeroExpr platform)
2709
2710 base <- allocHeapClosure rep info_ptr cccsExpr payload
2711
2712 arr <- CmmLocal `fmap` newTemp (bWord platform)
2713 emit $ mkAssign arr base
2714
2715 -- Initialise all elements of the array
2716 let mkOff off = cmmOffsetW platform (CmmReg arr) (hdrSizeW profile rep + off)
2717 initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
2718 emit (catAGraphs initialization)
2719
2720 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2721
2722 -- ----------------------------------------------------------------------------
2723 -- Copying pointer arrays
2724
2725 -- EZY: This code has an unusually high amount of assignTemp calls, seen
2726 -- nowhere else in the code generator. This is mostly because these
2727 -- "primitive" ops result in a surprisingly large amount of code. It
2728 -- will likely be worthwhile to optimize what is emitted here, so that
2729 -- our optimization passes don't waste time repeatedly optimizing the
2730 -- same bits of code.
2731
2732 -- More closely imitates 'assignTemp' from the old code generator, which
2733 -- returns a CmmExpr rather than a LocalReg.
2734 assignTempE :: CmmExpr -> FCode CmmExpr
2735 assignTempE e = do
2736 t <- assignTemp e
2737 return (CmmReg (CmmLocal t))
2738
2739 -- | Takes a source 'Array#', an offset in the source array, a
2740 -- destination 'MutableArray#', an offset into the destination array,
2741 -- and the number of elements to copy. Copies the given number of
2742 -- elements from the source array to the destination array.
2743 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2744 -> FCode ()
2745 doCopyArrayOp = emitCopyArray copy
2746 where
2747 -- Copy data (we assume the arrays aren't overlapping since
2748 -- they're of different types)
2749 copy _src _dst dst_p src_p bytes =
2750 do platform <- getPlatform
2751 emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
2752 (wordAlignment platform)
2753
2754
2755 -- | Takes a source 'MutableArray#', an offset in the source array, a
2756 -- destination 'MutableArray#', an offset into the destination array,
2757 -- and the number of elements to copy. Copies the given number of
2758 -- elements from the source array to the destination array.
2759 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2760 -> FCode ()
2761 doCopyMutableArrayOp = emitCopyArray copy
2762 where
2763 -- The only time the memory might overlap is when the two arrays
2764 -- we were provided are the same array!
2765 -- TODO: Optimize branch for common case of no aliasing.
2766 copy src dst dst_p src_p bytes = do
2767 platform <- getPlatform
2768 (moveCall, cpyCall) <- forkAltPair
2769 (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr platform bytes)
2770 (wordAlignment platform))
2771 (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
2772 (wordAlignment platform))
2773 emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
2774
2775 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2776 -> FCode ()) -- ^ copy function
2777 -> CmmExpr -- ^ source array
2778 -> CmmExpr -- ^ offset in source array
2779 -> CmmExpr -- ^ destination array
2780 -> CmmExpr -- ^ offset in destination array
2781 -> WordOff -- ^ number of elements to copy
2782 -> FCode ()
2783 emitCopyArray copy src0 src_off dst0 dst_off0 n =
2784 when (n /= 0) $ do
2785 profile <- getProfile
2786 platform <- getPlatform
2787
2788 -- Passed as arguments (be careful)
2789 src <- assignTempE src0
2790 dst <- assignTempE dst0
2791 dst_off <- assignTempE dst_off0
2792
2793 -- Nonmoving collector write barrier
2794 emitCopyUpdRemSetPush platform (arrPtrsHdrSize profile) dst dst_off n
2795
2796 -- Set the dirty bit in the header.
2797 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
2798
2799 dst_elems_p <- assignTempE $ cmmOffsetB platform dst
2800 (arrPtrsHdrSize profile)
2801 dst_p <- assignTempE $ cmmOffsetExprW platform dst_elems_p dst_off
2802 src_p <- assignTempE $ cmmOffsetExprW platform
2803 (cmmOffsetB platform src (arrPtrsHdrSize profile)) src_off
2804 let bytes = wordsToBytes platform n
2805
2806 copy src dst dst_p src_p bytes
2807
2808 -- The base address of the destination card table
2809 dst_cards_p <- assignTempE $ cmmOffsetExprW platform dst_elems_p
2810 (loadArrPtrsSize profile dst)
2811
2812 emitSetCards dst_off dst_cards_p n
2813
2814 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2815 -> FCode ()
2816 doCopySmallArrayOp = emitCopySmallArray copy
2817 where
2818 -- Copy data (we assume the arrays aren't overlapping since
2819 -- they're of different types)
2820 copy _src _dst dst_p src_p bytes =
2821 do platform <- getPlatform
2822 emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
2823 (wordAlignment platform)
2824
2825
2826 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2827 -> FCode ()
2828 doCopySmallMutableArrayOp = emitCopySmallArray copy
2829 where
2830 -- The only time the memory might overlap is when the two arrays
2831 -- we were provided are the same array!
2832 -- TODO: Optimize branch for common case of no aliasing.
2833 copy src dst dst_p src_p bytes = do
2834 platform <- getPlatform
2835 (moveCall, cpyCall) <- forkAltPair
2836 (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr platform bytes)
2837 (wordAlignment platform))
2838 (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr platform bytes)
2839 (wordAlignment platform))
2840 emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
2841
2842 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2843 -> FCode ()) -- ^ copy function
2844 -> CmmExpr -- ^ source array
2845 -> CmmExpr -- ^ offset in source array
2846 -> CmmExpr -- ^ destination array
2847 -> CmmExpr -- ^ offset in destination array
2848 -> WordOff -- ^ number of elements to copy
2849 -> FCode ()
2850 emitCopySmallArray copy src0 src_off dst0 dst_off n =
2851 when (n /= 0) $ do
2852 profile <- getProfile
2853 platform <- getPlatform
2854
2855 -- Passed as arguments (be careful)
2856 src <- assignTempE src0
2857 dst <- assignTempE dst0
2858
2859 -- Nonmoving collector write barrier
2860 emitCopyUpdRemSetPush platform (smallArrPtrsHdrSize profile) dst dst_off n
2861
2862 -- Set the dirty bit in the header.
2863 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2864
2865 dst_p <- assignTempE $ cmmOffsetExprW platform
2866 (cmmOffsetB platform dst (smallArrPtrsHdrSize profile)) dst_off
2867 src_p <- assignTempE $ cmmOffsetExprW platform
2868 (cmmOffsetB platform src (smallArrPtrsHdrSize profile)) src_off
2869 let bytes = wordsToBytes platform n
2870
2871 copy src dst dst_p src_p bytes
2872
2873 -- | Takes an info table label, a register to return the newly
2874 -- allocated array in, a source array, an offset in the source array,
2875 -- and the number of elements to copy. Allocates a new array and
2876 -- initializes it from the source array.
2877 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2878 -> FCode ()
2879 emitCloneArray info_p res_r src src_off n = do
2880 profile <- getProfile
2881 platform <- getPlatform
2882
2883 let info_ptr = mkLblExpr info_p
2884 rep = arrPtrsRep platform n
2885
2886 tickyAllocPrim (mkIntExpr platform (arrPtrsHdrSize profile))
2887 (mkIntExpr platform (nonHdrSize platform rep))
2888 (zeroExpr platform)
2889
2890 let hdr_size = fixedHdrSize profile
2891 constants = platformConstants platform
2892
2893 base <- allocHeapClosure rep info_ptr cccsExpr
2894 [ (mkIntExpr platform n,
2895 hdr_size + pc_OFFSET_StgMutArrPtrs_ptrs constants)
2896 , (mkIntExpr platform (nonHdrSizeW rep),
2897 hdr_size + pc_OFFSET_StgMutArrPtrs_size constants)
2898 ]
2899
2900 arr <- CmmLocal `fmap` newTemp (bWord platform)
2901 emit $ mkAssign arr base
2902
2903 dst_p <- assignTempE $ cmmOffsetB platform (CmmReg arr)
2904 (arrPtrsHdrSize profile)
2905 src_p <- assignTempE $ cmmOffsetExprW platform src
2906 (cmmAddWord platform
2907 (mkIntExpr platform (arrPtrsHdrSizeW profile)) src_off)
2908
2909 emitMemcpyCall dst_p src_p (mkIntExpr platform (wordsToBytes platform n))
2910 (wordAlignment platform)
2911
2912 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2913
2914 -- | Takes an info table label, a register to return the newly
2915 -- allocated array in, a source array, an offset in the source array,
2916 -- and the number of elements to copy. Allocates a new array and
2917 -- initializes it from the source array.
2918 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2919 -> FCode ()
2920 emitCloneSmallArray info_p res_r src src_off n = do
2921 profile <- getProfile
2922 platform <- getPlatform
2923
2924 let info_ptr = mkLblExpr info_p
2925 rep = smallArrPtrsRep n
2926
2927 tickyAllocPrim (mkIntExpr platform (smallArrPtrsHdrSize profile))
2928 (mkIntExpr platform (nonHdrSize platform rep))
2929 (zeroExpr platform)
2930
2931 let hdr_size = fixedHdrSize profile
2932
2933 base <- allocHeapClosure rep info_ptr cccsExpr
2934 [ (mkIntExpr platform n,
2935 hdr_size + pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform))
2936 ]
2937
2938 arr <- CmmLocal `fmap` newTemp (bWord platform)
2939 emit $ mkAssign arr base
2940
2941 dst_p <- assignTempE $ cmmOffsetB platform (CmmReg arr)
2942 (smallArrPtrsHdrSize profile)
2943 src_p <- assignTempE $ cmmOffsetExprW platform src
2944 (cmmAddWord platform
2945 (mkIntExpr platform (smallArrPtrsHdrSizeW profile)) src_off)
2946
2947 emitMemcpyCall dst_p src_p (mkIntExpr platform (wordsToBytes platform n))
2948 (wordAlignment platform)
2949
2950 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2951
2952 -- | Takes and offset in the destination array, the base address of
2953 -- the card table, and the number of elements affected (*not* the
2954 -- number of cards). The number of elements may not be zero.
2955 -- Marks the relevant cards as dirty.
2956 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2957 emitSetCards dst_start dst_cards_start n = do
2958 platform <- getPlatform
2959 start_card <- assignTempE $ cardCmm platform dst_start
2960 let end_card = cardCmm platform
2961 (cmmSubWord platform
2962 (cmmAddWord platform dst_start (mkIntExpr platform n))
2963 (mkIntExpr platform 1))
2964 emitMemsetCall (cmmAddWord platform dst_cards_start start_card)
2965 (mkIntExpr platform 1)
2966 (cmmAddWord platform (cmmSubWord platform end_card start_card) (mkIntExpr platform 1))
2967 (mkAlignment 1) -- no alignment (1 byte)
2968
2969 -- Convert an element index to a card index
2970 cardCmm :: Platform -> CmmExpr -> CmmExpr
2971 cardCmm platform i =
2972 cmmUShrWord platform i (mkIntExpr platform (pc_MUT_ARR_PTRS_CARD_BITS (platformConstants platform)))
2973
2974 ------------------------------------------------------------------------------
2975 -- SmallArray PrimOp implementations
2976
2977 doReadSmallPtrArrayOp :: LocalReg
2978 -> CmmExpr
2979 -> CmmExpr
2980 -> FCode ()
2981 doReadSmallPtrArrayOp res addr idx = do
2982 profile <- getProfile
2983 platform <- getPlatform
2984 mkBasicIndexedRead (smallArrPtrsHdrSize profile) Nothing (gcWord platform) res addr
2985 (gcWord platform) idx
2986
2987 doWriteSmallPtrArrayOp :: CmmExpr
2988 -> CmmExpr
2989 -> CmmExpr
2990 -> FCode ()
2991 doWriteSmallPtrArrayOp addr idx val = do
2992 profile <- getProfile
2993 platform <- getPlatform
2994 let ty = cmmExprType platform val
2995
2996 -- Update remembered set for non-moving collector
2997 tmp <- newTemp ty
2998 mkBasicIndexedRead (smallArrPtrsHdrSize profile) Nothing ty tmp addr ty idx
2999 whenUpdRemSetEnabled $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
3000
3001 emitPrimCall [] MO_WriteBarrier [] -- #12469
3002 mkBasicIndexedWrite (smallArrPtrsHdrSize profile) Nothing addr ty idx val
3003 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
3004
3005 ------------------------------------------------------------------------------
3006 -- Atomic read-modify-write
3007
3008 -- | Emit an atomic modification to a byte array element. The result
3009 -- reg contains that previous value of the element. Implies a full
3010 -- memory barrier.
3011 doAtomicByteArrayRMW
3012 :: LocalReg -- ^ Result reg
3013 -> AtomicMachOp -- ^ Atomic op (e.g. add)
3014 -> CmmExpr -- ^ MutableByteArray#
3015 -> CmmExpr -- ^ Index
3016 -> CmmType -- ^ Type of element by which we are indexing
3017 -> CmmExpr -- ^ Op argument (e.g. amount to add)
3018 -> FCode ()
3019 doAtomicByteArrayRMW res amop mba idx idx_ty n = do
3020 profile <- getProfile
3021 platform <- getPlatform
3022 let width = typeWidth idx_ty
3023 addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
3024 width mba idx
3025 doAtomicAddrRMW res amop addr idx_ty n
3026
3027 doAtomicAddrRMW
3028 :: LocalReg -- ^ Result reg
3029 -> AtomicMachOp -- ^ Atomic op (e.g. add)
3030 -> CmmExpr -- ^ Addr#
3031 -> CmmType -- ^ Pointed value type
3032 -> CmmExpr -- ^ Op argument (e.g. amount to add)
3033 -> FCode ()
3034 doAtomicAddrRMW res amop addr ty n =
3035 emitPrimCall
3036 [ res ]
3037 (MO_AtomicRMW (typeWidth ty) amop)
3038 [ addr, n ]
3039
3040 -- | Emit an atomic read to a byte array that acts as a memory barrier.
3041 doAtomicReadByteArray
3042 :: LocalReg -- ^ Result reg
3043 -> CmmExpr -- ^ MutableByteArray#
3044 -> CmmExpr -- ^ Index
3045 -> CmmType -- ^ Type of element by which we are indexing
3046 -> FCode ()
3047 doAtomicReadByteArray res mba idx idx_ty = do
3048 profile <- getProfile
3049 platform <- getPlatform
3050 let width = typeWidth idx_ty
3051 addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
3052 width mba idx
3053 doAtomicReadAddr res addr idx_ty
3054
3055 -- | Emit an atomic read to an address that acts as a memory barrier.
3056 doAtomicReadAddr
3057 :: LocalReg -- ^ Result reg
3058 -> CmmExpr -- ^ Addr#
3059 -> CmmType -- ^ Type of element by which we are indexing
3060 -> FCode ()
3061 doAtomicReadAddr res addr ty =
3062 emitPrimCall
3063 [ res ]
3064 (MO_AtomicRead (typeWidth ty))
3065 [ addr ]
3066
3067 -- | Emit an atomic write to a byte array that acts as a memory barrier.
3068 doAtomicWriteByteArray
3069 :: CmmExpr -- ^ MutableByteArray#
3070 -> CmmExpr -- ^ Index
3071 -> CmmType -- ^ Type of element by which we are indexing
3072 -> CmmExpr -- ^ Value to write
3073 -> FCode ()
3074 doAtomicWriteByteArray mba idx idx_ty val = do
3075 profile <- getProfile
3076 platform <- getPlatform
3077 let width = typeWidth idx_ty
3078 addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
3079 width mba idx
3080 doAtomicWriteAddr addr idx_ty val
3081
3082 -- | Emit an atomic write to an address that acts as a memory barrier.
3083 doAtomicWriteAddr
3084 :: CmmExpr -- ^ Addr#
3085 -> CmmType -- ^ Type of element by which we are indexing
3086 -> CmmExpr -- ^ Value to write
3087 -> FCode ()
3088 doAtomicWriteAddr addr ty val =
3089 emitPrimCall
3090 [ {- no results -} ]
3091 (MO_AtomicWrite (typeWidth ty))
3092 [ addr, val ]
3093
3094 doCasByteArray
3095 :: LocalReg -- ^ Result reg
3096 -> CmmExpr -- ^ MutableByteArray#
3097 -> CmmExpr -- ^ Index
3098 -> CmmType -- ^ Type of element by which we are indexing
3099 -> CmmExpr -- ^ Old value
3100 -> CmmExpr -- ^ New value
3101 -> FCode ()
3102 doCasByteArray res mba idx idx_ty old new = do
3103 profile <- getProfile
3104 platform <- getPlatform
3105 let width = typeWidth idx_ty
3106 addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
3107 width mba idx
3108 emitPrimCall
3109 [ res ]
3110 (MO_Cmpxchg width)
3111 [ addr, old, new ]
3112
3113 ------------------------------------------------------------------------------
3114 -- Helpers for emitting function calls
3115
3116 -- | Emit a call to @memcpy@.
3117 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
3118 emitMemcpyCall dst src n align =
3119 emitPrimCall
3120 [ {-no results-} ]
3121 (MO_Memcpy (alignmentBytes align))
3122 [ dst, src, n ]
3123
3124 -- | Emit a call to @memmove@.
3125 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
3126 emitMemmoveCall dst src n align =
3127 emitPrimCall
3128 [ {- no results -} ]
3129 (MO_Memmove (alignmentBytes align))
3130 [ dst, src, n ]
3131
3132 -- | Emit a call to @memset@. The second argument must fit inside an
3133 -- unsigned char.
3134 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
3135 emitMemsetCall dst c n align =
3136 emitPrimCall
3137 [ {- no results -} ]
3138 (MO_Memset (alignmentBytes align))
3139 [ dst, c, n ]
3140
3141 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
3142 emitMemcmpCall res ptr1 ptr2 n align = do
3143 -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
3144 -- code-gens currently call out to the @memcmp(3)@ C function.
3145 -- This was easier than moving the sign-extensions into
3146 -- all the code-gens.
3147 platform <- getPlatform
3148 let is32Bit = typeWidth (localRegType res) == W32
3149
3150 cres <- if is32Bit
3151 then return res
3152 else newTemp b32
3153
3154 emitPrimCall
3155 [ cres ]
3156 (MO_Memcmp align)
3157 [ ptr1, ptr2, n ]
3158
3159 unless is32Bit $
3160 emit $ mkAssign (CmmLocal res)
3161 (CmmMachOp
3162 (mo_s_32ToWord platform)
3163 [(CmmReg (CmmLocal cres))])
3164
3165 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
3166 emitBSwapCall res x width =
3167 emitPrimCall
3168 [ res ]
3169 (MO_BSwap width)
3170 [ x ]
3171
3172 emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
3173 emitBRevCall res x width =
3174 emitPrimCall
3175 [ res ]
3176 (MO_BRev width)
3177 [ x ]
3178
3179 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
3180 emitPopCntCall res x width =
3181 emitPrimCall
3182 [ res ]
3183 (MO_PopCnt width)
3184 [ x ]
3185
3186 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
3187 emitPdepCall res x y width =
3188 emitPrimCall
3189 [ res ]
3190 (MO_Pdep width)
3191 [ x, y ]
3192
3193 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
3194 emitPextCall res x y width =
3195 emitPrimCall
3196 [ res ]
3197 (MO_Pext width)
3198 [ x, y ]
3199
3200 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
3201 emitClzCall res x width =
3202 emitPrimCall
3203 [ res ]
3204 (MO_Clz width)
3205 [ x ]
3206
3207 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
3208 emitCtzCall res x width =
3209 emitPrimCall
3210 [ res ]
3211 (MO_Ctz width)
3212 [ x ]
3213
3214 ---------------------------------------------------------------------------
3215 -- Pushing to the update remembered set
3216 ---------------------------------------------------------------------------
3217
3218 -- | Push a range of pointer-array elements that are about to be copied over to
3219 -- the update remembered set.
3220 emitCopyUpdRemSetPush :: Platform
3221 -> ByteOff -- ^ array header size (in bytes)
3222 -> CmmExpr -- ^ destination array
3223 -> CmmExpr -- ^ offset in destination array (in words)
3224 -> Int -- ^ number of elements to copy
3225 -> FCode ()
3226 emitCopyUpdRemSetPush _platform _hdr_size _dst _dst_off 0 = return ()
3227 emitCopyUpdRemSetPush platform hdr_size dst dst_off n =
3228 whenUpdRemSetEnabled $ do
3229 updfr_off <- getUpdFrameOff
3230 graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off []
3231 emit graph
3232 where
3233 lbl = mkLblExpr $ mkPrimCallLabel
3234 $ PrimCall (fsLit "stg_copyArray_barrier") rtsUnit
3235 args =
3236 [ mkIntExpr platform hdr_size
3237 , dst
3238 , dst_off
3239 , mkIntExpr platform n
3240 ]