never executed always true always false
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE CPP #-}
3 {-# LANGUAGE DeriveDataTypeable #-}
4 {-# LANGUAGE DerivingStrategies #-}
5 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
6 {-# LANGUAGE MagicHash #-}
7 {-# LANGUAGE UnboxedTuples #-}
8 {-# LANGUAGE UnliftedFFITypes #-}
9
10 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
11 -- We always optimise this, otherwise performance of a non-optimised
12 -- compiler is severely affected
13
14 -- |
15 -- There are two principal string types used internally by GHC:
16 --
17 -- ['FastString']
18 --
19 -- * A compact, hash-consed, representation of character strings.
20 -- * Generated by 'fsLit'.
21 -- * You can get a 'GHC.Types.Unique.Unique' from them.
22 -- * Equality test is O(1) (it uses the Unique).
23 -- * Comparison is O(1) or O(n):
24 -- * O(n) but deterministic with lexical comparison (`lexicalCompareFS`)
25 -- * O(1) but non-deterministic with Unique comparison (`uniqCompareFS`)
26 -- * Turn into 'GHC.Utils.Outputable.SDoc' with 'GHC.Utils.Outputable.ftext'.
27 --
28 -- ['PtrString']
29 --
30 -- * Pointer and size of a Latin-1 encoded string.
31 -- * Practically no operations.
32 -- * Outputting them is fast.
33 -- * Generated by 'mkPtrString'.
34 -- * Length of string literals (mkPtrString "abc") is computed statically
35 -- * Turn into 'GHC.Utils.Outputable.SDoc' with 'GHC.Utils.Outputable.ptext'
36 -- * Requires manual memory management.
37 -- Improper use may lead to memory leaks or dangling pointers.
38 -- * It assumes Latin-1 as the encoding, therefore it cannot represent
39 -- arbitrary Unicode strings.
40 --
41 -- Use 'PtrString' unless you want the facilities of 'FastString'.
42 module GHC.Data.FastString
43 (
44 -- * ByteString
45 bytesFS,
46 fastStringToByteString,
47 mkFastStringByteString,
48 fastZStringToByteString,
49 unsafeMkByteString,
50
51 -- * ShortByteString
52 fastStringToShortByteString,
53 mkFastStringShortByteString,
54
55 -- * FastZString
56 FastZString,
57 hPutFZS,
58 zString,
59 lengthFZS,
60
61 -- * FastStrings
62 FastString(..), -- not abstract, for now.
63 NonDetFastString (..),
64 LexicalFastString (..),
65
66 -- ** Construction
67 fsLit,
68 mkFastString,
69 mkFastStringBytes,
70 mkFastStringByteList,
71 mkFastString#,
72
73 -- ** Deconstruction
74 unpackFS, -- :: FastString -> String
75 unconsFS, -- :: FastString -> Maybe (Char, FastString)
76
77 -- ** Encoding
78 zEncodeFS,
79
80 -- ** Operations
81 uniqueOfFS,
82 lengthFS,
83 nullFS,
84 appendFS,
85 headFS,
86 concatFS,
87 consFS,
88 nilFS,
89 isUnderscoreFS,
90 lexicalCompareFS,
91 uniqCompareFS,
92
93 -- ** Outputting
94 hPutFS,
95
96 -- ** Internal
97 getFastStringTable,
98 getFastStringZEncCounter,
99
100 -- * PtrStrings
101 PtrString (..),
102
103 -- ** Construction
104 mkPtrString#,
105 mkPtrString,
106
107 -- ** Deconstruction
108 unpackPtrString,
109
110 -- ** Operations
111 lengthPS
112 ) where
113
114 -- For GHC_STAGE
115 #include "ghcplatform.h"
116
117 import GHC.Prelude as Prelude
118
119 import GHC.Utils.Encoding
120 import GHC.Utils.IO.Unsafe
121 import GHC.Utils.Panic.Plain
122 import GHC.Utils.Misc
123 import GHC.Data.FastMutInt
124
125 import Control.Concurrent.MVar
126 import Control.DeepSeq
127 import Control.Monad
128 import Data.ByteString (ByteString)
129 import Data.ByteString.Short (ShortByteString)
130 import qualified Data.ByteString as BS
131 import qualified Data.ByteString.Char8 as BSC
132 import qualified Data.ByteString.Unsafe as BS
133 import qualified Data.ByteString.Short as SBS
134 #if !MIN_VERSION_bytestring(0,11,0)
135 import qualified Data.ByteString.Short.Internal as SBS
136 #endif
137 import Foreign.C
138 import System.IO
139 import Data.Data
140 import Data.IORef
141 import Data.Char
142 import Data.Semigroup as Semi
143
144 import Foreign
145
146 #if GHC_STAGE >= 2
147 import GHC.Conc.Sync (sharedCAF)
148 #endif
149
150 #if __GLASGOW_HASKELL__ < 811
151 import GHC.Base (unpackCString#,unpackNBytes#)
152 #endif
153 import GHC.Exts
154 import GHC.IO
155
156 -- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString'
157 bytesFS, fastStringToByteString :: FastString -> ByteString
158 {-# INLINE[1] bytesFS #-}
159 bytesFS f = SBS.fromShort $ fs_sbs f
160
161 {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
162 fastStringToByteString = bytesFS
163
164 fastStringToShortByteString :: FastString -> ShortByteString
165 fastStringToShortByteString = fs_sbs
166
167 fastZStringToByteString :: FastZString -> ByteString
168 fastZStringToByteString (FastZString bs) = bs
169
170 -- This will drop information if any character > '\xFF'
171 unsafeMkByteString :: String -> ByteString
172 unsafeMkByteString = BSC.pack
173
174 hashFastString :: FastString -> Int
175 hashFastString fs = hashStr $ fs_sbs fs
176
177 -- -----------------------------------------------------------------------------
178
179 newtype FastZString = FastZString ByteString
180 deriving NFData
181
182 hPutFZS :: Handle -> FastZString -> IO ()
183 hPutFZS handle (FastZString bs) = BS.hPut handle bs
184
185 zString :: FastZString -> String
186 zString (FastZString bs) =
187 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
188
189 lengthFZS :: FastZString -> Int
190 lengthFZS (FastZString bs) = BS.length bs
191
192 mkFastZStringString :: String -> FastZString
193 mkFastZStringString str = FastZString (BSC.pack str)
194
195 -- -----------------------------------------------------------------------------
196
197 {-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
198 'FastString's are stored in a global hashtable to support fast O(1)
199 comparison.
200
201 It is also associated with a lazy reference to the Z-encoding
202 of this string which is used by the compiler internally.
203 -}
204 data FastString = FastString {
205 uniq :: {-# UNPACK #-} !Int, -- unique id
206 n_chars :: {-# UNPACK #-} !Int, -- number of chars
207 fs_sbs :: {-# UNPACK #-} !ShortByteString,
208 fs_zenc :: FastZString
209 -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
210 -- GHC.Utils.Encoding.
211 --
212 -- Since 'FastString's are globally memoized this is computed at most
213 -- once for any given string.
214 }
215
216 instance Eq FastString where
217 f1 == f2 = uniq f1 == uniq f2
218
219 -- We don't provide any "Ord FastString" instance to force you to think about
220 -- which ordering you want:
221 -- * lexical: deterministic, O(n). Cf lexicalCompareFS and LexicalFastString.
222 -- * by unique: non-deterministic, O(1). Cf uniqCompareFS and NonDetFastString.
223
224 instance IsString FastString where
225 fromString = fsLit
226
227 instance Semi.Semigroup FastString where
228 (<>) = appendFS
229
230 instance Monoid FastString where
231 mempty = nilFS
232 mappend = (Semi.<>)
233 mconcat = concatFS
234
235 instance Show FastString where
236 show fs = show (unpackFS fs)
237
238 instance Data FastString where
239 -- don't traverse?
240 toConstr _ = abstractConstr "FastString"
241 gunfold _ _ = error "gunfold"
242 dataTypeOf _ = mkNoRepType "FastString"
243
244 instance NFData FastString where
245 rnf fs = seq fs ()
246
247 -- | Compare FastString lexically
248 --
249 -- If you don't care about the lexical ordering, use `uniqCompareFS` instead.
250 lexicalCompareFS :: FastString -> FastString -> Ordering
251 lexicalCompareFS fs1 fs2 =
252 if uniq fs1 == uniq fs2 then EQ else
253 utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
254 -- perform a lexical comparison taking into account the Modified UTF-8
255 -- encoding we use (cf #18562)
256
257 -- | Compare FastString by their Unique (not lexically).
258 --
259 -- Much cheaper than `lexicalCompareFS` but non-deterministic!
260 uniqCompareFS :: FastString -> FastString -> Ordering
261 uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2)
262
263 -- | Non-deterministic FastString
264 --
265 -- This is a simple FastString wrapper with an Ord instance using
266 -- `uniqCompareFS` (i.e. which compares FastStrings on their Uniques). Hence it
267 -- is not deterministic from one run to the other.
268 newtype NonDetFastString
269 = NonDetFastString FastString
270 deriving newtype (Eq, Show)
271 deriving stock Data
272
273 instance Ord NonDetFastString where
274 compare (NonDetFastString fs1) (NonDetFastString fs2) = uniqCompareFS fs1 fs2
275
276 -- | Lexical FastString
277 --
278 -- This is a simple FastString wrapper with an Ord instance using
279 -- `lexicalCompareFS` (i.e. which compares FastStrings on their String
280 -- representation). Hence it is deterministic from one run to the other.
281 newtype LexicalFastString
282 = LexicalFastString FastString
283 deriving newtype (Eq, Show)
284 deriving stock Data
285
286 instance Ord LexicalFastString where
287 compare (LexicalFastString fs1) (LexicalFastString fs2) = lexicalCompareFS fs1 fs2
288
289 -- -----------------------------------------------------------------------------
290 -- Construction
291
292 {-
293 Internally, the compiler will maintain a fast string symbol table, providing
294 sharing and fast comparison. Creation of new @FastString@s then covertly does a
295 lookup, re-using the @FastString@ if there was a hit.
296
297 The design of the FastString hash table allows for lockless concurrent reads
298 and updates to multiple buckets with low synchronization overhead.
299
300 See Note [Updating the FastString table] on how it's updated.
301 -}
302 data FastStringTable = FastStringTable
303 {-# UNPACK #-} !FastMutInt -- the unique ID counter shared with all buckets
304 {-# UNPACK #-} !FastMutInt -- number of computed z-encodings for all buckets
305 (Array# (IORef FastStringTableSegment)) -- concurrent segments
306
307 data FastStringTableSegment = FastStringTableSegment
308 {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
309 {-# UNPACK #-} !FastMutInt -- the number of elements
310 (MutableArray# RealWorld [FastString]) -- buckets in this segment
311
312 {-
313 Following parameters are determined based on:
314
315 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
316 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
317 on 2018-10-24, we have 13920 entries.
318 -}
319 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
320 segmentBits = 8
321 numSegments = 256 -- bit segmentBits
322 segmentMask = 0xff -- bit segmentBits - 1
323 initialNumBuckets = 64
324
325 hashToSegment# :: Int# -> Int#
326 hashToSegment# hash# = hash# `andI#` segmentMask#
327 where
328 !(I# segmentMask#) = segmentMask
329
330 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
331 hashToIndex# buckets# hash# =
332 (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
333 where
334 !(I# segmentBits#) = segmentBits
335 size# = sizeofMutableArray# buckets#
336
337 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
338 maybeResizeSegment segmentRef = do
339 segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
340 let oldSize# = sizeofMutableArray# old#
341 newSize# = oldSize# *# 2#
342 (I# n#) <- readFastMutInt counter
343 if isTrue# (n# <# newSize#) -- maximum load of 1
344 then return segment
345 else do
346 resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
347 case newArray# newSize# [] s1# of
348 (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
349 forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
350 fsList <- IO $ readArray# old# i#
351 forM_ fsList $ \fs -> do
352 let -- Shall we store in hash value in FastString instead?
353 !(I# hash#) = hashFastString fs
354 idx# = hashToIndex# new# hash#
355 IO $ \s1# ->
356 case readArray# new# idx# s1# of
357 (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
358 s3# -> (# s3#, () #)
359 writeIORef segmentRef resizedSegment
360 return resizedSegment
361
362 {-# NOINLINE stringTable #-}
363 stringTable :: FastStringTable
364 stringTable = unsafePerformIO $ do
365 let !(I# numSegments#) = numSegments
366 !(I# initialNumBuckets#) = initialNumBuckets
367 loop a# i# s1#
368 | isTrue# (i# ==# numSegments#) = s1#
369 | otherwise = case newMVar () `unIO` s1# of
370 (# s2#, lock #) -> case newFastMutInt 0 `unIO` s2# of
371 (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
372 (# s4#, buckets# #) -> case newIORef
373 (FastStringTableSegment lock counter buckets#) `unIO` s4# of
374 (# s5#, segment #) -> case writeArray# a# i# segment s5# of
375 s6# -> loop a# (i# +# 1#) s6#
376 uid <- newFastMutInt 603979776 -- ord '$' * 0x01000000
377 n_zencs <- newFastMutInt 0
378 tab <- IO $ \s1# ->
379 case newArray# numSegments# (panic "string_table") s1# of
380 (# s2#, arr# #) -> case loop arr# 0# s2# of
381 s3# -> case unsafeFreezeArray# arr# s3# of
382 (# s4#, segments# #) ->
383 (# s4#, FastStringTable uid n_zencs segments# #)
384
385 -- use the support wired into the RTS to share this CAF among all images of
386 -- libHSghc
387 #if GHC_STAGE < 2
388 return tab
389 #else
390 sharedCAF tab getOrSetLibHSghcFastStringTable
391
392 -- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous
393 -- RTS might not have this symbol
394 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
395 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
396 #endif
397
398 {-
399
400 We include the FastString table in the `sharedCAF` mechanism because we'd like
401 FastStrings created by a Core plugin to have the same uniques as corresponding
402 strings created by the host compiler itself. For example, this allows plugins
403 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
404 even re-invoke the parser.
405
406 In particular, the following little sanity test was failing in a plugin
407 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
408 be looked up /by the plugin/.
409
410 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
411 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
412
413 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
414 plugin's FastString.string_table is empty, constructing the RdrName also
415 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
416 uniques are almost certainly unequal to the ones that the host compiler
417 originally assigned to those FastStrings. Thus the lookup fails since the
418 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
419 unique.
420
421 Maintaining synchronization of the two instances of this global is rather
422 difficult because of the uses of `unsafePerformIO` in this module. Not
423 synchronizing them risks breaking the rather major invariant that two
424 FastStrings with the same unique have the same string. Thus we use the
425 lower-level `sharedCAF` mechanism that relies on Globals.c.
426
427 -}
428
429 mkFastString# :: Addr# -> FastString
430 {-# INLINE mkFastString# #-}
431 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
432 where ptr = Ptr a#
433
434 {- Note [Updating the FastString table]
435
436 We use a concurrent hashtable which contains multiple segments, each hash value
437 always maps to the same segment. Read is lock-free, write to the a segment
438 should acquire a lock for that segment to avoid race condition, writes to
439 different segments are independent.
440
441 The procedure goes like this:
442
443 1. Find out which segment to operate on based on the hash value
444 2. Read the relevant bucket and perform a look up of the string.
445 3. If it exists, return it.
446 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
447 to update the relevant segment with this FastString:
448
449 * Resize the segment by doubling the number of buckets when the number of
450 FastStrings in this segment grows beyond the threshold.
451 * Double check that the string is not in the bucket. Another thread may have
452 inserted it while we were creating our string.
453 * Return the existing FastString if it exists. The one we preemptively
454 created will get GCed.
455 * Otherwise, insert and return the string we created.
456 -}
457
458 mkFastStringWith
459 :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
460 mkFastStringWith mk_fs sbs = do
461 FastStringTableSegment lock _ buckets# <- readIORef segmentRef
462 let idx# = hashToIndex# buckets# hash#
463 bucket <- IO $ readArray# buckets# idx#
464 res <- bucket_match bucket sbs
465 case res of
466 Just found -> return found
467 Nothing -> do
468 -- The withMVar below is not dupable. It can lead to deadlock if it is
469 -- only run partially and putMVar is not called after takeMVar.
470 noDuplicate
471 n <- get_uid
472 new_fs <- mk_fs n n_zencs
473 withMVar lock $ \_ -> insert new_fs
474 where
475 !(FastStringTable uid n_zencs segments#) = stringTable
476 get_uid = atomicFetchAddFastMut uid 1
477
478 !(I# hash#) = hashStr sbs
479 (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
480 insert fs = do
481 FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
482 let idx# = hashToIndex# buckets# hash#
483 bucket <- IO $ readArray# buckets# idx#
484 res <- bucket_match bucket sbs
485 case res of
486 -- The FastString was added by another thread after previous read and
487 -- before we acquired the write lock.
488 Just found -> return found
489 Nothing -> do
490 IO $ \s1# ->
491 case writeArray# buckets# idx# (fs : bucket) s1# of
492 s2# -> (# s2#, () #)
493 _ <- atomicFetchAddFastMut counter 1
494 return fs
495
496 bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
497 bucket_match [] _ = return Nothing
498 bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
499 | fs_sbs == sbs = return (Just fs)
500 | otherwise = bucket_match ls sbs
501
502 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
503 mkFastStringBytes !ptr !len =
504 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
505 -- idempotent.
506 unsafeDupablePerformIO $ do
507 sbs <- newSBSFromPtr ptr len
508 mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
509
510 newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
511 newSBSFromPtr (Ptr src#) (I# len#) =
512 IO $ \s ->
513 case newByteArray# len# s of { (# s, dst# #) ->
514 case copyAddrToByteArray# src# dst# 0# len# s of { s ->
515 case unsafeFreezeByteArray# dst# s of { (# s, ba# #) ->
516 (# s, SBS.SBS ba# #) }}}
517
518 -- | Create a 'FastString' by copying an existing 'ByteString'
519 mkFastStringByteString :: ByteString -> FastString
520 mkFastStringByteString bs =
521 let sbs = SBS.toShort bs in
522 inlinePerformIO $
523 mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
524
525 -- | Create a 'FastString' from an existing 'ShortByteString' without
526 -- copying.
527 mkFastStringShortByteString :: ShortByteString -> FastString
528 mkFastStringShortByteString sbs =
529 inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
530
531 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
532 mkFastString :: String -> FastString
533 {-# NOINLINE[1] mkFastString #-}
534 mkFastString str =
535 inlinePerformIO $ do
536 sbs <- utf8EncodeShortByteString str
537 mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
538
539 -- The following rule is used to avoid polluting the non-reclaimable FastString
540 -- table with transient strings when we only want their encoding.
541 {-# RULES
542 "bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x
543 #-}
544
545 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
546 mkFastStringByteList :: [Word8] -> FastString
547 mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
548
549 -- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and
550 -- account the number of forced z-strings into the passed 'FastMutInt'.
551 mkZFastString :: FastMutInt -> ShortByteString -> FastZString
552 mkZFastString n_zencs sbs = unsafePerformIO $ do
553 _ <- atomicFetchAddFastMut n_zencs 1
554 return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
555
556 mkNewFastStringShortByteString :: ShortByteString -> Int
557 -> FastMutInt -> IO FastString
558 mkNewFastStringShortByteString sbs uid n_zencs = do
559 let zstr = mkZFastString n_zencs sbs
560 chars <- countUTF8Chars sbs
561 return (FastString uid chars sbs zstr)
562
563 hashStr :: ShortByteString -> Int
564 -- produce a hash value between 0 & m (inclusive)
565 hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
566 where
567 !(I# len#) = SBS.length sbs
568 loop h n =
569 if isTrue# (n ==# len#) then
570 I# h
571 else
572 let
573 -- DO NOT move this let binding! indexCharOffAddr# reads from the
574 -- pointer so we need to evaluate this based on the length check
575 -- above. Not doing this right caused #17909.
576 #if __GLASGOW_HASKELL__ >= 901
577 !c = int8ToInt# (indexInt8Array# ba# n)
578 #else
579 !c = indexInt8Array# ba# n
580 #endif
581 !h2 = (h *# 16777619#) `xorI#` c
582 in
583 loop h2 (n +# 1#)
584
585 -- -----------------------------------------------------------------------------
586 -- Operations
587
588 -- | Returns the length of the 'FastString' in characters
589 lengthFS :: FastString -> Int
590 lengthFS fs = n_chars fs
591
592 -- | Returns @True@ if the 'FastString' is empty
593 nullFS :: FastString -> Bool
594 nullFS fs = SBS.null $ fs_sbs fs
595
596 -- | Unpacks and decodes the FastString
597 unpackFS :: FastString -> String
598 unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs
599
600 -- | Returns a Z-encoded version of a 'FastString'. This might be the
601 -- original, if it was already Z-encoded. The first time this
602 -- function is applied to a particular 'FastString', the results are
603 -- memoized.
604 --
605 zEncodeFS :: FastString -> FastZString
606 zEncodeFS fs = fs_zenc fs
607
608 appendFS :: FastString -> FastString -> FastString
609 appendFS fs1 fs2 = mkFastStringShortByteString
610 $ (Semi.<>) (fs_sbs fs1) (fs_sbs fs2)
611
612 concatFS :: [FastString] -> FastString
613 concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
614
615 headFS :: FastString -> Char
616 headFS fs
617 | SBS.null $ fs_sbs fs = panic "headFS: Empty FastString"
618 headFS fs = head $ unpackFS fs
619
620 consFS :: Char -> FastString -> FastString
621 consFS c fs = mkFastString (c : unpackFS fs)
622
623 unconsFS :: FastString -> Maybe (Char, FastString)
624 unconsFS fs =
625 case unpackFS fs of
626 [] -> Nothing
627 (chr : str) -> Just (chr, mkFastString str)
628
629 uniqueOfFS :: FastString -> Int
630 uniqueOfFS fs = uniq fs
631
632 nilFS :: FastString
633 nilFS = mkFastString ""
634
635 isUnderscoreFS :: FastString -> Bool
636 isUnderscoreFS fs = fs == fsLit "_"
637
638 -- -----------------------------------------------------------------------------
639 -- Stats
640
641 getFastStringTable :: IO [[[FastString]]]
642 getFastStringTable =
643 forM [0 .. numSegments - 1] $ \(I# i#) -> do
644 let (# segmentRef #) = indexArray# segments# i#
645 FastStringTableSegment _ _ buckets# <- readIORef segmentRef
646 let bucketSize = I# (sizeofMutableArray# buckets#)
647 forM [0 .. bucketSize - 1] $ \(I# j#) ->
648 IO $ readArray# buckets# j#
649 where
650 !(FastStringTable _ _ segments#) = stringTable
651
652 getFastStringZEncCounter :: IO Int
653 getFastStringZEncCounter = readFastMutInt n_zencs
654 where
655 !(FastStringTable _ n_zencs _) = stringTable
656
657 -- -----------------------------------------------------------------------------
658 -- Outputting 'FastString's
659
660 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
661 -- get the actual bytes in the 'FastString' written to the 'Handle'.
662 hPutFS :: Handle -> FastString -> IO ()
663 hPutFS handle fs = BS.hPut handle $ bytesFS fs
664
665 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
666 -- in the current locale's encoding (for error messages and suchlike).
667
668 -- -----------------------------------------------------------------------------
669 -- PtrStrings, here for convenience only.
670
671 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
672 data PtrString = PtrString !(Ptr Word8) !Int
673
674 -- | Wrap an unboxed address into a 'PtrString'.
675 mkPtrString# :: Addr# -> PtrString
676 {-# INLINE mkPtrString# #-}
677 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
678
679 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
680 -- encoding. The original string must not contain non-Latin-1 characters
681 -- (above codepoint @0xff@).
682 {-# NOINLINE[0] mkPtrString #-} -- see rules below
683 mkPtrString :: String -> PtrString
684 mkPtrString s =
685 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
686 -- and because someone might be using `eqAddr#` to check for string equality.
687 unsafePerformIO (do
688 let len = length s
689 p <- mallocBytes len
690 let
691 loop :: Int -> String -> IO ()
692 loop !_ [] = return ()
693 loop n (c:cs) = do
694 pokeByteOff p n (fromIntegral (ord c) :: Word8)
695 loop (1+n) cs
696 loop 0 s
697 return (PtrString p len)
698 )
699
700 {-# RULES "mkPtrString"
701 forall x . mkPtrString (unpackCString# x) = mkPtrString# x #-}
702
703 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
704 -- This does not free the memory associated with 'PtrString'.
705 unpackPtrString :: PtrString -> String
706 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
707
708 -- | Return the length of a 'PtrString'
709 lengthPS :: PtrString -> Int
710 lengthPS (PtrString _ n) = n
711
712 -- -----------------------------------------------------------------------------
713 -- under the carpet
714
715 #if !MIN_VERSION_GLASGOW_HASKELL(9,0,0,0)
716 foreign import ccall unsafe "strlen"
717 cstringLength# :: Addr# -> Int#
718 #endif
719
720 ptrStrLength :: Ptr Word8 -> Int
721 {-# INLINE ptrStrLength #-}
722 ptrStrLength (Ptr a) = I# (cstringLength# a)
723
724 {-# NOINLINE fsLit #-}
725 fsLit :: String -> FastString
726 fsLit x = mkFastString x
727
728 {-# RULES "fslit"
729 forall x . fsLit (unpackCString# x) = mkFastString# x #-}