never executed always true always false
    1 {-# LANGUAGE BangPatterns #-}
    2 {-# LANGUAGE CPP #-}
    3 {-# LANGUAGE DeriveDataTypeable #-}
    4 {-# LANGUAGE DerivingStrategies #-}
    5 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
    6 {-# LANGUAGE MagicHash #-}
    7 {-# LANGUAGE UnboxedTuples #-}
    8 {-# LANGUAGE UnliftedFFITypes #-}
    9 
   10 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
   11 -- We always optimise this, otherwise performance of a non-optimised
   12 -- compiler is severely affected
   13 
   14 -- |
   15 -- There are two principal string types used internally by GHC:
   16 --
   17 -- ['FastString']
   18 --
   19 --   * A compact, hash-consed, representation of character strings.
   20 --   * Generated by 'fsLit'.
   21 --   * You can get a 'GHC.Types.Unique.Unique' from them.
   22 --   * Equality test is O(1) (it uses the Unique).
   23 --   * Comparison is O(1) or O(n):
   24 --       * O(n) but deterministic with lexical comparison (`lexicalCompareFS`)
   25 --       * O(1) but non-deterministic with Unique comparison (`uniqCompareFS`)
   26 --   * Turn into 'GHC.Utils.Outputable.SDoc' with 'GHC.Utils.Outputable.ftext'.
   27 --
   28 -- ['PtrString']
   29 --
   30 --   * Pointer and size of a Latin-1 encoded string.
   31 --   * Practically no operations.
   32 --   * Outputting them is fast.
   33 --   * Generated by 'mkPtrString'.
   34 --   * Length of string literals (mkPtrString "abc") is computed statically
   35 --   * Turn into 'GHC.Utils.Outputable.SDoc' with 'GHC.Utils.Outputable.ptext'
   36 --   * Requires manual memory management.
   37 --     Improper use may lead to memory leaks or dangling pointers.
   38 --   * It assumes Latin-1 as the encoding, therefore it cannot represent
   39 --     arbitrary Unicode strings.
   40 --
   41 -- Use 'PtrString' unless you want the facilities of 'FastString'.
   42 module GHC.Data.FastString
   43        (
   44         -- * ByteString
   45         bytesFS,
   46         fastStringToByteString,
   47         mkFastStringByteString,
   48         fastZStringToByteString,
   49         unsafeMkByteString,
   50 
   51         -- * ShortByteString
   52         fastStringToShortByteString,
   53         mkFastStringShortByteString,
   54 
   55         -- * FastZString
   56         FastZString,
   57         hPutFZS,
   58         zString,
   59         lengthFZS,
   60 
   61         -- * FastStrings
   62         FastString(..),     -- not abstract, for now.
   63         NonDetFastString (..),
   64         LexicalFastString (..),
   65 
   66         -- ** Construction
   67         fsLit,
   68         mkFastString,
   69         mkFastStringBytes,
   70         mkFastStringByteList,
   71         mkFastString#,
   72 
   73         -- ** Deconstruction
   74         unpackFS,           -- :: FastString -> String
   75         unconsFS,           -- :: FastString -> Maybe (Char, FastString)
   76 
   77         -- ** Encoding
   78         zEncodeFS,
   79 
   80         -- ** Operations
   81         uniqueOfFS,
   82         lengthFS,
   83         nullFS,
   84         appendFS,
   85         headFS,
   86         concatFS,
   87         consFS,
   88         nilFS,
   89         isUnderscoreFS,
   90         lexicalCompareFS,
   91         uniqCompareFS,
   92 
   93         -- ** Outputting
   94         hPutFS,
   95 
   96         -- ** Internal
   97         getFastStringTable,
   98         getFastStringZEncCounter,
   99 
  100         -- * PtrStrings
  101         PtrString (..),
  102 
  103         -- ** Construction
  104         mkPtrString#,
  105         mkPtrString,
  106 
  107         -- ** Deconstruction
  108         unpackPtrString,
  109 
  110         -- ** Operations
  111         lengthPS
  112        ) where
  113 
  114 -- For GHC_STAGE
  115 #include "ghcplatform.h"
  116 
  117 import GHC.Prelude as Prelude
  118 
  119 import GHC.Utils.Encoding
  120 import GHC.Utils.IO.Unsafe
  121 import GHC.Utils.Panic.Plain
  122 import GHC.Utils.Misc
  123 import GHC.Data.FastMutInt
  124 
  125 import Control.Concurrent.MVar
  126 import Control.DeepSeq
  127 import Control.Monad
  128 import Data.ByteString (ByteString)
  129 import Data.ByteString.Short (ShortByteString)
  130 import qualified Data.ByteString          as BS
  131 import qualified Data.ByteString.Char8    as BSC
  132 import qualified Data.ByteString.Unsafe   as BS
  133 import qualified Data.ByteString.Short    as SBS
  134 #if !MIN_VERSION_bytestring(0,11,0)
  135 import qualified Data.ByteString.Short.Internal as SBS
  136 #endif
  137 import Foreign.C
  138 import System.IO
  139 import Data.Data
  140 import Data.IORef
  141 import Data.Char
  142 import Data.Semigroup as Semi
  143 
  144 import Foreign
  145 
  146 #if GHC_STAGE >= 2
  147 import GHC.Conc.Sync    (sharedCAF)
  148 #endif
  149 
  150 #if __GLASGOW_HASKELL__ < 811
  151 import GHC.Base (unpackCString#,unpackNBytes#)
  152 #endif
  153 import GHC.Exts
  154 import GHC.IO
  155 
  156 -- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString'
  157 bytesFS, fastStringToByteString :: FastString -> ByteString
  158 {-# INLINE[1] bytesFS #-}
  159 bytesFS f = SBS.fromShort $ fs_sbs f
  160 
  161 {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
  162 fastStringToByteString = bytesFS
  163 
  164 fastStringToShortByteString :: FastString -> ShortByteString
  165 fastStringToShortByteString = fs_sbs
  166 
  167 fastZStringToByteString :: FastZString -> ByteString
  168 fastZStringToByteString (FastZString bs) = bs
  169 
  170 -- This will drop information if any character > '\xFF'
  171 unsafeMkByteString :: String -> ByteString
  172 unsafeMkByteString = BSC.pack
  173 
  174 hashFastString :: FastString -> Int
  175 hashFastString fs = hashStr $ fs_sbs fs
  176 
  177 -- -----------------------------------------------------------------------------
  178 
  179 newtype FastZString = FastZString ByteString
  180   deriving NFData
  181 
  182 hPutFZS :: Handle -> FastZString -> IO ()
  183 hPutFZS handle (FastZString bs) = BS.hPut handle bs
  184 
  185 zString :: FastZString -> String
  186 zString (FastZString bs) =
  187     inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
  188 
  189 lengthFZS :: FastZString -> Int
  190 lengthFZS (FastZString bs) = BS.length bs
  191 
  192 mkFastZStringString :: String -> FastZString
  193 mkFastZStringString str = FastZString (BSC.pack str)
  194 
  195 -- -----------------------------------------------------------------------------
  196 
  197 {-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
  198 'FastString's are stored in a global hashtable to support fast O(1)
  199 comparison.
  200 
  201 It is also associated with a lazy reference to the Z-encoding
  202 of this string which is used by the compiler internally.
  203 -}
  204 data FastString = FastString {
  205       uniq    :: {-# UNPACK #-} !Int, -- unique id
  206       n_chars :: {-# UNPACK #-} !Int, -- number of chars
  207       fs_sbs  :: {-# UNPACK #-} !ShortByteString,
  208       fs_zenc :: FastZString
  209       -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
  210       -- GHC.Utils.Encoding.
  211       --
  212       -- Since 'FastString's are globally memoized this is computed at most
  213       -- once for any given string.
  214   }
  215 
  216 instance Eq FastString where
  217   f1 == f2  =  uniq f1 == uniq f2
  218 
  219 -- We don't provide any "Ord FastString" instance to force you to think about
  220 -- which ordering you want:
  221 --    * lexical:   deterministic,     O(n). Cf lexicalCompareFS and LexicalFastString.
  222 --    * by unique: non-deterministic, O(1). Cf uniqCompareFS    and NonDetFastString.
  223 
  224 instance IsString FastString where
  225     fromString = fsLit
  226 
  227 instance Semi.Semigroup FastString where
  228     (<>) = appendFS
  229 
  230 instance Monoid FastString where
  231     mempty = nilFS
  232     mappend = (Semi.<>)
  233     mconcat = concatFS
  234 
  235 instance Show FastString where
  236    show fs = show (unpackFS fs)
  237 
  238 instance Data FastString where
  239   -- don't traverse?
  240   toConstr _   = abstractConstr "FastString"
  241   gunfold _ _  = error "gunfold"
  242   dataTypeOf _ = mkNoRepType "FastString"
  243 
  244 instance NFData FastString where
  245   rnf fs = seq fs ()
  246 
  247 -- | Compare FastString lexically
  248 --
  249 -- If you don't care about the lexical ordering, use `uniqCompareFS` instead.
  250 lexicalCompareFS :: FastString -> FastString -> Ordering
  251 lexicalCompareFS fs1 fs2 =
  252   if uniq fs1 == uniq fs2 then EQ else
  253   utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
  254   -- perform a lexical comparison taking into account the Modified UTF-8
  255   -- encoding we use (cf #18562)
  256 
  257 -- | Compare FastString by their Unique (not lexically).
  258 --
  259 -- Much cheaper than `lexicalCompareFS` but non-deterministic!
  260 uniqCompareFS :: FastString -> FastString -> Ordering
  261 uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2)
  262 
  263 -- | Non-deterministic FastString
  264 --
  265 -- This is a simple FastString wrapper with an Ord instance using
  266 -- `uniqCompareFS` (i.e. which compares FastStrings on their Uniques). Hence it
  267 -- is not deterministic from one run to the other.
  268 newtype NonDetFastString
  269    = NonDetFastString FastString
  270    deriving newtype (Eq, Show)
  271    deriving stock Data
  272 
  273 instance Ord NonDetFastString where
  274    compare (NonDetFastString fs1) (NonDetFastString fs2) = uniqCompareFS fs1 fs2
  275 
  276 -- | Lexical FastString
  277 --
  278 -- This is a simple FastString wrapper with an Ord instance using
  279 -- `lexicalCompareFS` (i.e. which compares FastStrings on their String
  280 -- representation). Hence it is deterministic from one run to the other.
  281 newtype LexicalFastString
  282    = LexicalFastString FastString
  283    deriving newtype (Eq, Show)
  284    deriving stock Data
  285 
  286 instance Ord LexicalFastString where
  287    compare (LexicalFastString fs1) (LexicalFastString fs2) = lexicalCompareFS fs1 fs2
  288 
  289 -- -----------------------------------------------------------------------------
  290 -- Construction
  291 
  292 {-
  293 Internally, the compiler will maintain a fast string symbol table, providing
  294 sharing and fast comparison. Creation of new @FastString@s then covertly does a
  295 lookup, re-using the @FastString@ if there was a hit.
  296 
  297 The design of the FastString hash table allows for lockless concurrent reads
  298 and updates to multiple buckets with low synchronization overhead.
  299 
  300 See Note [Updating the FastString table] on how it's updated.
  301 -}
  302 data FastStringTable = FastStringTable
  303   {-# UNPACK #-} !FastMutInt -- the unique ID counter shared with all buckets
  304   {-# UNPACK #-} !FastMutInt -- number of computed z-encodings for all buckets
  305   (Array# (IORef FastStringTableSegment)) -- concurrent segments
  306 
  307 data FastStringTableSegment = FastStringTableSegment
  308   {-# UNPACK #-} !(MVar ())  -- the lock for write in each segment
  309   {-# UNPACK #-} !FastMutInt -- the number of elements
  310   (MutableArray# RealWorld [FastString]) -- buckets in this segment
  311 
  312 {-
  313 Following parameters are determined based on:
  314 
  315 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
  316 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
  317   on 2018-10-24, we have 13920 entries.
  318 -}
  319 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
  320 segmentBits = 8
  321 numSegments = 256   -- bit segmentBits
  322 segmentMask = 0xff  -- bit segmentBits - 1
  323 initialNumBuckets = 64
  324 
  325 hashToSegment# :: Int# -> Int#
  326 hashToSegment# hash# = hash# `andI#` segmentMask#
  327   where
  328     !(I# segmentMask#) = segmentMask
  329 
  330 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
  331 hashToIndex# buckets# hash# =
  332   (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
  333   where
  334     !(I# segmentBits#) = segmentBits
  335     size# = sizeofMutableArray# buckets#
  336 
  337 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
  338 maybeResizeSegment segmentRef = do
  339   segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
  340   let oldSize# = sizeofMutableArray# old#
  341       newSize# = oldSize# *# 2#
  342   (I# n#) <- readFastMutInt counter
  343   if isTrue# (n# <# newSize#) -- maximum load of 1
  344   then return segment
  345   else do
  346     resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
  347       case newArray# newSize# [] s1# of
  348         (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
  349     forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
  350       fsList <- IO $ readArray# old# i#
  351       forM_ fsList $ \fs -> do
  352         let -- Shall we store in hash value in FastString instead?
  353             !(I# hash#) = hashFastString fs
  354             idx# = hashToIndex# new# hash#
  355         IO $ \s1# ->
  356           case readArray# new# idx# s1# of
  357             (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
  358               s3# -> (# s3#, () #)
  359     writeIORef segmentRef resizedSegment
  360     return resizedSegment
  361 
  362 {-# NOINLINE stringTable #-}
  363 stringTable :: FastStringTable
  364 stringTable = unsafePerformIO $ do
  365   let !(I# numSegments#) = numSegments
  366       !(I# initialNumBuckets#) = initialNumBuckets
  367       loop a# i# s1#
  368         | isTrue# (i# ==# numSegments#) = s1#
  369         | otherwise = case newMVar () `unIO` s1# of
  370             (# s2#, lock #) -> case newFastMutInt 0 `unIO` s2# of
  371               (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
  372                 (# s4#, buckets# #) -> case newIORef
  373                     (FastStringTableSegment lock counter buckets#) `unIO` s4# of
  374                   (# s5#, segment #) -> case writeArray# a# i# segment s5# of
  375                     s6# -> loop a# (i# +# 1#) s6#
  376   uid <- newFastMutInt 603979776 -- ord '$' * 0x01000000
  377   n_zencs <- newFastMutInt 0
  378   tab <- IO $ \s1# ->
  379     case newArray# numSegments# (panic "string_table") s1# of
  380       (# s2#, arr# #) -> case loop arr# 0# s2# of
  381         s3# -> case unsafeFreezeArray# arr# s3# of
  382           (# s4#, segments# #) ->
  383             (# s4#, FastStringTable uid n_zencs segments# #)
  384 
  385   -- use the support wired into the RTS to share this CAF among all images of
  386   -- libHSghc
  387 #if GHC_STAGE < 2
  388   return tab
  389 #else
  390   sharedCAF tab getOrSetLibHSghcFastStringTable
  391 
  392 -- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous
  393 -- RTS might not have this symbol
  394 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
  395   getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
  396 #endif
  397 
  398 {-
  399 
  400 We include the FastString table in the `sharedCAF` mechanism because we'd like
  401 FastStrings created by a Core plugin to have the same uniques as corresponding
  402 strings created by the host compiler itself.  For example, this allows plugins
  403 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
  404 even re-invoke the parser.
  405 
  406 In particular, the following little sanity test was failing in a plugin
  407 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
  408 be looked up /by the plugin/.
  409 
  410    let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
  411    putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
  412 
  413 `mkTcOcc` involves the lookup (or creation) of a FastString.  Since the
  414 plugin's FastString.string_table is empty, constructing the RdrName also
  415 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT".  These
  416 uniques are almost certainly unequal to the ones that the host compiler
  417 originally assigned to those FastStrings.  Thus the lookup fails since the
  418 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
  419 unique.
  420 
  421 Maintaining synchronization of the two instances of this global is rather
  422 difficult because of the uses of `unsafePerformIO` in this module.  Not
  423 synchronizing them risks breaking the rather major invariant that two
  424 FastStrings with the same unique have the same string. Thus we use the
  425 lower-level `sharedCAF` mechanism that relies on Globals.c.
  426 
  427 -}
  428 
  429 mkFastString# :: Addr# -> FastString
  430 {-# INLINE mkFastString# #-}
  431 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
  432   where ptr = Ptr a#
  433 
  434 {- Note [Updating the FastString table]
  435 
  436 We use a concurrent hashtable which contains multiple segments, each hash value
  437 always maps to the same segment. Read is lock-free, write to the a segment
  438 should acquire a lock for that segment to avoid race condition, writes to
  439 different segments are independent.
  440 
  441 The procedure goes like this:
  442 
  443 1. Find out which segment to operate on based on the hash value
  444 2. Read the relevant bucket and perform a look up of the string.
  445 3. If it exists, return it.
  446 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
  447    to update the relevant segment with this FastString:
  448 
  449    * Resize the segment by doubling the number of buckets when the number of
  450      FastStrings in this segment grows beyond the threshold.
  451    * Double check that the string is not in the bucket. Another thread may have
  452      inserted it while we were creating our string.
  453    * Return the existing FastString if it exists. The one we preemptively
  454      created will get GCed.
  455    * Otherwise, insert and return the string we created.
  456 -}
  457 
  458 mkFastStringWith
  459     :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
  460 mkFastStringWith mk_fs sbs = do
  461   FastStringTableSegment lock _ buckets# <- readIORef segmentRef
  462   let idx# = hashToIndex# buckets# hash#
  463   bucket <- IO $ readArray# buckets# idx#
  464   res <- bucket_match bucket sbs
  465   case res of
  466     Just found -> return found
  467     Nothing -> do
  468       -- The withMVar below is not dupable. It can lead to deadlock if it is
  469       -- only run partially and putMVar is not called after takeMVar.
  470       noDuplicate
  471       n <- get_uid
  472       new_fs <- mk_fs n n_zencs
  473       withMVar lock $ \_ -> insert new_fs
  474   where
  475     !(FastStringTable uid n_zencs segments#) = stringTable
  476     get_uid = atomicFetchAddFastMut uid 1
  477 
  478     !(I# hash#) = hashStr sbs
  479     (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
  480     insert fs = do
  481       FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
  482       let idx# = hashToIndex# buckets# hash#
  483       bucket <- IO $ readArray# buckets# idx#
  484       res <- bucket_match bucket sbs
  485       case res of
  486         -- The FastString was added by another thread after previous read and
  487         -- before we acquired the write lock.
  488         Just found -> return found
  489         Nothing -> do
  490           IO $ \s1# ->
  491             case writeArray# buckets# idx# (fs : bucket) s1# of
  492               s2# -> (# s2#, () #)
  493           _ <- atomicFetchAddFastMut counter 1
  494           return fs
  495 
  496 bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
  497 bucket_match [] _ = return Nothing
  498 bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
  499   | fs_sbs == sbs = return (Just fs)
  500   | otherwise     =  bucket_match ls sbs
  501 
  502 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
  503 mkFastStringBytes !ptr !len =
  504     -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
  505     -- idempotent.
  506     unsafeDupablePerformIO $ do
  507         sbs <- newSBSFromPtr ptr len
  508         mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
  509 
  510 newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
  511 newSBSFromPtr (Ptr src#) (I# len#) =
  512   IO $ \s ->
  513     case newByteArray# len# s of { (# s, dst# #) ->
  514     case copyAddrToByteArray# src# dst# 0# len# s of { s ->
  515     case unsafeFreezeByteArray# dst# s of { (# s, ba# #) ->
  516     (# s, SBS.SBS ba# #) }}}
  517 
  518 -- | Create a 'FastString' by copying an existing 'ByteString'
  519 mkFastStringByteString :: ByteString -> FastString
  520 mkFastStringByteString bs =
  521   let sbs = SBS.toShort bs in
  522   inlinePerformIO $
  523       mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
  524 
  525 -- | Create a 'FastString' from an existing 'ShortByteString' without
  526 -- copying.
  527 mkFastStringShortByteString :: ShortByteString -> FastString
  528 mkFastStringShortByteString sbs =
  529   inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
  530 
  531 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
  532 mkFastString :: String -> FastString
  533 {-# NOINLINE[1] mkFastString #-}
  534 mkFastString str =
  535   inlinePerformIO $ do
  536     sbs <- utf8EncodeShortByteString str
  537     mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
  538 
  539 -- The following rule is used to avoid polluting the non-reclaimable FastString
  540 -- table with transient strings when we only want their encoding.
  541 {-# RULES
  542 "bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x
  543 #-}
  544 
  545 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
  546 mkFastStringByteList :: [Word8] -> FastString
  547 mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
  548 
  549 -- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and
  550 -- account the number of forced z-strings into the passed 'FastMutInt'.
  551 mkZFastString :: FastMutInt -> ShortByteString -> FastZString
  552 mkZFastString n_zencs sbs = unsafePerformIO $ do
  553   _ <- atomicFetchAddFastMut n_zencs 1
  554   return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
  555 
  556 mkNewFastStringShortByteString :: ShortByteString -> Int
  557                                -> FastMutInt -> IO FastString
  558 mkNewFastStringShortByteString sbs uid n_zencs = do
  559   let zstr = mkZFastString n_zencs sbs
  560   chars <- countUTF8Chars sbs
  561   return (FastString uid chars sbs zstr)
  562 
  563 hashStr  :: ShortByteString -> Int
  564  -- produce a hash value between 0 & m (inclusive)
  565 hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
  566    where
  567     !(I# len#) = SBS.length sbs
  568     loop h n =
  569       if isTrue# (n ==# len#) then
  570         I# h
  571       else
  572         let
  573           -- DO NOT move this let binding! indexCharOffAddr# reads from the
  574           -- pointer so we need to evaluate this based on the length check
  575           -- above. Not doing this right caused #17909.
  576 #if __GLASGOW_HASKELL__ >= 901
  577           !c = int8ToInt# (indexInt8Array# ba# n)
  578 #else
  579           !c = indexInt8Array# ba# n
  580 #endif
  581           !h2 = (h *# 16777619#) `xorI#` c
  582         in
  583           loop h2 (n +# 1#)
  584 
  585 -- -----------------------------------------------------------------------------
  586 -- Operations
  587 
  588 -- | Returns the length of the 'FastString' in characters
  589 lengthFS :: FastString -> Int
  590 lengthFS fs = n_chars fs
  591 
  592 -- | Returns @True@ if the 'FastString' is empty
  593 nullFS :: FastString -> Bool
  594 nullFS fs = SBS.null $ fs_sbs fs
  595 
  596 -- | Unpacks and decodes the FastString
  597 unpackFS :: FastString -> String
  598 unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs
  599 
  600 -- | Returns a Z-encoded version of a 'FastString'.  This might be the
  601 -- original, if it was already Z-encoded.  The first time this
  602 -- function is applied to a particular 'FastString', the results are
  603 -- memoized.
  604 --
  605 zEncodeFS :: FastString -> FastZString
  606 zEncodeFS fs = fs_zenc fs
  607 
  608 appendFS :: FastString -> FastString -> FastString
  609 appendFS fs1 fs2 = mkFastStringShortByteString
  610                  $ (Semi.<>) (fs_sbs fs1) (fs_sbs fs2)
  611 
  612 concatFS :: [FastString] -> FastString
  613 concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
  614 
  615 headFS :: FastString -> Char
  616 headFS fs
  617   | SBS.null $ fs_sbs fs = panic "headFS: Empty FastString"
  618 headFS fs = head $ unpackFS fs
  619 
  620 consFS :: Char -> FastString -> FastString
  621 consFS c fs = mkFastString (c : unpackFS fs)
  622 
  623 unconsFS :: FastString -> Maybe (Char, FastString)
  624 unconsFS fs =
  625   case unpackFS fs of
  626     []          -> Nothing
  627     (chr : str) -> Just (chr, mkFastString str)
  628 
  629 uniqueOfFS :: FastString -> Int
  630 uniqueOfFS fs = uniq fs
  631 
  632 nilFS :: FastString
  633 nilFS = mkFastString ""
  634 
  635 isUnderscoreFS :: FastString -> Bool
  636 isUnderscoreFS fs = fs == fsLit "_"
  637 
  638 -- -----------------------------------------------------------------------------
  639 -- Stats
  640 
  641 getFastStringTable :: IO [[[FastString]]]
  642 getFastStringTable =
  643   forM [0 .. numSegments - 1] $ \(I# i#) -> do
  644     let (# segmentRef #) = indexArray# segments# i#
  645     FastStringTableSegment _ _ buckets# <- readIORef segmentRef
  646     let bucketSize = I# (sizeofMutableArray# buckets#)
  647     forM [0 .. bucketSize - 1] $ \(I# j#) ->
  648       IO $ readArray# buckets# j#
  649   where
  650     !(FastStringTable _ _ segments#) = stringTable
  651 
  652 getFastStringZEncCounter :: IO Int
  653 getFastStringZEncCounter = readFastMutInt n_zencs
  654   where
  655     !(FastStringTable _ n_zencs _) = stringTable
  656 
  657 -- -----------------------------------------------------------------------------
  658 -- Outputting 'FastString's
  659 
  660 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
  661 -- get the actual bytes in the 'FastString' written to the 'Handle'.
  662 hPutFS :: Handle -> FastString -> IO ()
  663 hPutFS handle fs = BS.hPut handle $ bytesFS fs
  664 
  665 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
  666 -- in the current locale's encoding (for error messages and suchlike).
  667 
  668 -- -----------------------------------------------------------------------------
  669 -- PtrStrings, here for convenience only.
  670 
  671 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
  672 data PtrString = PtrString !(Ptr Word8) !Int
  673 
  674 -- | Wrap an unboxed address into a 'PtrString'.
  675 mkPtrString# :: Addr# -> PtrString
  676 {-# INLINE mkPtrString# #-}
  677 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
  678 
  679 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
  680 -- encoding.  The original string must not contain non-Latin-1 characters
  681 -- (above codepoint @0xff@).
  682 {-# NOINLINE[0] mkPtrString #-} -- see rules below
  683 mkPtrString :: String -> PtrString
  684 mkPtrString s =
  685  -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
  686  -- and because someone might be using `eqAddr#` to check for string equality.
  687  unsafePerformIO (do
  688    let len = length s
  689    p <- mallocBytes len
  690    let
  691      loop :: Int -> String -> IO ()
  692      loop !_ []    = return ()
  693      loop n (c:cs) = do
  694         pokeByteOff p n (fromIntegral (ord c) :: Word8)
  695         loop (1+n) cs
  696    loop 0 s
  697    return (PtrString p len)
  698  )
  699 
  700 {-# RULES "mkPtrString"
  701     forall x . mkPtrString (unpackCString# x) = mkPtrString#  x #-}
  702 
  703 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
  704 -- This does not free the memory associated with 'PtrString'.
  705 unpackPtrString :: PtrString -> String
  706 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
  707 
  708 -- | Return the length of a 'PtrString'
  709 lengthPS :: PtrString -> Int
  710 lengthPS (PtrString _ n) = n
  711 
  712 -- -----------------------------------------------------------------------------
  713 -- under the carpet
  714 
  715 #if !MIN_VERSION_GLASGOW_HASKELL(9,0,0,0)
  716 foreign import ccall unsafe "strlen"
  717   cstringLength# :: Addr# -> Int#
  718 #endif
  719 
  720 ptrStrLength :: Ptr Word8 -> Int
  721 {-# INLINE ptrStrLength #-}
  722 ptrStrLength (Ptr a) = I# (cstringLength# a)
  723 
  724 {-# NOINLINE fsLit #-}
  725 fsLit :: String -> FastString
  726 fsLit x = mkFastString x
  727 
  728 {-# RULES "fslit"
  729     forall x . fsLit (unpackCString# x) = mkFastString# x #-}