1 {-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
    2 {-# LANGUAGE DeriveDataTypeable #-}
    3 #if __GLASGOW_HASKELL__ >= 703
    4 {-# LANGUAGE Unsafe #-}
    5 #endif
    6 {-# OPTIONS_HADDOCK hide #-}
    7 
    8 -- |
    9 -- Module      : Data.ByteString.Lazy.Internal
   10 -- Copyright   : (c) Don Stewart 2006-2008
   11 --               (c) Duncan Coutts 2006-2011
   12 -- License     : BSD-style
   13 -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
   14 -- Stability   : unstable
   15 -- Portability : non-portable
   16 -- 
   17 -- A module containing semi-public 'ByteString' internals. This exposes
   18 -- the 'ByteString' representation and low level construction functions.
   19 -- Modules which extend the 'ByteString' system will need to use this module
   20 -- while ideally most users will be able to make do with the public interface
   21 -- modules.
   22 --
   23 module Data.ByteString.Lazy.Internal (
   24 
   25         -- * The lazy @ByteString@ type and representation
   26         ByteString(..),     -- instances: Eq, Ord, Show, Read, Data, Typeable
   27         chunk,
   28         foldrChunks,
   29         foldlChunks,
   30 
   31         -- * Data type invariant and abstraction function
   32         invariant,
   33         checkInvariant,
   34 
   35         -- * Chunk allocation sizes
   36         defaultChunkSize,
   37         smallChunkSize,
   38         chunkOverhead,
   39 
   40         -- * Conversion with lists: packing and unpacking
   41         packBytes, packChars,
   42         unpackBytes, unpackChars,
   43 
   44   ) where
   45 
   46 import Prelude hiding (concat)
   47 
   48 import qualified Data.ByteString.Internal as S
   49 import qualified Data.ByteString          as S (length, take, drop)
   50 
   51 import Data.Word        (Word8)
   52 import Foreign.Storable (Storable(sizeOf))
   53 
   54 #if MIN_VERSION_base(4,9,0)
   55 import Data.Semigroup   (Semigroup((<>)))
   56 #endif
   57 #if !(MIN_VERSION_base(4,8,0))
   58 import Data.Monoid      (Monoid(..))
   59 #endif
   60 import Control.DeepSeq  (NFData, rnf)
   61 
   62 import Data.String      (IsString(..))
   63 
   64 import Data.Typeable            (Typeable)
   65 import Data.Data                (Data(..), mkNoRepType)
   66 
   67 -- | A space-efficient representation of a 'Word8' vector, supporting many
   68 -- efficient operations.
   69 --
   70 -- A lazy 'ByteString' contains 8-bit bytes, or by using the operations
   71 -- from "Data.ByteString.Lazy.Char8" it can be interpreted as containing
   72 -- 8-bit characters.
   73 --
   74 data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString
   75     deriving (Typeable)
   76 
   77 instance Eq  ByteString where
   78     (==)    = eq
   79 
   80 instance Ord ByteString where
   81     compare = cmp
   82 
   83 #if MIN_VERSION_base(4,9,0)
   84 instance Semigroup ByteString where
   85     (<>)    = append
   86 #endif
   87 
   88 instance Monoid ByteString where
   89     mempty  = Empty
   90 #if MIN_VERSION_base(4,9,0)
   91     mappend = (<>)
   92 #else
   93     mappend = append
   94 #endif
   95     mconcat = concat
   96 
   97 instance NFData ByteString where
   98     rnf Empty       = ()
   99     rnf (Chunk _ b) = rnf b
  100 
  101 instance Show ByteString where
  102     showsPrec p ps r = showsPrec p (unpackChars ps) r
  103 
  104 instance Read ByteString where
  105     readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]
  106 
  107 instance IsString ByteString where
  108     fromString = packChars
  109 
  110 instance Data ByteString where
  111   gfoldl f z txt = z packBytes `f` unpackBytes txt
  112   toConstr _     = error "Data.ByteString.Lazy.ByteString.toConstr"
  113   gunfold _ _    = error "Data.ByteString.Lazy.ByteString.gunfold"
  114   dataTypeOf _   = mkNoRepType "Data.ByteString.Lazy.ByteString"
  115 
  116 ------------------------------------------------------------------------
  117 -- Packing and unpacking from lists
  118 
  119 packBytes :: [Word8] -> ByteString
  120 packBytes cs0 =
  121     packChunks 32 cs0
  122   where
  123     packChunks n cs = case S.packUptoLenBytes n cs of
  124       (bs, [])  -> chunk bs Empty
  125       (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs')
  126 
  127 packChars :: [Char] -> ByteString
  128 packChars cs0 =
  129     packChunks 32 cs0
  130   where
  131     packChunks n cs = case S.packUptoLenChars n cs of
  132       (bs, [])  -> chunk bs Empty
  133       (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs')
  134 
  135 unpackBytes :: ByteString -> [Word8]
  136 unpackBytes Empty        = []
  137 unpackBytes (Chunk c cs) = S.unpackAppendBytesLazy c (unpackBytes cs)
  138 
  139 unpackChars :: ByteString -> [Char]
  140 unpackChars Empty        = []
  141 unpackChars (Chunk c cs) = S.unpackAppendCharsLazy c (unpackChars cs)
  142 
  143 ------------------------------------------------------------------------
  144 
  145 -- | The data type invariant:
  146 -- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's.
  147 -- All functions must preserve this, and the QC properties must check this.
  148 --
  149 invariant :: ByteString -> Bool
  150 invariant Empty                     = True
  151 invariant (Chunk (S.PS _ _ len) cs) = len > 0 && invariant cs
  152 
  153 -- | In a form that checks the invariant lazily.
  154 checkInvariant :: ByteString -> ByteString
  155 checkInvariant Empty = Empty
  156 checkInvariant (Chunk c@(S.PS _ _ len) cs)
  157     | len > 0   = Chunk c (checkInvariant cs)
  158     | otherwise = error $ "Data.ByteString.Lazy: invariant violation:"
  159                ++ show (Chunk c cs)
  160 
  161 ------------------------------------------------------------------------
  162 
  163 -- | Smart constructor for 'Chunk'. Guarantees the data type invariant.
  164 chunk :: S.ByteString -> ByteString -> ByteString
  165 chunk c@(S.PS _ _ len) cs | len == 0  = cs
  166                           | otherwise = Chunk c cs
  167 {-# INLINE chunk #-}
  168 
  169 -- | Consume the chunks of a lazy ByteString with a natural right fold.
  170 foldrChunks :: (S.ByteString -> a -> a) -> a -> ByteString -> a
  171 foldrChunks f z = go
  172   where go Empty        = z
  173         go (Chunk c cs) = f c (go cs)
  174 {-# INLINE foldrChunks #-}
  175 
  176 -- | Consume the chunks of a lazy ByteString with a strict, tail-recursive,
  177 -- accumulating left fold.
  178 foldlChunks :: (a -> S.ByteString -> a) -> a -> ByteString -> a
  179 foldlChunks f z = go z
  180   where go a _ | a `seq` False = undefined
  181         go a Empty        = a
  182         go a (Chunk c cs) = go (f a c) cs
  183 {-# INLINE foldlChunks #-}
  184 
  185 ------------------------------------------------------------------------
  186 
  187 -- The representation uses lists of packed chunks. When we have to convert from
  188 -- a lazy list to the chunked representation, then by default we use this
  189 -- chunk size. Some functions give you more control over the chunk size.
  190 --
  191 -- Measurements here:
  192 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
  193 --
  194 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
  195 -- The following value assumes people have something greater than 128k,
  196 -- and need to share the cache with other programs.
  197 
  198 -- | The chunk size used for I\/O. Currently set to 32k, less the memory management overhead
  199 defaultChunkSize :: Int
  200 defaultChunkSize = 32 * k - chunkOverhead
  201    where k = 1024
  202 
  203 -- | The recommended chunk size. Currently set to 4k, less the memory management overhead
  204 smallChunkSize :: Int
  205 smallChunkSize = 4 * k - chunkOverhead
  206    where k = 1024
  207 
  208 -- | The memory management overhead. Currently this is tuned for GHC only.
  209 chunkOverhead :: Int
  210 chunkOverhead = 2 * sizeOf (undefined :: Int)
  211 
  212 ------------------------------------------------------------------------
  213 -- Implementations for Eq, Ord and Monoid instances
  214 
  215 eq :: ByteString -> ByteString -> Bool
  216 eq Empty Empty = True
  217 eq Empty _     = False
  218 eq _     Empty = False
  219 eq (Chunk a as) (Chunk b bs) =
  220   case compare (S.length a) (S.length b) of
  221     LT -> a == (S.take (S.length a) b) && eq as (Chunk (S.drop (S.length a) b) bs)
  222     EQ -> a == b                       && eq as bs
  223     GT -> (S.take (S.length b) a) == b && eq (Chunk (S.drop (S.length b) a) as) bs
  224 
  225 cmp :: ByteString -> ByteString -> Ordering
  226 cmp Empty Empty = EQ
  227 cmp Empty _     = LT
  228 cmp _     Empty = GT
  229 cmp (Chunk a as) (Chunk b bs) =
  230   case compare (S.length a) (S.length b) of
  231     LT -> case compare a (S.take (S.length a) b) of
  232             EQ     -> cmp as (Chunk (S.drop (S.length a) b) bs)
  233             result -> result
  234     EQ -> case compare a b of
  235             EQ     -> cmp as bs
  236             result -> result
  237     GT -> case compare (S.take (S.length b) a) b of
  238             EQ     -> cmp (Chunk (S.drop (S.length b) a) as) bs
  239             result -> result
  240 
  241 append :: ByteString -> ByteString -> ByteString
  242 append xs ys = foldrChunks Chunk ys xs
  243 
  244 concat :: [ByteString] -> ByteString
  245 concat css0 = to css0
  246   where
  247     go Empty        css = to css
  248     go (Chunk c cs) css = Chunk c (go cs css)
  249     to []               = Empty
  250     to (cs:css)         = go cs css