1 {-# LANGUAGE CPP, BangPatterns #-}
    2 {-# OPTIONS_HADDOCK prune #-}
    3 #if __GLASGOW_HASKELL__ >= 701
    4 {-# LANGUAGE Trustworthy #-}
    5 #endif
    6 
    7 -- |
    8 -- Module      : Data.ByteString.Lazy.Char8
    9 -- Copyright   : (c) Don Stewart 2006-2008
   10 --               (c) Duncan Coutts 2006-2011
   11 -- License     : BSD-style
   12 --
   13 -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
   14 -- Stability   : stable
   15 -- Portability : portable
   16 --
   17 -- Manipulate /lazy/ 'ByteString's using 'Char' operations. All Chars will
   18 -- be truncated to 8 bits. It can be expected that these functions will
   19 -- run at identical speeds to their 'Data.Word.Word8' equivalents in
   20 -- "Data.ByteString.Lazy".
   21 --
   22 -- This module is intended to be imported @qualified@, to avoid name
   23 -- clashes with "Prelude" functions.  eg.
   24 --
   25 -- > import qualified Data.ByteString.Lazy.Char8 as C
   26 --
   27 -- The Char8 interface to bytestrings provides an instance of IsString
   28 -- for the ByteString type, enabling you to use string literals, and
   29 -- have them implicitly packed to ByteStrings.
   30 -- Use @{-\# LANGUAGE OverloadedStrings \#-}@ to enable this.
   31 --
   32 
   33 module Data.ByteString.Lazy.Char8 (
   34 
   35         -- * The @ByteString@ type
   36         ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
   37 
   38         -- * Introducing and eliminating 'ByteString's
   39         empty,                  -- :: ByteString
   40         singleton,              -- :: Char   -> ByteString
   41         pack,                   -- :: String -> ByteString
   42         unpack,                 -- :: ByteString -> String
   43         fromChunks,             -- :: [Strict.ByteString] -> ByteString
   44         toChunks,               -- :: ByteString -> [Strict.ByteString]
   45         fromStrict,             -- :: Strict.ByteString -> ByteString
   46         toStrict,               -- :: ByteString -> Strict.ByteString
   47 
   48         -- * Basic interface
   49         cons,                   -- :: Char -> ByteString -> ByteString
   50         cons',                  -- :: Char -> ByteString -> ByteString
   51         snoc,                   -- :: ByteString -> Char -> ByteString
   52         append,                 -- :: ByteString -> ByteString -> ByteString
   53         head,                   -- :: ByteString -> Char
   54         uncons,                 -- :: ByteString -> Maybe (Char, ByteString)
   55         last,                   -- :: ByteString -> Char
   56         tail,                   -- :: ByteString -> ByteString
   57         unsnoc,                 -- :: ByteString -> Maybe (ByteString, Char)
   58         init,                   -- :: ByteString -> ByteString
   59         null,                   -- :: ByteString -> Bool
   60         length,                 -- :: ByteString -> Int64
   61 
   62         -- * Transforming ByteStrings
   63         map,                    -- :: (Char -> Char) -> ByteString -> ByteString
   64         reverse,                -- :: ByteString -> ByteString
   65         intersperse,            -- :: Char -> ByteString -> ByteString
   66         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   67         transpose,              -- :: [ByteString] -> [ByteString]
   68 
   69         -- * Reducing 'ByteString's (folds)
   70         foldl,                  -- :: (a -> Char -> a) -> a -> ByteString -> a
   71         foldl',                 -- :: (a -> Char -> a) -> a -> ByteString -> a
   72         foldl1,                 -- :: (Char -> Char -> Char) -> ByteString -> Char
   73         foldl1',                -- :: (Char -> Char -> Char) -> ByteString -> Char
   74         foldr,                  -- :: (Char -> a -> a) -> a -> ByteString -> a
   75         foldr1,                 -- :: (Char -> Char -> Char) -> ByteString -> Char
   76 
   77         -- ** Special folds
   78         concat,                 -- :: [ByteString] -> ByteString
   79         concatMap,              -- :: (Char -> ByteString) -> ByteString -> ByteString
   80         any,                    -- :: (Char -> Bool) -> ByteString -> Bool
   81         all,                    -- :: (Char -> Bool) -> ByteString -> Bool
   82         maximum,                -- :: ByteString -> Char
   83         minimum,                -- :: ByteString -> Char
   84 
   85         -- * Building ByteStrings
   86         -- ** Scans
   87         scanl,                  -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
   88 --      scanl1,                 -- :: (Char -> Char -> Char) -> ByteString -> ByteString
   89 --      scanr,                  -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
   90 --      scanr1,                 -- :: (Char -> Char -> Char) -> ByteString -> ByteString
   91 
   92         -- ** Accumulating maps
   93         mapAccumL,              -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
   94         mapAccumR,              -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
   95 
   96         -- ** Infinite ByteStrings
   97         repeat,                 -- :: Char -> ByteString
   98         replicate,              -- :: Int64 -> Char -> ByteString
   99         cycle,                  -- :: ByteString -> ByteString
  100         iterate,                -- :: (Char -> Char) -> Char -> ByteString
  101 
  102         -- ** Unfolding ByteStrings
  103         unfoldr,                -- :: (a -> Maybe (Char, a)) -> a -> ByteString
  104 
  105         -- * Substrings
  106 
  107         -- ** Breaking strings
  108         take,                   -- :: Int64 -> ByteString -> ByteString
  109         drop,                   -- :: Int64 -> ByteString -> ByteString
  110         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
  111         takeWhile,              -- :: (Char -> Bool) -> ByteString -> ByteString
  112         dropWhile,              -- :: (Char -> Bool) -> ByteString -> ByteString
  113         span,                   -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
  114         break,                  -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
  115         group,                  -- :: ByteString -> [ByteString]
  116         groupBy,                -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
  117         inits,                  -- :: ByteString -> [ByteString]
  118         tails,                  -- :: ByteString -> [ByteString]
  119         stripPrefix,            -- :: ByteString -> ByteString -> Maybe ByteString
  120         stripSuffix,            -- :: ByteString -> ByteString -> Maybe ByteString
  121 
  122         -- ** Breaking into many substrings
  123         split,                  -- :: Char -> ByteString -> [ByteString]
  124         splitWith,              -- :: (Char -> Bool) -> ByteString -> [ByteString]
  125 
  126         -- ** Breaking into lines and words
  127         lines,                  -- :: ByteString -> [ByteString]
  128         words,                  -- :: ByteString -> [ByteString]
  129         unlines,                -- :: [ByteString] -> ByteString
  130         unwords,                -- :: ByteString -> [ByteString]
  131 
  132         -- * Predicates
  133         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  134         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  135 
  136         -- * Searching ByteStrings
  137 
  138         -- ** Searching by equality
  139         elem,                   -- :: Char -> ByteString -> Bool
  140         notElem,                -- :: Char -> ByteString -> Bool
  141 
  142         -- ** Searching with a predicate
  143         find,                   -- :: (Char -> Bool) -> ByteString -> Maybe Char
  144         filter,                 -- :: (Char -> Bool) -> ByteString -> ByteString
  145 --      partition               -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
  146 
  147         -- * Indexing ByteStrings
  148         index,                  -- :: ByteString -> Int64 -> Char
  149         elemIndex,              -- :: Char -> ByteString -> Maybe Int64
  150         elemIndices,            -- :: Char -> ByteString -> [Int64]
  151         findIndex,              -- :: (Char -> Bool) -> ByteString -> Maybe Int64
  152         findIndices,            -- :: (Char -> Bool) -> ByteString -> [Int64]
  153         count,                  -- :: Char -> ByteString -> Int64
  154 
  155         -- * Zipping and unzipping ByteStrings
  156         zip,                    -- :: ByteString -> ByteString -> [(Char,Char)]
  157         zipWith,                -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c]
  158 --      unzip,                  -- :: [(Char,Char)] -> (ByteString,ByteString)
  159 
  160         -- * Ordered ByteStrings
  161 --        sort,                   -- :: ByteString -> ByteString
  162 
  163         -- * Low level conversions
  164         -- ** Copying ByteStrings
  165         copy,                   -- :: ByteString -> ByteString
  166 
  167         -- * Reading from ByteStrings
  168         readInt,
  169         readInteger,
  170 
  171         -- * I\/O with 'ByteString's
  172         -- | ByteString I/O uses binary mode, without any character decoding
  173         -- or newline conversion. The fact that it does not respect the Handle
  174         -- newline mode is considered a flaw and may be changed in a future version.
  175 
  176         -- ** Standard input and output
  177         getContents,            -- :: IO ByteString
  178         putStr,                 -- :: ByteString -> IO ()
  179         putStrLn,               -- :: ByteString -> IO ()
  180         interact,               -- :: (ByteString -> ByteString) -> IO ()
  181 
  182         -- ** Files
  183         readFile,               -- :: FilePath -> IO ByteString
  184         writeFile,              -- :: FilePath -> ByteString -> IO ()
  185         appendFile,             -- :: FilePath -> ByteString -> IO ()
  186 
  187         -- ** I\/O with Handles
  188         hGetContents,           -- :: Handle -> IO ByteString
  189         hGet,                   -- :: Handle -> Int64 -> IO ByteString
  190         hGetNonBlocking,        -- :: Handle -> Int64 -> IO ByteString
  191         hPut,                   -- :: Handle -> ByteString -> IO ()
  192         hPutNonBlocking,        -- :: Handle -> ByteString -> IO ByteString
  193         hPutStr,                -- :: Handle -> ByteString -> IO ()
  194         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
  195 
  196   ) where
  197 
  198 -- Functions transparently exported
  199 import Data.ByteString.Lazy 
  200         (fromChunks, toChunks, fromStrict, toStrict
  201         ,empty,null,length,tail,init,append,reverse,transpose,cycle
  202         ,concat,take,drop,splitAt,intercalate
  203         ,isPrefixOf,isSuffixOf,group,inits,tails,copy
  204         ,stripPrefix,stripSuffix
  205         ,hGetContents, hGet, hPut, getContents
  206         ,hGetNonBlocking, hPutNonBlocking
  207         ,putStr, hPutStr, interact)
  208 
  209 -- Functions we need to wrap.
  210 import qualified Data.ByteString.Lazy as L
  211 import qualified Data.ByteString as S (ByteString) -- typename only
  212 import qualified Data.ByteString as B
  213 import qualified Data.ByteString.Unsafe as B
  214 import Data.ByteString.Lazy.Internal
  215 
  216 import Data.ByteString.Internal (w2c, c2w, isSpaceWord8)
  217 
  218 import Data.Int (Int64)
  219 import qualified Data.List as List
  220 
  221 import Prelude hiding           
  222         (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
  223         ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter
  224         ,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1
  225         ,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn
  226         ,zip,zipWith,unzip,notElem,repeat,iterate,interact,cycle)
  227 
  228 import System.IO            (Handle,stdout,hClose,openBinaryFile,IOMode(..))
  229 import Control.Exception    (bracket)
  230 
  231 ------------------------------------------------------------------------
  232 
  233 -- | /O(1)/ Convert a 'Char' into a 'ByteString'
  234 singleton :: Char -> ByteString
  235 singleton = L.singleton . c2w
  236 {-# INLINE singleton #-}
  237 
  238 -- | /O(n)/ Convert a 'String' into a 'ByteString'. 
  239 pack :: [Char] -> ByteString
  240 pack = packChars
  241 
  242 -- | /O(n)/ Converts a 'ByteString' to a 'String'.
  243 unpack :: ByteString -> [Char]
  244 unpack = unpackChars
  245 
  246 infixr 5 `cons`, `cons'` --same as list (:)
  247 infixl 5 `snoc`
  248 
  249 -- | /O(1)/ 'cons' is analogous to '(:)' for lists.
  250 cons :: Char -> ByteString -> ByteString
  251 cons = L.cons . c2w
  252 {-# INLINE cons #-}
  253 
  254 -- | /O(1)/ Unlike 'cons', 'cons\'' is
  255 -- strict in the ByteString that we are consing onto. More precisely, it forces
  256 -- the head and the first chunk. It does this because, for space efficiency, it
  257 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
  258 -- new \'chunk\'.
  259 --
  260 -- So that means you can't use a lazy recursive contruction like this:
  261 --
  262 -- > let xs = cons\' c xs in xs
  263 --
  264 -- You can however use 'cons', as well as 'repeat' and 'cycle', to build
  265 -- infinite lazy ByteStrings.
  266 --
  267 cons' :: Char -> ByteString -> ByteString
  268 cons' = L.cons' . c2w
  269 {-# INLINE cons' #-}
  270 
  271 -- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to
  272 -- 'cons', this function performs a memcpy.
  273 snoc :: ByteString -> Char -> ByteString
  274 snoc p = L.snoc p . c2w
  275 {-# INLINE snoc #-}
  276 
  277 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  278 head :: ByteString -> Char
  279 head = w2c . L.head
  280 {-# INLINE head #-}
  281 
  282 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  283 -- if it is empty.
  284 uncons :: ByteString -> Maybe (Char, ByteString)
  285 uncons bs = case L.uncons bs of
  286                   Nothing -> Nothing
  287                   Just (w, bs') -> Just (w2c w, bs')
  288 {-# INLINE uncons #-}
  289 
  290 -- | /O(n\/c)/ Extract the 'init' and 'last' of a ByteString, returning Nothing
  291 -- if it is empty.
  292 unsnoc :: ByteString -> Maybe (ByteString, Char)
  293 unsnoc bs = case L.unsnoc bs of
  294                   Nothing -> Nothing
  295                   Just (bs', w) -> Just (bs', w2c w)
  296 {-# INLINE unsnoc #-}
  297 
  298 -- | /O(1)/ Extract the last element of a packed string, which must be non-empty.
  299 last :: ByteString -> Char
  300 last = w2c . L.last
  301 {-# INLINE last #-}
  302 
  303 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@
  304 map :: (Char -> Char) -> ByteString -> ByteString
  305 map f = L.map (c2w . f . w2c)
  306 {-# INLINE map #-}
  307 
  308 -- | /O(n)/ The 'intersperse' function takes a Char and a 'ByteString'
  309 -- and \`intersperses\' that Char between the elements of the
  310 -- 'ByteString'.  It is analogous to the intersperse function on Lists.
  311 intersperse :: Char -> ByteString -> ByteString
  312 intersperse = L.intersperse . c2w
  313 {-# INLINE intersperse #-}
  314 
  315 -- | 'foldl', applied to a binary operator, a starting value (typically
  316 -- the left-identity of the operator), and a ByteString, reduces the
  317 -- ByteString using the binary operator, from left to right.
  318 foldl :: (a -> Char -> a) -> a -> ByteString -> a
  319 foldl f = L.foldl (\a c -> f a (w2c c))
  320 {-# INLINE foldl #-}
  321 
  322 -- | 'foldl\'' is like foldl, but strict in the accumulator.
  323 foldl' :: (a -> Char -> a) -> a -> ByteString -> a
  324 foldl' f = L.foldl' (\a c -> f a (w2c c))
  325 {-# INLINE foldl' #-}
  326 
  327 -- | 'foldr', applied to a binary operator, a starting value
  328 -- (typically the right-identity of the operator), and a packed string,
  329 -- reduces the packed string using the binary operator, from right to left.
  330 foldr :: (Char -> a -> a) -> a -> ByteString -> a
  331 foldr f = L.foldr (\c a -> f (w2c c) a)
  332 {-# INLINE foldr #-}
  333 
  334 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  335 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  336 foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
  337 foldl1 f ps = w2c (L.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
  338 {-# INLINE foldl1 #-}
  339 
  340 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  341 foldl1' :: (Char -> Char -> Char) -> ByteString -> Char
  342 foldl1' f ps = w2c (L.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps)
  343 
  344 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  345 -- and thus must be applied to non-empty 'ByteString's
  346 foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
  347 foldr1 f ps = w2c (L.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
  348 {-# INLINE foldr1 #-}
  349 
  350 -- | Map a function over a 'ByteString' and concatenate the results
  351 concatMap :: (Char -> ByteString) -> ByteString -> ByteString
  352 concatMap f = L.concatMap (f . w2c)
  353 {-# INLINE concatMap #-}
  354 
  355 -- | Applied to a predicate and a ByteString, 'any' determines if
  356 -- any element of the 'ByteString' satisfies the predicate.
  357 any :: (Char -> Bool) -> ByteString -> Bool
  358 any f = L.any (f . w2c)
  359 {-# INLINE any #-}
  360 
  361 -- | Applied to a predicate and a 'ByteString', 'all' determines if
  362 -- all elements of the 'ByteString' satisfy the predicate.
  363 all :: (Char -> Bool) -> ByteString -> Bool
  364 all f = L.all (f . w2c)
  365 {-# INLINE all #-}
  366 
  367 -- | 'maximum' returns the maximum value from a 'ByteString'
  368 maximum :: ByteString -> Char
  369 maximum = w2c . L.maximum
  370 {-# INLINE maximum #-}
  371 
  372 -- | 'minimum' returns the minimum value from a 'ByteString'
  373 minimum :: ByteString -> Char
  374 minimum = w2c . L.minimum
  375 {-# INLINE minimum #-}
  376 
  377 -- ---------------------------------------------------------------------
  378 -- Building ByteStrings
  379 
  380 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  381 -- reduced values from the left. This function will fuse.
  382 --
  383 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  384 --
  385 -- Note that
  386 --
  387 -- > last (scanl f z xs) == foldl f z xs.
  388 scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
  389 scanl f z = L.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z)
  390 
  391 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  392 -- 'foldl'; it applies a function to each element of a ByteString,
  393 -- passing an accumulating parameter from left to right, and returning a
  394 -- final value of this accumulator together with the new ByteString.
  395 mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
  396 mapAccumL f = L.mapAccumL (\a w -> case f a (w2c w) of (a',c) -> (a', c2w c))
  397 
  398 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  399 -- 'foldr'; it applies a function to each element of a ByteString,
  400 -- passing an accumulating parameter from right to left, and returning a
  401 -- final value of this accumulator together with the new ByteString.
  402 mapAccumR :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
  403 mapAccumR f = L.mapAccumR (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c))
  404 
  405 ------------------------------------------------------------------------
  406 -- Generating and unfolding ByteStrings
  407 
  408 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
  409 -- of @f@ to @x@:
  410 --
  411 -- > iterate f x == [x, f x, f (f x), ...]
  412 --
  413 iterate :: (Char -> Char) -> Char -> ByteString
  414 iterate f = L.iterate (c2w . f . w2c) . c2w
  415 
  416 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
  417 -- element.
  418 --
  419 repeat :: Char -> ByteString
  420 repeat = L.repeat . c2w
  421 
  422 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
  423 -- the value of every element.
  424 --
  425 replicate :: Int64 -> Char -> ByteString
  426 replicate w c = L.replicate w (c2w c)
  427 
  428 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
  429 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
  430 -- the element and returns 'Nothing' if it is done producing the
  431 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
  432 -- prepending to the ByteString and @b@ is used as the next element in a
  433 -- recursive call.
  434 unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString
  435 unfoldr f = L.unfoldr $ \a -> case f a of
  436                                     Nothing      -> Nothing
  437                                     Just (c, a') -> Just (c2w c, a')
  438 
  439 ------------------------------------------------------------------------
  440 
  441 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  442 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  443 -- satisfy @p@.
  444 takeWhile :: (Char -> Bool) -> ByteString -> ByteString
  445 takeWhile f = L.takeWhile (f . w2c)
  446 {-# INLINE takeWhile #-}
  447 
  448 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  449 dropWhile :: (Char -> Bool) -> ByteString -> ByteString
  450 dropWhile f = L.dropWhile (f . w2c)
  451 {-# INLINE dropWhile #-}
  452 
  453 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  454 break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
  455 break f = L.break (f . w2c)
  456 {-# INLINE break #-}
  457 
  458 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  459 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  460 span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
  461 span f = L.span (f . w2c)
  462 {-# INLINE span #-}
  463 
  464 {-
  465 -- | 'breakChar' breaks its ByteString argument at the first occurence
  466 -- of the specified Char. It is more efficient than 'break' as it is
  467 -- implemented with @memchr(3)@. I.e.
  468 -- 
  469 -- > break (=='c') "abcd" == breakChar 'c' "abcd"
  470 --
  471 breakChar :: Char -> ByteString -> (ByteString, ByteString)
  472 breakChar = L.breakByte . c2w
  473 {-# INLINE breakChar #-}
  474 
  475 -- | 'spanChar' breaks its ByteString argument at the first
  476 -- occurence of a Char other than its argument. It is more efficient
  477 -- than 'span (==)'
  478 --
  479 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
  480 --
  481 spanChar :: Char -> ByteString -> (ByteString, ByteString)
  482 spanChar = L.spanByte . c2w
  483 {-# INLINE spanChar #-}
  484 -}
  485 
  486 --
  487 -- TODO, more rules for breakChar*
  488 --
  489 
  490 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
  491 -- argument, consuming the delimiter. I.e.
  492 --
  493 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
  494 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
  495 -- > split 'x'  "x"          == ["",""]
  496 -- 
  497 -- and
  498 --
  499 -- > intercalate [c] . split c == id
  500 -- > split == splitWith . (==)
  501 -- 
  502 -- As for all splitting functions in this library, this function does
  503 -- not copy the substrings, it just constructs new 'ByteStrings' that
  504 -- are slices of the original.
  505 --
  506 split :: Char -> ByteString -> [ByteString]
  507 split = L.split . c2w
  508 {-# INLINE split #-}
  509 
  510 -- | /O(n)/ Splits a 'ByteString' into components delimited by
  511 -- separators, where the predicate returns True for a separator element.
  512 -- The resulting components do not contain the separators.  Two adjacent
  513 -- separators result in an empty component in the output.  eg.
  514 --
  515 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
  516 --
  517 splitWith :: (Char -> Bool) -> ByteString -> [ByteString]
  518 splitWith f = L.splitWith (f . w2c)
  519 {-# INLINE splitWith #-}
  520 
  521 -- | The 'groupBy' function is the non-overloaded version of 'group'.
  522 groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
  523 groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b))
  524 
  525 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
  526 index :: ByteString -> Int64 -> Char
  527 index = (w2c .) . L.index
  528 {-# INLINE index #-}
  529 
  530 -- | /O(n)/ The 'elemIndex' function returns the index of the first
  531 -- element in the given 'ByteString' which is equal (by memchr) to the
  532 -- query element, or 'Nothing' if there is no such element.
  533 elemIndex :: Char -> ByteString -> Maybe Int64
  534 elemIndex = L.elemIndex . c2w
  535 {-# INLINE elemIndex #-}
  536 
  537 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
  538 -- the indices of all elements equal to the query element, in ascending order.
  539 elemIndices :: Char -> ByteString -> [Int64]
  540 elemIndices = L.elemIndices . c2w
  541 {-# INLINE elemIndices #-}
  542 
  543 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
  544 -- returns the index of the first element in the ByteString satisfying the predicate.
  545 findIndex :: (Char -> Bool) -> ByteString -> Maybe Int64
  546 findIndex f = L.findIndex (f . w2c)
  547 {-# INLINE findIndex #-}
  548 
  549 -- | The 'findIndices' function extends 'findIndex', by returning the
  550 -- indices of all elements satisfying the predicate, in ascending order.
  551 findIndices :: (Char -> Bool) -> ByteString -> [Int64]
  552 findIndices f = L.findIndices (f . w2c)
  553 
  554 -- | count returns the number of times its argument appears in the ByteString
  555 --
  556 -- > count      == length . elemIndices
  557 -- > count '\n' == length . lines
  558 --
  559 -- But more efficiently than using length on the intermediate list.
  560 count :: Char -> ByteString -> Int64
  561 count c = L.count (c2w c)
  562 
  563 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This
  564 -- implementation uses @memchr(3)@.
  565 elem :: Char -> ByteString -> Bool
  566 elem c = L.elem (c2w c)
  567 {-# INLINE elem #-}
  568 
  569 -- | /O(n)/ 'notElem' is the inverse of 'elem'
  570 notElem :: Char -> ByteString -> Bool
  571 notElem c = L.notElem (c2w c)
  572 {-# INLINE notElem #-}
  573 
  574 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
  575 -- returns a ByteString containing those characters that satisfy the
  576 -- predicate.
  577 filter :: (Char -> Bool) -> ByteString -> ByteString
  578 filter f = L.filter (f . w2c)
  579 {-# INLINE filter #-}
  580 
  581 {-
  582 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
  583 -- (==)/, for the common case of filtering a single Char. It is more
  584 -- efficient to use /filterChar/ in this case.
  585 --
  586 -- > filterChar == filter . (==)
  587 --
  588 -- filterChar is around 10x faster, and uses much less space, than its
  589 -- filter equivalent
  590 --
  591 filterChar :: Char -> ByteString -> ByteString
  592 filterChar c ps = replicate (count c ps) c
  593 {-# INLINE filterChar #-}
  594 
  595 {-# RULES
  596   "ByteString specialise filter (== x)" forall x.
  597       filter ((==) x) = filterChar x
  598   #-}
  599 
  600 {-# RULES
  601   "ByteString specialise filter (== x)" forall x.
  602      filter (== x) = filterChar x
  603   #-}
  604 -}
  605 
  606 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
  607 -- and returns the first element in matching the predicate, or 'Nothing'
  608 -- if there is no such element.
  609 find :: (Char -> Bool) -> ByteString -> Maybe Char
  610 find f ps = w2c `fmap` L.find (f . w2c) ps
  611 {-# INLINE find #-}
  612 
  613 {-
  614 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
  615 -- case of filtering a single Char. It is more efficient to use
  616 -- filterChar in this case.
  617 --
  618 -- > filterChar == filter . (==)
  619 --
  620 -- filterChar is around 10x faster, and uses much less space, than its
  621 -- filter equivalent
  622 --
  623 filterChar :: Char -> ByteString -> ByteString
  624 filterChar c = L.filterByte (c2w c)
  625 {-# INLINE filterChar #-}
  626 
  627 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
  628 -- case of filtering a single Char out of a list. It is more efficient
  629 -- to use /filterNotChar/ in this case.
  630 --
  631 -- > filterNotChar == filter . (/=)
  632 --
  633 -- filterNotChar is around 3x faster, and uses much less space, than its
  634 -- filter equivalent
  635 --
  636 filterNotChar :: Char -> ByteString -> ByteString
  637 filterNotChar c = L.filterNotByte (c2w c)
  638 {-# INLINE filterNotChar #-}
  639 -}
  640 
  641 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
  642 -- corresponding pairs of Chars. If one input ByteString is short,
  643 -- excess elements of the longer ByteString are discarded. This is
  644 -- equivalent to a pair of 'unpack' operations, and so space
  645 -- usage may be large for multi-megabyte ByteStrings
  646 zip :: ByteString -> ByteString -> [(Char,Char)]
  647 zip ps qs
  648     | L.null ps || L.null qs = []
  649     | otherwise = (head ps, head qs) : zip (L.tail ps) (L.tail qs)
  650 
  651 -- | 'zipWith' generalises 'zip' by zipping with the function given as
  652 -- the first argument, instead of a tupling function.  For example,
  653 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list
  654 -- of corresponding sums.
  655 zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
  656 zipWith f = L.zipWith ((. w2c) . f . w2c)
  657 
  658 -- | 'lines' breaks a ByteString up into a list of ByteStrings at
  659 -- newline Chars. The resulting strings do not contain newlines.
  660 --
  661 -- As of bytestring 0.9.0.3, this function is stricter than its 
  662 -- list cousin.
  663 --
  664 lines :: ByteString -> [ByteString]
  665 lines Empty          = []
  666 lines (Chunk c0 cs0) = loop0 c0 cs0
  667     where
  668     -- this is a really performance sensitive function but the
  669     -- chunked representation makes the general case a bit expensive
  670     -- however assuming a large chunk size and normalish line lengths
  671     -- we will find line endings much more frequently than chunk
  672     -- endings so it makes sense to optimise for that common case.
  673     -- So we partition into two special cases depending on whether we
  674     -- are keeping back a list of chunks that will eventually be output
  675     -- once we get to the end of the current line.
  676 
  677     -- the common special case where we have no existing chunks of
  678     -- the current line
  679     loop0 :: S.ByteString -> ByteString -> [ByteString]
  680     loop0 c cs =
  681         case B.elemIndex (c2w '\n') c of
  682             Nothing -> case cs of
  683                            Empty  | B.null c  ->                 []
  684                                   | otherwise -> Chunk c Empty : []
  685                            (Chunk c' cs')
  686                                | B.null c  -> loop0 c'     cs'
  687                                | otherwise -> loop  c' [c] cs'
  688 
  689             Just n | n /= 0    -> Chunk (B.unsafeTake n c) Empty
  690                                 : loop0 (B.unsafeDrop (n+1) c) cs
  691                    | otherwise -> Empty
  692                                 : loop0 (B.unsafeTail c) cs
  693 
  694     -- the general case when we are building a list of chunks that are
  695     -- part of the same line
  696     loop :: S.ByteString -> [S.ByteString] -> ByteString -> [ByteString]
  697     loop c line cs =
  698         case B.elemIndex (c2w '\n') c of
  699             Nothing ->
  700                 case cs of
  701                     Empty -> let c' = revChunks (c : line)
  702                               in c' `seq` (c' : [])
  703 
  704                     (Chunk c' cs') -> loop c' (c : line) cs'
  705 
  706             Just n ->
  707                 let c' = revChunks (B.unsafeTake n c : line)
  708                  in c' `seq` (c' : loop0 (B.unsafeDrop (n+1) c) cs)
  709 
  710 {-
  711 
  712 This function is too strict!  Consider,
  713 
  714 > prop_lazy =
  715     (L.unpack . head . lazylines $ L.append (L.pack "a\nb\n") (error "failed"))
  716   ==
  717     "a"
  718 
  719 fails.  Here's a properly lazy version of 'lines' for lazy bytestrings
  720 
  721     lazylines           :: L.ByteString -> [L.ByteString]
  722     lazylines s
  723         | L.null s  = []
  724         | otherwise =
  725             let (l,s') = L.break ((==) '\n') s
  726             in l : if L.null s' then []
  727                                 else lazylines (L.tail s')
  728 
  729 we need a similarly lazy, but efficient version.
  730 
  731 -}
  732 
  733 
  734 -- | 'unlines' is an inverse operation to 'lines'.  It joins lines,
  735 -- after appending a terminating newline to each.
  736 unlines :: [ByteString] -> ByteString
  737 unlines [] = empty
  738 unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space
  739     where nl = singleton '\n'
  740 
  741 -- | 'words' breaks a ByteString up into a list of words, which
  742 -- were delimited by Chars representing white space. And
  743 --
  744 -- > tokens isSpace = words
  745 --
  746 words :: ByteString -> [ByteString]
  747 words = List.filter (not . L.null) . L.splitWith isSpaceWord8
  748 {-# INLINE words #-}
  749 
  750 -- | The 'unwords' function is analogous to the 'unlines' function, on words.
  751 unwords :: [ByteString] -> ByteString
  752 unwords = intercalate (singleton ' ')
  753 {-# INLINE unwords #-}
  754 
  755 -- | readInt reads an Int from the beginning of the ByteString.  If
  756 -- there is no integer at the beginning of the string, it returns
  757 -- Nothing, otherwise it just returns the int read, and the rest of the
  758 -- string.
  759 
  760 readInt :: ByteString -> Maybe (Int, ByteString)
  761 {-# INLINE readInt #-}
  762 readInt Empty        = Nothing
  763 readInt (Chunk x xs) = case w2c (B.unsafeHead x) of
  764     '-' -> loop True  0 0 (B.unsafeTail x) xs
  765     '+' -> loop False 0 0 (B.unsafeTail x) xs
  766     _   -> loop False 0 0 x xs
  767 
  768     where loop :: Bool -> Int -> Int
  769                 -> S.ByteString -> ByteString -> Maybe (Int, ByteString)
  770           loop neg !i !n !c cs
  771               | B.null c = case cs of
  772                              Empty          -> end  neg i n c  cs
  773                              (Chunk c' cs') -> loop neg i n c' cs'
  774               | otherwise =
  775                   case B.unsafeHead c of
  776                     w | w >= 0x30
  777                      && w <= 0x39 -> loop neg (i+1)
  778                                           (n * 10 + (fromIntegral w - 0x30))
  779                                           (B.unsafeTail c) cs
  780                       | otherwise -> end neg i n c cs
  781 
  782           {-# INLINE end #-}
  783           end _   0 _ _  _ = Nothing
  784           end neg _ n c cs = e `seq` e
  785                 where n' = if neg then negate n else n
  786                       c' = chunk c cs
  787                       e  = n' `seq` c' `seq` Just $! (n',c')
  788          --                  in n' `seq` c' `seq` JustS n' c'
  789 
  790 
  791 -- | readInteger reads an Integer from the beginning of the ByteString.  If
  792 -- there is no integer at the beginning of the string, it returns Nothing,
  793 -- otherwise it just returns the int read, and the rest of the string.
  794 readInteger :: ByteString -> Maybe (Integer, ByteString)
  795 readInteger Empty = Nothing
  796 readInteger (Chunk c0 cs0) =
  797         case w2c (B.unsafeHead c0) of
  798             '-' -> first (B.unsafeTail c0) cs0 >>= \(n, cs') -> return (-n, cs')
  799             '+' -> first (B.unsafeTail c0) cs0
  800             _   -> first c0 cs0
  801 
  802     where first c cs
  803               | B.null c = case cs of
  804                   Empty          -> Nothing
  805                   (Chunk c' cs') -> first' c' cs'
  806               | otherwise = first' c cs
  807 
  808           first' c cs = case B.unsafeHead c of
  809               w | w >= 0x30 && w <= 0x39 -> Just $
  810                   loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail c) cs
  811                 | otherwise              -> Nothing
  812 
  813           loop :: Int -> Int -> [Integer]
  814                -> S.ByteString -> ByteString -> (Integer, ByteString)
  815           loop !d !acc ns !c cs
  816               | B.null c = case cs of
  817                              Empty          -> combine d acc ns c cs
  818                              (Chunk c' cs') -> loop d acc ns c' cs'
  819               | otherwise =
  820                   case B.unsafeHead c of
  821                    w | w >= 0x30 && w <= 0x39 ->
  822                        if d < 9 then loop (d+1)
  823                                           (10*acc + (fromIntegral w - 0x30))
  824                                           ns (B.unsafeTail c) cs
  825                                 else loop 1 (fromIntegral w - 0x30)
  826                                           (fromIntegral acc : ns)
  827                                           (B.unsafeTail c) cs
  828                      | otherwise -> combine d acc ns c cs
  829 
  830           combine _ acc [] c cs = end (fromIntegral acc) c cs
  831           combine d acc ns c cs =
  832               end (10^d * combine1 1000000000 ns + fromIntegral acc) c cs
  833 
  834           combine1 _ [n] = n
  835           combine1 b ns  = combine1 (b*b) $ combine2 b ns
  836 
  837           combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns)
  838           combine2 _ ns       = ns
  839 
  840           end n c cs = let c' = chunk c cs
  841                         in c' `seq` (n, c')
  842 
  843 -- | Read an entire file /lazily/ into a 'ByteString'.
  844 readFile :: FilePath -> IO ByteString
  845 readFile f = openBinaryFile f ReadMode >>= hGetContents
  846 
  847 -- | Write a 'ByteString' to a file.
  848 writeFile :: FilePath -> ByteString -> IO ()
  849 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
  850     (\hdl -> hPut hdl txt)
  851 
  852 -- | Append a 'ByteString' to a file.
  853 appendFile :: FilePath -> ByteString -> IO ()
  854 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
  855     (\hdl -> hPut hdl txt)
  856 
  857 
  858 -- | Write a ByteString to a handle, appending a newline byte
  859 --
  860 hPutStrLn :: Handle -> ByteString -> IO ()
  861 hPutStrLn h ps = hPut h ps >> hPut h (L.singleton 0x0a)
  862 
  863 -- | Write a ByteString to stdout, appending a newline byte
  864 putStrLn :: ByteString -> IO ()
  865 putStrLn = hPutStrLn stdout
  866 
  867 
  868 -- ---------------------------------------------------------------------
  869 -- Internal utilities
  870 
  871 -- reverse a list of possibly-empty chunks into a lazy ByteString
  872 revChunks :: [S.ByteString] -> ByteString
  873 revChunks cs = List.foldl' (flip chunk) Empty cs