1 {-# LANGUAGE CPP #-}
    2 {-# LANGUAGE MagicHash #-}
    3 #if __GLASGOW_HASKELL__ >= 703
    4 {-# LANGUAGE Unsafe #-}
    5 #endif
    6 
    7 -- |
    8 -- Module      : Data.ByteString.Unsafe
    9 -- Copyright   : (c) Don Stewart 2006-2008
   10 --               (c) Duncan Coutts 2006-2011
   11 -- License     : BSD-style
   12 -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
   13 -- Stability   : provisional
   14 -- Portability : non-portable
   15 --
   16 -- A module containing unsafe 'ByteString' operations.
   17 --
   18 -- While these functions have a stable API and you may use these functions in
   19 -- applications, do carefully consider the documented pre-conditions;
   20 -- incorrect use can break referential transparency or worse.
   21 --
   22 module Data.ByteString.Unsafe (
   23 
   24         -- * Unchecked access
   25         unsafeHead,             -- :: ByteString -> Word8
   26         unsafeTail,             -- :: ByteString -> ByteString
   27         unsafeInit,             -- :: ByteString -> ByteString
   28         unsafeLast,             -- :: ByteString -> Word8
   29         unsafeIndex,            -- :: ByteString -> Int -> Word8
   30         unsafeTake,             -- :: Int -> ByteString -> ByteString
   31         unsafeDrop,             -- :: Int -> ByteString -> ByteString
   32 
   33         -- * Low level interaction with CStrings
   34         -- ** Using ByteStrings with functions for CStrings
   35         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
   36         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
   37 
   38         -- ** Converting CStrings to ByteStrings
   39         unsafePackCString,      -- :: CString -> IO ByteString
   40         unsafePackCStringLen,   -- :: CStringLen -> IO ByteString
   41         unsafePackMallocCString,-- :: CString -> IO ByteString
   42         unsafePackMallocCStringLen, -- :: CStringLen -> IO ByteString
   43 
   44         unsafePackAddress,          -- :: Addr# -> IO ByteString
   45         unsafePackAddressLen,       -- :: Int -> Addr# -> IO ByteString
   46         unsafePackCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
   47         unsafeFinalize,             -- :: ByteString -> IO ()
   48 
   49   ) where
   50 
   51 import Data.ByteString.Internal
   52 
   53 import Foreign.ForeignPtr       (newForeignPtr_, newForeignPtr, withForeignPtr)
   54 import Foreign.Ptr              (Ptr, plusPtr, castPtr)
   55 
   56 import Foreign.Storable         (Storable(..))
   57 import Foreign.C.String         (CString, CStringLen)
   58 
   59 import Control.Exception        (assert)
   60 
   61 import Data.Word                (Word8)
   62 
   63 import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
   64 import qualified Foreign.Concurrent as FC (newForeignPtr)
   65 
   66 import GHC.Prim                 (Addr#)
   67 import GHC.Ptr                  (Ptr(..))
   68 
   69 -- ---------------------------------------------------------------------
   70 --
   71 -- Extensions to the basic interface
   72 --
   73 
   74 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
   75 -- check for the empty case, so there is an obligation on the programmer
   76 -- to provide a proof that the ByteString is non-empty.
   77 unsafeHead :: ByteString -> Word8
   78 unsafeHead (PS x s l) = assert (l > 0) $
   79     accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
   80 {-# INLINE unsafeHead #-}
   81 
   82 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
   83 -- check for the empty case. As with 'unsafeHead', the programmer must
   84 -- provide a separate proof that the ByteString is non-empty.
   85 unsafeTail :: ByteString -> ByteString
   86 unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1)
   87 {-# INLINE unsafeTail #-}
   88 
   89 -- | A variety of 'init' for non-empty ByteStrings. 'unsafeInit' omits the
   90 -- check for the empty case. As with 'unsafeHead', the programmer must
   91 -- provide a separate proof that the ByteString is non-empty.
   92 unsafeInit :: ByteString -> ByteString
   93 unsafeInit (PS ps s l) = assert (l > 0) $ PS ps s (l-1)
   94 {-# INLINE unsafeInit #-}
   95 
   96 -- | A variety of 'last' for non-empty ByteStrings. 'unsafeLast' omits the
   97 -- check for the empty case. As with 'unsafeHead', the programmer must
   98 -- provide a separate proof that the ByteString is non-empty.
   99 unsafeLast :: ByteString -> Word8
  100 unsafeLast (PS x s l) = assert (l > 0) $
  101     accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
  102 {-# INLINE unsafeLast #-}
  103 
  104 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
  105 -- This omits the bounds check, which means there is an accompanying
  106 -- obligation on the programmer to ensure the bounds are checked in some
  107 -- other way.
  108 unsafeIndex :: ByteString -> Int -> Word8
  109 unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $
  110     accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
  111 {-# INLINE unsafeIndex #-}
  112 
  113 -- | A variety of 'take' which omits the checks on @n@ so there is an
  114 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
  115 unsafeTake :: Int -> ByteString -> ByteString
  116 unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n
  117 {-# INLINE unsafeTake #-}
  118 
  119 -- | A variety of 'drop' which omits the checks on @n@ so there is an
  120 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
  121 unsafeDrop  :: Int -> ByteString -> ByteString
  122 unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
  123 {-# INLINE unsafeDrop #-}
  124 
  125 
  126 -- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of
  127 -- 'ByteString's, which is ideal for string literals. It packs a sequence
  128 -- of bytes into a @ByteString@, given a raw 'Addr#' to the string, and
  129 -- the length of the string.
  130 --
  131 -- This function is /unsafe/ in two ways:
  132 --
  133 -- * the length argument is assumed to be correct. If the length
  134 -- argument is incorrect, it is possible to overstep the end of the
  135 -- byte array.
  136 --
  137 -- * if the underying Addr# is later modified, this change will be
  138 -- reflected in resulting @ByteString@, breaking referential
  139 -- transparency.
  140 --
  141 -- If in doubt, don't use this function.
  142 --
  143 unsafePackAddressLen :: Int -> Addr# -> IO ByteString
  144 unsafePackAddressLen len addr# = do
  145     p <- newForeignPtr_ (Ptr addr#)
  146     return $ PS p 0 len
  147 {-# INLINE unsafePackAddressLen #-}
  148 
  149 -- | /O(1)/ Construct a 'ByteString' given a Ptr Word8 to a buffer, a
  150 -- length, and an IO action representing a finalizer. This function is
  151 -- not available on Hugs.
  152 --
  153 -- This function is /unsafe/, it is possible to break referential
  154 -- transparency by modifying the underlying buffer pointed to by the
  155 -- first argument. Any changes to the original buffer will be reflected
  156 -- in the resulting @ByteString@.
  157 --
  158 unsafePackCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
  159 unsafePackCStringFinalizer p l f = do
  160     fp <- FC.newForeignPtr p f
  161     return $ PS fp 0 l
  162 
  163 -- | Explicitly run the finaliser associated with a 'ByteString'.
  164 -- References to this value after finalisation may generate invalid memory
  165 -- references.
  166 --
  167 -- This function is /unsafe/, as there may be other
  168 -- 'ByteStrings' referring to the same underlying pages. If you use
  169 -- this, you need to have a proof of some kind that all 'ByteString's
  170 -- ever generated from the underlying byte array are no longer live.
  171 --
  172 unsafeFinalize :: ByteString -> IO ()
  173 unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p
  174 
  175 ------------------------------------------------------------------------
  176 -- Packing CStrings into ByteStrings
  177 
  178 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
  179 -- finalizer associated to it, and will not be garbage collected by
  180 -- Haskell. The ByteString length is calculated using /strlen(3)/,
  181 -- and thus the complexity is a /O(n)/.
  182 --
  183 -- This function is /unsafe/. If the @CString@ is later modified, this
  184 -- change will be reflected in the resulting @ByteString@, breaking
  185 -- referential transparency.
  186 --
  187 unsafePackCString :: CString -> IO ByteString
  188 unsafePackCString cstr = do
  189     fp <- newForeignPtr_ (castPtr cstr)
  190     l <- c_strlen cstr
  191     return $! PS fp 0 (fromIntegral l)
  192 
  193 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
  194 -- have /no/ finalizer associated with it, and will not be garbage
  195 -- collected by Haskell. This operation has /O(1)/ complexity as we
  196 -- already know the final size, so no /strlen(3)/ is required.
  197 --
  198 -- This function is /unsafe/. If the original @CStringLen@ is later
  199 -- modified, this change will be reflected in the resulting @ByteString@,
  200 -- breaking referential transparency.
  201 --
  202 unsafePackCStringLen :: CStringLen -> IO ByteString
  203 unsafePackCStringLen (ptr,len) = do
  204     fp <- newForeignPtr_ (castPtr ptr)
  205     return $! PS fp 0 (fromIntegral len)
  206 
  207 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
  208 -- have a @free(3)@ finalizer associated to it.
  209 --
  210 -- This function is /unsafe/. If the original @CString@ is later
  211 -- modified, this change will be reflected in the resulting @ByteString@,
  212 -- breaking referential transparency.
  213 --
  214 -- This function is also unsafe if you call its finalizer twice,
  215 -- which will result in a /double free/ error, or if you pass it
  216 -- a CString not allocated with 'malloc'.
  217 --
  218 unsafePackMallocCString :: CString -> IO ByteString
  219 unsafePackMallocCString cstr = do
  220     fp <- newForeignPtr c_free_finalizer (castPtr cstr)
  221     len <- c_strlen cstr
  222     return $! PS fp 0 (fromIntegral len)
  223 
  224 -- | /O(1)/ Build a @ByteString@ from a malloced @CStringLen@. This
  225 -- value will have a @free(3)@ finalizer associated to it.
  226 --
  227 -- This function is /unsafe/. If the original @CString@ is later
  228 -- modified, this change will be reflected in the resulting @ByteString@,
  229 -- breaking referential transparency.
  230 --
  231 -- This function is also unsafe if you call its finalizer twice,
  232 -- which will result in a /double free/ error, or if you pass it
  233 -- a CString not allocated with 'malloc'.
  234 --
  235 unsafePackMallocCStringLen :: CStringLen -> IO ByteString
  236 unsafePackMallocCStringLen (cstr, len) = do
  237     fp <- newForeignPtr c_free_finalizer (castPtr cstr)
  238     return $! PS fp 0 len
  239 
  240 -- ---------------------------------------------------------------------
  241 
  242 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
  243 -- @CString@.
  244 --
  245 -- This function does zero copying, and merely unwraps a @ByteString@ to
  246 -- appear as a @CString@. It is /unsafe/ in two ways:
  247 --
  248 -- * After calling this function the @CString@ shares the underlying
  249 -- byte buffer with the original @ByteString@. Thus modifying the
  250 -- @CString@, either in C, or using poke, will cause the contents of the
  251 -- @ByteString@ to change, breaking referential transparency. Other
  252 -- @ByteStrings@ created by sharing (such as those produced via 'take'
  253 -- or 'drop') will also reflect these changes. Modifying the @CString@
  254 -- will break referential transparency. To avoid this, use
  255 -- @useAsCString@, which makes a copy of the original @ByteString@.
  256 --
  257 -- * @CStrings@ are often passed to functions that require them to be
  258 -- null-terminated. If the original @ByteString@ wasn't null terminated,
  259 -- neither will the @CString@ be. It is the programmers responsibility
  260 -- to guarantee that the @ByteString@ is indeed null terminated. If in
  261 -- doubt, use @useAsCString@.
  262 --
  263 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
  264 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
  265 
  266 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
  267 -- @CStringLen@.
  268 --
  269 -- This function does zero copying, and merely unwraps a @ByteString@ to
  270 -- appear as a @CStringLen@. It is /unsafe/:
  271 --
  272 -- * After calling this function the @CStringLen@ shares the underlying
  273 -- byte buffer with the original @ByteString@. Thus modifying the
  274 -- @CStringLen@, either in C, or using poke, will cause the contents of the
  275 -- @ByteString@ to change, breaking referential transparency. Other
  276 -- @ByteStrings@ created by sharing (such as those produced via 'take'
  277 -- or 'drop') will also reflect these changes. Modifying the @CStringLen@
  278 -- will break referential transparency. To avoid this, use
  279 -- @useAsCStringLen@, which makes a copy of the original @ByteString@.
  280 --
  281 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
  282 unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)