never executed always true always false
    1 {-
    2 -----------------------------------------------------------------------------
    3 --
    4 -- (c) The University of Glasgow 2015
    5 --
    6 -- ELF format tools
    7 --
    8 -----------------------------------------------------------------------------
    9 -}
   10 
   11 module GHC.SysTools.Elf (
   12     readElfSectionByName,
   13     readElfNoteAsString,
   14     makeElfNote
   15   ) where
   16 
   17 import GHC.Prelude
   18 
   19 import GHC.Utils.Asm
   20 import GHC.Utils.Exception
   21 import GHC.Platform
   22 import GHC.Utils.Error
   23 import GHC.Data.Maybe       (MaybeT(..),runMaybeT)
   24 import GHC.Utils.Misc       (charToC)
   25 import GHC.Utils.Outputable (text,hcat)
   26 import GHC.Utils.Logger
   27 
   28 import Control.Monad (when)
   29 import Data.Binary.Get
   30 import Data.Word
   31 import Data.Char (ord)
   32 import Data.ByteString.Lazy (ByteString)
   33 import qualified Data.ByteString.Lazy as LBS
   34 import qualified Data.ByteString.Lazy.Char8 as B8
   35 
   36 {- Note [ELF specification]
   37    ~~~~~~~~~~~~~~~~~~~~~~~~
   38 
   39    ELF (Executable and Linking Format) is described in the System V Application
   40    Binary Interface (or ABI). The latter is composed of two parts: a generic
   41    part and a processor specific part. The generic ABI describes the parts of
   42    the interface that remain constant across all hardware implementations of
   43    System V.
   44 
   45    The latest release of the specification of the generic ABI is the version
   46    4.1 from March 18, 1997:
   47 
   48      - http://www.sco.com/developers/devspecs/gabi41.pdf
   49 
   50    Since 1997, snapshots of the draft for the "next" version are published:
   51 
   52      - http://www.sco.com/developers/gabi/
   53 
   54    Quoting the notice on the website: "There is more than one instance of these
   55    chapters to permit references to older instances to remain valid. All
   56    modifications to these chapters are forward-compatible, so that correct use
   57    of an older specification will not be invalidated by a newer instance.
   58    Approximately on a yearly basis, a new instance will be saved, as it reaches
   59    what appears to be a stable state."
   60 
   61    Nevertheless we will see that since 1998 it is not true for Note sections.
   62 
   63    Many ELF sections
   64    -----------------
   65 
   66    ELF-4.1: the normal section number fields in ELF are limited to 16 bits,
   67    which runs out of bits when you try to cram in more sections than that. Two
   68    fields are concerned: the one containing the number of the sections and the
   69    one containing the index of the section that contains section's names. (The
   70    same thing applies to the field containing the number of segments, but we
   71    don't care about it here).
   72 
   73    ELF-next: to solve this, theses fields in the ELF header have an escape
   74    value (different for each case), and the actual section number is stashed
   75    into unused fields in the first section header.
   76 
   77    We support this extension as it is forward-compatible with ELF-4.1.
   78    Moreover, GHC may generate objects with a lot of sections with the
   79    "function-sections" feature (one section per function).
   80 
   81    Note sections
   82    -------------
   83 
   84    Sections with type "note" (SHT_NOTE in the specification) are used to add
   85    arbitrary data into an ELF file. An entry in a note section is composed of a
   86    name, a type and a value.
   87 
   88    ELF-4.1: "The note information in sections and program header elements holds
   89    any number of entries, each of which is an array of 4-byte words in the
   90    format of the target processor." Each entry has the following format:
   91          | namesz |   Word32: size of the name string (including the ending \0)
   92          | descsz |   Word32: size of the value
   93          |  type  |   Word32: type of the note
   94          |  name  |   Name string (with \0 padding to ensure 4-byte alignment)
   95          |  ...   |
   96          |  desc  |   Value (with \0 padding to ensure 4-byte alignment)
   97          |  ...   |
   98 
   99    ELF-next: "The note information in sections and program header elements
  100    holds a variable amount of entries. In 64-bit objects (files with
  101    e_ident[EI_CLASS] equal to ELFCLASS64), each entry is an array of 8-byte
  102    words in the format of the target processor. In 32-bit objects (files with
  103    e_ident[EI_CLASS] equal to ELFCLASS32), each entry is an array of 4-byte
  104    words in the format of the target processor." (from 1998-2015 snapshots)
  105 
  106    This is not forward-compatible with ELF-4.1. In practice, for almost all
  107    platforms namesz, descz and type fields are 4-byte words for both 32-bit and
  108    64-bit objects (see elf.h and readelf source code).
  109 
  110    The only exception in readelf source code is for IA_64 machines with OpenVMS
  111    OS: "This OS has so many departures from the ELF standard that we test it at
  112    many places" (comment for is_ia64_vms() in readelf.c). In this case, namesz,
  113    descsz and type fields are 8-byte words and name and value fields are padded
  114    to ensure 8-byte alignment.
  115 
  116    We don't support this platform in the following code. Reading a note section
  117    could be done easily (by testing Machine and OS fields in the ELF header).
  118    Writing a note section, however, requires that we generate a different
  119    assembly code for GAS depending on the target platform and this is a little
  120    bit more involved.
  121 
  122 -}
  123 
  124 
  125 -- | ELF header
  126 --
  127 -- The ELF header indicates the native word size (32-bit or 64-bit) and the
  128 -- endianness of the target machine. We directly store getters for words of
  129 -- different sizes as it is more convenient to use. We also store the word size
  130 -- as it is useful to skip some uninteresting fields.
  131 --
  132 -- Other information such as the target machine and OS are left out as we don't
  133 -- use them yet. We could add them in the future if we ever need them.
  134 data ElfHeader = ElfHeader
  135    { gw16     :: Get Word16   -- ^ Get a Word16 with the correct endianness
  136    , gw32     :: Get Word32   -- ^ Get a Word32 with the correct endianness
  137    , gwN      :: Get Word64   -- ^ Get a Word with the correct word size
  138                               --   and endianness
  139    , wordSize :: Int          -- ^ Word size in bytes
  140    }
  141 
  142 
  143 -- | Read the ELF header
  144 readElfHeader :: Logger -> ByteString -> IO (Maybe ElfHeader)
  145 readElfHeader logger bs = runGetOrThrow getHeader bs `catchIO` \_ -> do
  146     debugTraceMsg logger 3 $
  147       text ("Unable to read ELF header")
  148     return Nothing
  149   where
  150     getHeader = do
  151       magic    <- getWord32be
  152       ws       <- getWord8
  153       endian   <- getWord8
  154       version  <- getWord8
  155       skip 9  -- skip OSABI, ABI version and padding
  156       when (magic /= 0x7F454C46 || version /= 1) $ fail "Invalid ELF header"
  157 
  158       case (ws, endian) of
  159           -- ELF 32, little endian
  160           (1,1) -> return . Just $ ElfHeader
  161                            getWord16le
  162                            getWord32le
  163                            (fmap fromIntegral getWord32le) 4
  164           -- ELF 32, big endian
  165           (1,2) -> return . Just $ ElfHeader
  166                            getWord16be
  167                            getWord32be
  168                            (fmap fromIntegral getWord32be) 4
  169           -- ELF 64, little endian
  170           (2,1) -> return . Just $ ElfHeader
  171                            getWord16le
  172                            getWord32le
  173                            (fmap fromIntegral getWord64le) 8
  174           -- ELF 64, big endian
  175           (2,2) -> return . Just $ ElfHeader
  176                            getWord16be
  177                            getWord32be
  178                            (fmap fromIntegral getWord64be) 8
  179           _     -> fail "Invalid ELF header"
  180 
  181 
  182 ------------------
  183 -- SECTIONS
  184 ------------------
  185 
  186 
  187 -- | Description of the section table
  188 data SectionTable = SectionTable
  189   { sectionTableOffset :: Word64  -- ^ offset of the table describing sections
  190   , sectionEntrySize   :: Word16  -- ^ size of an entry in the section table
  191   , sectionEntryCount  :: Word64  -- ^ number of sections
  192   , sectionNameIndex   :: Word32  -- ^ index of a special section which
  193                                   --   contains section's names
  194   }
  195 
  196 -- | Read the ELF section table
  197 readElfSectionTable :: Logger
  198                     -> ElfHeader
  199                     -> ByteString
  200                     -> IO (Maybe SectionTable)
  201 
  202 readElfSectionTable logger hdr bs = action `catchIO` \_ -> do
  203     debugTraceMsg logger 3 $
  204       text ("Unable to read ELF section table")
  205     return Nothing
  206   where
  207     getSectionTable :: Get SectionTable
  208     getSectionTable = do
  209       skip (24 + 2*wordSize hdr) -- skip header and some other fields
  210       secTableOffset <- gwN hdr
  211       skip 10
  212       entrySize      <- gw16 hdr
  213       entryCount     <- gw16 hdr
  214       secNameIndex   <- gw16 hdr
  215       return (SectionTable secTableOffset entrySize
  216                            (fromIntegral entryCount)
  217                            (fromIntegral secNameIndex))
  218 
  219     action = do
  220       secTable <- runGetOrThrow getSectionTable bs
  221       -- In some cases, the number of entries and the index of the section
  222       -- containing section's names must be found in unused fields of the first
  223       -- section entry (see Note [ELF specification])
  224       let
  225         offSize0 = fromIntegral $ sectionTableOffset secTable + 8
  226                                   + 3 * fromIntegral (wordSize hdr)
  227         offLink0 = fromIntegral $ offSize0 + fromIntegral (wordSize hdr)
  228 
  229       entryCount'     <- if sectionEntryCount secTable /= 0
  230                           then return (sectionEntryCount secTable)
  231                           else runGetOrThrow (gwN hdr) (LBS.drop offSize0 bs)
  232       entryNameIndex' <- if sectionNameIndex secTable /= 0xffff
  233                           then return (sectionNameIndex secTable)
  234                           else runGetOrThrow (gw32 hdr) (LBS.drop offLink0 bs)
  235       return (Just $ secTable
  236         { sectionEntryCount = entryCount'
  237         , sectionNameIndex  = entryNameIndex'
  238         })
  239 
  240 
  241 -- | A section
  242 data Section = Section
  243   { entryName :: ByteString   -- ^ Name of the section
  244   , entryBS   :: ByteString   -- ^ Content of the section
  245   }
  246 
  247 -- | Read a ELF section
  248 readElfSectionByIndex :: Logger
  249                       -> ElfHeader
  250                       -> SectionTable
  251                       -> Word64
  252                       -> ByteString
  253                       -> IO (Maybe Section)
  254 
  255 readElfSectionByIndex logger hdr secTable i bs = action `catchIO` \_ -> do
  256     debugTraceMsg logger 3 $
  257       text ("Unable to read ELF section")
  258     return Nothing
  259   where
  260     -- read an entry from the section table
  261     getEntry = do
  262       nameIndex <- gw32 hdr
  263       skip (4+2*wordSize hdr)
  264       offset    <- fmap fromIntegral $ gwN hdr
  265       size      <- fmap fromIntegral $ gwN hdr
  266       let bs' = LBS.take size (LBS.drop offset bs)
  267       return (nameIndex,bs')
  268 
  269     -- read the entry with the given index in the section table
  270     getEntryByIndex x = runGetOrThrow getEntry bs'
  271       where
  272         bs' = LBS.drop off bs
  273         off = fromIntegral $ sectionTableOffset secTable +
  274                              x * fromIntegral (sectionEntrySize secTable)
  275 
  276     -- Get the name of a section
  277     getEntryName nameIndex = do
  278       let idx = fromIntegral (sectionNameIndex secTable)
  279       (_,nameTable) <- getEntryByIndex idx
  280       let bs' = LBS.drop nameIndex nameTable
  281       runGetOrThrow getLazyByteStringNul bs'
  282 
  283     action = do
  284       (nameIndex,bs') <- getEntryByIndex (fromIntegral i)
  285       name            <- getEntryName (fromIntegral nameIndex)
  286       return (Just $ Section name bs')
  287 
  288 
  289 -- | Find a section from its name. Return the section contents.
  290 --
  291 -- We do not perform any check on the section type.
  292 findSectionFromName :: Logger
  293                     -> ElfHeader
  294                     -> SectionTable
  295                     -> String
  296                     -> ByteString
  297                     -> IO (Maybe ByteString)
  298 findSectionFromName logger hdr secTable name bs =
  299     rec [0..sectionEntryCount secTable - 1]
  300   where
  301     -- convert the required section name into a ByteString to perform
  302     -- ByteString comparison instead of String comparison
  303     name' = B8.pack name
  304 
  305     -- compare recursively each section name and return the contents of
  306     -- the matching one, if any
  307     rec []     = return Nothing
  308     rec (x:xs) = do
  309       me <- readElfSectionByIndex logger hdr secTable x bs
  310       case me of
  311         Just e | entryName e == name' -> return (Just (entryBS e))
  312         _                             -> rec xs
  313 
  314 
  315 -- | Given a section name, read its contents as a ByteString.
  316 --
  317 -- If the section isn't found or if there is any parsing error, we return
  318 -- Nothing
  319 readElfSectionByName :: Logger
  320                      -> ByteString
  321                      -> String
  322                      -> IO (Maybe LBS.ByteString)
  323 
  324 readElfSectionByName logger bs name = action `catchIO` \_ -> do
  325     debugTraceMsg logger 3 $
  326       text ("Unable to read ELF section \"" ++ name ++ "\"")
  327     return Nothing
  328   where
  329     action = runMaybeT $ do
  330       hdr      <- MaybeT $ readElfHeader logger bs
  331       secTable <- MaybeT $ readElfSectionTable logger hdr bs
  332       MaybeT $ findSectionFromName logger hdr secTable name bs
  333 
  334 ------------------
  335 -- NOTE SECTIONS
  336 ------------------
  337 
  338 -- | read a Note as a ByteString
  339 --
  340 -- If you try to read a note from a section which does not support the Note
  341 -- format, the parsing is likely to fail and Nothing will be returned
  342 readElfNoteBS :: Logger
  343               -> ByteString
  344               -> String
  345               -> String
  346               -> IO (Maybe LBS.ByteString)
  347 
  348 readElfNoteBS logger bs sectionName noteId = action `catchIO`  \_ -> do
  349     debugTraceMsg logger 3 $
  350          text ("Unable to read ELF note \"" ++ noteId ++
  351                "\" in section \"" ++ sectionName ++ "\"")
  352     return Nothing
  353   where
  354     -- align the getter on n bytes
  355     align n = do
  356       m <- bytesRead
  357       if m `mod` n == 0
  358         then return ()
  359         else skip 1 >> align n
  360 
  361     -- noteId as a bytestring
  362     noteId' = B8.pack noteId
  363 
  364     -- read notes recursively until the one with a valid identifier is found
  365     findNote hdr = do
  366       align 4
  367       namesz <- gw32 hdr
  368       descsz <- gw32 hdr
  369       _      <- gw32 hdr -- we don't use the note type
  370       name   <- if namesz == 0
  371                   then return LBS.empty
  372                   else getLazyByteStringNul
  373       align 4
  374       desc  <- if descsz == 0
  375                   then return LBS.empty
  376                   else getLazyByteString (fromIntegral descsz)
  377       if name == noteId'
  378         then return $ Just desc
  379         else findNote hdr
  380 
  381 
  382     action = runMaybeT $ do
  383       hdr  <- MaybeT $ readElfHeader logger bs
  384       sec  <- MaybeT $ readElfSectionByName logger bs sectionName
  385       MaybeT $ runGetOrThrow (findNote hdr) sec
  386 
  387 -- | read a Note as a String
  388 --
  389 -- If you try to read a note from a section which does not support the Note
  390 -- format, the parsing is likely to fail and Nothing will be returned
  391 readElfNoteAsString :: Logger
  392                     -> FilePath
  393                     -> String
  394                     -> String
  395                     -> IO (Maybe String)
  396 
  397 readElfNoteAsString logger path sectionName noteId = action `catchIO`  \_ -> do
  398     debugTraceMsg logger 3 $
  399          text ("Unable to read ELF note \"" ++ noteId ++
  400                "\" in section \"" ++ sectionName ++ "\"")
  401     return Nothing
  402   where
  403     action = do
  404       bs   <- LBS.readFile path
  405       note <- readElfNoteBS logger bs sectionName noteId
  406       return (fmap B8.unpack note)
  407 
  408 
  409 -- | Generate the GAS code to create a Note section
  410 --
  411 -- Header fields for notes are 32-bit long (see Note [ELF specification]).
  412 makeElfNote :: Platform -> String -> String -> Word32 -> String -> SDoc
  413 makeElfNote platform sectionName noteName typ contents = hcat [
  414     text "\t.section ",
  415     text sectionName,
  416     text ",\"\",",
  417     sectionType platform "note",
  418     text "\n",
  419     text "\t.balign 4\n",
  420 
  421     -- note name length (+ 1 for ending \0)
  422     asWord32 (length noteName + 1),
  423 
  424     -- note contents size
  425     asWord32 (length contents),
  426 
  427     -- note type
  428     asWord32 typ,
  429 
  430     -- note name (.asciz for \0 ending string) + padding
  431     text "\t.asciz \"",
  432     text noteName,
  433     text "\"\n",
  434     text "\t.balign 4\n",
  435 
  436     -- note contents (.ascii to avoid ending \0) + padding
  437     text "\t.ascii \"",
  438     text (escape contents),
  439     text "\"\n",
  440     text "\t.balign 4\n"]
  441   where
  442     escape :: String -> String
  443     escape = concatMap (charToC.fromIntegral.ord)
  444 
  445     asWord32 :: Show a => a -> SDoc
  446     asWord32 x = hcat [
  447       text "\t.4byte ",
  448       text (show x),
  449       text "\n"]
  450 
  451 
  452 ------------------
  453 -- Helpers
  454 ------------------
  455 
  456 -- | runGet in IO monad that throws an IOException on failure
  457 runGetOrThrow :: Get a -> LBS.ByteString -> IO a
  458 runGetOrThrow g bs = case runGetOrFail g bs of
  459   Left _        -> fail "Error while reading file"
  460   Right (_,_,a) -> return a