never executed always true always false
    1 -- -----------------------------------------------------------------------------
    2 -- | GHC LLVM Mangler
    3 --
    4 -- This script processes the assembly produced by LLVM, rewriting all symbols
    5 -- of type @function to @object. This keeps them from going through the PLT,
    6 -- which would be bad due to tables-next-to-code. On x86_64,
    7 -- it also rewrites AVX instructions that require alignment to their
    8 -- unaligned counterparts, since the stack is only 16-byte aligned but these
    9 -- instructions require 32-byte alignment.
   10 --
   11 
   12 module GHC.CmmToLlvm.Mangler ( llvmFixupAsm ) where
   13 
   14 import GHC.Prelude
   15 
   16 import GHC.Platform ( Platform, platformArch, Arch(..) )
   17 import GHC.Utils.Exception (try)
   18 
   19 import qualified Data.ByteString.Char8 as B
   20 import System.IO
   21 
   22 -- | Read in assembly file and process
   23 llvmFixupAsm :: Platform -> FilePath -> FilePath -> IO ()
   24 llvmFixupAsm platform f1 f2 = {-# SCC "llvm_mangler" #-}
   25   withBinaryFile f1 ReadMode $ \r -> withBinaryFile f2 WriteMode $ \w -> do
   26       go r w
   27       hClose r
   28       hClose w
   29       return ()
   30   where
   31     go :: Handle -> Handle -> IO ()
   32     go r w = do
   33       e_l <- try $ B.hGetLine r ::IO (Either IOError B.ByteString)
   34       let writeline a = B.hPutStrLn w (rewriteLine platform rewrites a) >> go r w
   35       case e_l of
   36         Right l -> writeline l
   37         Left _  -> return ()
   38 
   39 -- | These are the rewrites that the mangler will perform
   40 rewrites :: [Rewrite]
   41 rewrites = [rewriteSymType, rewriteAVX, rewriteCall]
   42 
   43 type Rewrite = Platform -> B.ByteString -> Maybe B.ByteString
   44 
   45 -- | Rewrite a line of assembly source with the given rewrites,
   46 -- taking the first rewrite that applies.
   47 rewriteLine :: Platform -> [Rewrite] -> B.ByteString -> B.ByteString
   48 rewriteLine platform rewrites l
   49   -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
   50   -- gen uses prefix data for the info table.  This however does not prevent
   51   -- llvm from generating .subsections_via_symbols, which in turn with
   52   -- -dead_strip, strips the info tables, and therefore breaks ghc.
   53   | isSubsectionsViaSymbols l =
   54     (B.pack "## no .subsection_via_symbols for ghc. We need our info tables!")
   55   | otherwise =
   56     case firstJust $ map (\rewrite -> rewrite platform rest) rewrites of
   57       Nothing        -> l
   58       Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten]
   59   where
   60     isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols")
   61 
   62     (symbol, rest) = splitLine l
   63 
   64     firstJust :: [Maybe a] -> Maybe a
   65     firstJust (Just x:_) = Just x
   66     firstJust []         = Nothing
   67     firstJust (_:rest)   = firstJust rest
   68 
   69 -- | This rewrites @.type@ annotations of function symbols to @%object@.
   70 -- This is done as the linker can relocate @%functions@ through the
   71 -- Procedure Linking Table (PLT). This is bad since we expect that the
   72 -- info table will appear directly before the symbol's location. In the
   73 -- case that the PLT is used, this will be not an info table but instead
   74 -- some random PLT garbage.
   75 rewriteSymType :: Rewrite
   76 rewriteSymType _ l
   77   | isType l  = Just $ rewrite '@' $ rewrite '%' l
   78   | otherwise = Nothing
   79   where
   80     isType = B.isPrefixOf (B.pack ".type")
   81 
   82     rewrite :: Char -> B.ByteString -> B.ByteString
   83     rewrite prefix = replaceOnce funcType objType
   84       where
   85         funcType = prefix `B.cons` B.pack "function"
   86         objType  = prefix `B.cons` B.pack "object"
   87 
   88 -- | This rewrites aligned AVX instructions to their unaligned counterparts on
   89 -- x86-64. This is necessary because the stack is not adequately aligned for
   90 -- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
   91 -- and disable tail call optimization. Both would be catastrophic here so GHC
   92 -- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
   93 -- rewrites the instructions in the mangler.
   94 rewriteAVX :: Rewrite
   95 rewriteAVX platform s
   96   | not isX86_64 = Nothing
   97   | isVmovdqa s  = Just $ replaceOnce (B.pack "vmovdqa") (B.pack "vmovdqu") s
   98   | isVmovap s   = Just $ replaceOnce (B.pack "vmovap") (B.pack "vmovup") s
   99   | otherwise    = Nothing
  100   where
  101     isX86_64 = platformArch platform == ArchX86_64
  102     isVmovdqa = B.isPrefixOf (B.pack "vmovdqa")
  103     isVmovap = B.isPrefixOf (B.pack "vmovap")
  104 
  105 -- | This rewrites (tail) calls to avoid creating PLT entries for
  106 -- functions on riscv64. The replacement will load the address from the
  107 -- GOT, which is resolved to point to the real address of the function.
  108 rewriteCall :: Rewrite
  109 rewriteCall platform l
  110   | not isRISCV64 = Nothing
  111   | isCall l      = Just $ replaceCall "call" "jalr" "ra" l
  112   | isTail l      = Just $ replaceCall "tail" "jr" "t1" l
  113   | otherwise     = Nothing
  114   where
  115     isRISCV64 = platformArch platform == ArchRISCV64
  116     isCall = B.isPrefixOf (B.pack "call\t")
  117     isTail = B.isPrefixOf (B.pack "tail\t")
  118 
  119     replaceCall call jump reg l =
  120         appendInsn (jump ++ "\t" ++ reg) $ removePlt $
  121         replaceOnce (B.pack call) (B.pack ("la\t" ++ reg ++ ",")) l
  122       where
  123         removePlt = replaceOnce (B.pack "@plt") (B.pack "")
  124         appendInsn i = (`B.append` B.pack ("\n\t" ++ i))
  125 
  126 -- | @replaceOnce match replace bs@ replaces the first occurrence of the
  127 -- substring @match@ in @bs@ with @replace@.
  128 replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
  129 replaceOnce matchBS replaceOnceBS = loop
  130   where
  131     loop :: B.ByteString -> B.ByteString
  132     loop cts =
  133         case B.breakSubstring matchBS cts of
  134           (hd,tl) | B.null tl -> hd
  135                   | otherwise -> hd `B.append` replaceOnceBS `B.append`
  136                                  B.drop (B.length matchBS) tl
  137 
  138 -- | This function splits a line of assembly code into the label and the
  139 -- rest of the code.
  140 splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
  141 splitLine l = (symbol, B.dropWhile isSpace rest)
  142   where
  143     isSpace ' ' = True
  144     isSpace '\t' = True
  145     isSpace _ = False
  146     (symbol, rest) = B.span (not . isSpace) l