never executed always true always false
1 -- -----------------------------------------------------------------------------
2 -- | GHC LLVM Mangler
3 --
4 -- This script processes the assembly produced by LLVM, rewriting all symbols
5 -- of type @function to @object. This keeps them from going through the PLT,
6 -- which would be bad due to tables-next-to-code. On x86_64,
7 -- it also rewrites AVX instructions that require alignment to their
8 -- unaligned counterparts, since the stack is only 16-byte aligned but these
9 -- instructions require 32-byte alignment.
10 --
11
12 module GHC.CmmToLlvm.Mangler ( llvmFixupAsm ) where
13
14 import GHC.Prelude
15
16 import GHC.Platform ( Platform, platformArch, Arch(..) )
17 import GHC.Utils.Exception (try)
18
19 import qualified Data.ByteString.Char8 as B
20 import System.IO
21
22 -- | Read in assembly file and process
23 llvmFixupAsm :: Platform -> FilePath -> FilePath -> IO ()
24 llvmFixupAsm platform f1 f2 = {-# SCC "llvm_mangler" #-}
25 withBinaryFile f1 ReadMode $ \r -> withBinaryFile f2 WriteMode $ \w -> do
26 go r w
27 hClose r
28 hClose w
29 return ()
30 where
31 go :: Handle -> Handle -> IO ()
32 go r w = do
33 e_l <- try $ B.hGetLine r ::IO (Either IOError B.ByteString)
34 let writeline a = B.hPutStrLn w (rewriteLine platform rewrites a) >> go r w
35 case e_l of
36 Right l -> writeline l
37 Left _ -> return ()
38
39 -- | These are the rewrites that the mangler will perform
40 rewrites :: [Rewrite]
41 rewrites = [rewriteSymType, rewriteAVX, rewriteCall]
42
43 type Rewrite = Platform -> B.ByteString -> Maybe B.ByteString
44
45 -- | Rewrite a line of assembly source with the given rewrites,
46 -- taking the first rewrite that applies.
47 rewriteLine :: Platform -> [Rewrite] -> B.ByteString -> B.ByteString
48 rewriteLine platform rewrites l
49 -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
50 -- gen uses prefix data for the info table. This however does not prevent
51 -- llvm from generating .subsections_via_symbols, which in turn with
52 -- -dead_strip, strips the info tables, and therefore breaks ghc.
53 | isSubsectionsViaSymbols l =
54 (B.pack "## no .subsection_via_symbols for ghc. We need our info tables!")
55 | otherwise =
56 case firstJust $ map (\rewrite -> rewrite platform rest) rewrites of
57 Nothing -> l
58 Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten]
59 where
60 isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols")
61
62 (symbol, rest) = splitLine l
63
64 firstJust :: [Maybe a] -> Maybe a
65 firstJust (Just x:_) = Just x
66 firstJust [] = Nothing
67 firstJust (_:rest) = firstJust rest
68
69 -- | This rewrites @.type@ annotations of function symbols to @%object@.
70 -- This is done as the linker can relocate @%functions@ through the
71 -- Procedure Linking Table (PLT). This is bad since we expect that the
72 -- info table will appear directly before the symbol's location. In the
73 -- case that the PLT is used, this will be not an info table but instead
74 -- some random PLT garbage.
75 rewriteSymType :: Rewrite
76 rewriteSymType _ l
77 | isType l = Just $ rewrite '@' $ rewrite '%' l
78 | otherwise = Nothing
79 where
80 isType = B.isPrefixOf (B.pack ".type")
81
82 rewrite :: Char -> B.ByteString -> B.ByteString
83 rewrite prefix = replaceOnce funcType objType
84 where
85 funcType = prefix `B.cons` B.pack "function"
86 objType = prefix `B.cons` B.pack "object"
87
88 -- | This rewrites aligned AVX instructions to their unaligned counterparts on
89 -- x86-64. This is necessary because the stack is not adequately aligned for
90 -- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
91 -- and disable tail call optimization. Both would be catastrophic here so GHC
92 -- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
93 -- rewrites the instructions in the mangler.
94 rewriteAVX :: Rewrite
95 rewriteAVX platform s
96 | not isX86_64 = Nothing
97 | isVmovdqa s = Just $ replaceOnce (B.pack "vmovdqa") (B.pack "vmovdqu") s
98 | isVmovap s = Just $ replaceOnce (B.pack "vmovap") (B.pack "vmovup") s
99 | otherwise = Nothing
100 where
101 isX86_64 = platformArch platform == ArchX86_64
102 isVmovdqa = B.isPrefixOf (B.pack "vmovdqa")
103 isVmovap = B.isPrefixOf (B.pack "vmovap")
104
105 -- | This rewrites (tail) calls to avoid creating PLT entries for
106 -- functions on riscv64. The replacement will load the address from the
107 -- GOT, which is resolved to point to the real address of the function.
108 rewriteCall :: Rewrite
109 rewriteCall platform l
110 | not isRISCV64 = Nothing
111 | isCall l = Just $ replaceCall "call" "jalr" "ra" l
112 | isTail l = Just $ replaceCall "tail" "jr" "t1" l
113 | otherwise = Nothing
114 where
115 isRISCV64 = platformArch platform == ArchRISCV64
116 isCall = B.isPrefixOf (B.pack "call\t")
117 isTail = B.isPrefixOf (B.pack "tail\t")
118
119 replaceCall call jump reg l =
120 appendInsn (jump ++ "\t" ++ reg) $ removePlt $
121 replaceOnce (B.pack call) (B.pack ("la\t" ++ reg ++ ",")) l
122 where
123 removePlt = replaceOnce (B.pack "@plt") (B.pack "")
124 appendInsn i = (`B.append` B.pack ("\n\t" ++ i))
125
126 -- | @replaceOnce match replace bs@ replaces the first occurrence of the
127 -- substring @match@ in @bs@ with @replace@.
128 replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
129 replaceOnce matchBS replaceOnceBS = loop
130 where
131 loop :: B.ByteString -> B.ByteString
132 loop cts =
133 case B.breakSubstring matchBS cts of
134 (hd,tl) | B.null tl -> hd
135 | otherwise -> hd `B.append` replaceOnceBS `B.append`
136 B.drop (B.length matchBS) tl
137
138 -- | This function splits a line of assembly code into the label and the
139 -- rest of the code.
140 splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
141 splitLine l = (symbol, B.dropWhile isSpace rest)
142 where
143 isSpace ' ' = True
144 isSpace '\t' = True
145 isSpace _ = False
146 (symbol, rest) = B.span (not . isSpace) l