never executed always true always false
    1 -- (c) The GHC Team
    2 --
    3 -- Functions to evaluate whether or not a string is a valid identifier.
    4 -- There is considerable overlap between the logic here and the logic
    5 -- in GHC.Parser.Lexer, but sadly there seems to be no way to merge them.
    6 
    7 module GHC.Utils.Lexeme (
    8           -- * Lexical characteristics of Haskell names
    9 
   10           -- | Use these functions to figure what kind of name a 'FastString'
   11           -- represents; these functions do /not/ check that the identifier
   12           -- is valid.
   13 
   14         isLexCon, isLexVar, isLexId, isLexSym,
   15         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
   16         startsVarSym, startsVarId, startsConSym, startsConId,
   17 
   18           -- * Validating identifiers
   19 
   20           -- | These functions (working over plain old 'String's) check
   21           -- to make sure that the identifier is valid.
   22         okVarOcc, okConOcc, okTcOcc,
   23         okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
   24 
   25         -- Some of the exports above are not used within GHC, but may
   26         -- be of value to GHC API users.
   27 
   28   ) where
   29 
   30 import GHC.Prelude
   31 
   32 import GHC.Data.FastString
   33 
   34 import Data.Char
   35 import qualified Data.Set as Set
   36 
   37 import GHC.Lexeme
   38 
   39 {-
   40 
   41 ************************************************************************
   42 *                                                                      *
   43     Lexical categories
   44 *                                                                      *
   45 ************************************************************************
   46 
   47 These functions test strings to see if they fit the lexical categories
   48 defined in the Haskell report.
   49 
   50 Note [Classification of generated names]
   51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   52 
   53 Some names generated for internal use can show up in debugging output,
   54 e.g.  when using -ddump-simpl. These generated names start with a $
   55 but should still be pretty-printed using prefix notation. We make sure
   56 this is the case in isLexVarSym by only classifying a name as a symbol
   57 if all its characters are symbols, not just its first one.
   58 -}
   59 
   60 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
   61 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
   62 
   63 isLexCon cs = isLexConId  cs || isLexConSym cs
   64 isLexVar cs = isLexVarId  cs || isLexVarSym cs
   65 
   66 isLexId  cs = isLexConId  cs || isLexVarId  cs
   67 isLexSym cs = isLexConSym cs || isLexVarSym cs
   68 
   69 -------------
   70 isLexConId cs                           -- Prefix type or data constructors
   71   | nullFS cs          = False          --      e.g. "Foo", "[]", "(,)"
   72   | cs == (fsLit "[]") = True
   73   | otherwise          = startsConId (headFS cs)
   74 
   75 isLexVarId cs                           -- Ordinary prefix identifiers
   76   | nullFS cs         = False           --      e.g. "x", "_x"
   77   | otherwise         = startsVarId (headFS cs)
   78 
   79 isLexConSym cs                          -- Infix type or data constructors
   80   | nullFS cs          = False          --      e.g. ":-:", ":", "->"
   81   | cs == (fsLit "->") = True
   82   | otherwise          = startsConSym (headFS cs)
   83 
   84 isLexVarSym fs                          -- Infix identifiers e.g. "+"
   85   | fs == (fsLit "~R#") = True
   86   | otherwise
   87   = case (if nullFS fs then [] else unpackFS fs) of
   88       [] -> False
   89       (c:cs) -> startsVarSym c && all isVarSymChar cs
   90         -- See Note [Classification of generated names]
   91 
   92 {-
   93 
   94 ************************************************************************
   95 *                                                                      *
   96     Detecting valid names for Template Haskell
   97 *                                                                      *
   98 ************************************************************************
   99 
  100 -}
  101 
  102 ----------------------
  103 -- External interface
  104 ----------------------
  105 
  106 -- | Is this an acceptable variable name?
  107 okVarOcc :: String -> Bool
  108 okVarOcc str@(c:_)
  109   | startsVarId c
  110   = okVarIdOcc str
  111   | startsVarSym c
  112   = okVarSymOcc str
  113 okVarOcc _ = False
  114 
  115 -- | Is this an acceptable constructor name?
  116 okConOcc :: String -> Bool
  117 okConOcc str@(c:_)
  118   | startsConId c
  119   = okConIdOcc str
  120   | startsConSym c
  121   = okConSymOcc str
  122   | str == "[]"
  123   = True
  124 okConOcc _ = False
  125 
  126 -- | Is this an acceptable type name?
  127 okTcOcc :: String -> Bool
  128 okTcOcc "[]" = True
  129 okTcOcc "->" = True
  130 okTcOcc "~"  = True
  131 okTcOcc str@(c:_)
  132   | startsConId c
  133   = okConIdOcc str
  134   | startsConSym c
  135   = okConSymOcc str
  136   | startsVarSym c
  137   = okVarSymOcc str
  138 okTcOcc _ = False
  139 
  140 -- | Is this an acceptable alphanumeric variable name, assuming it starts
  141 -- with an acceptable letter?
  142 okVarIdOcc :: String -> Bool
  143 okVarIdOcc str = okIdOcc str &&
  144                  -- admit "_" as a valid identifier.  Required to support typed
  145                  -- holes in Template Haskell.  See #10267
  146                  (str == "_" || not (str `Set.member` reservedIds))
  147 
  148 -- | Is this an acceptable symbolic variable name, assuming it starts
  149 -- with an acceptable character?
  150 okVarSymOcc :: String -> Bool
  151 okVarSymOcc str = all okSymChar str &&
  152                   not (str `Set.member` reservedOps) &&
  153                   not (isDashes str)
  154 
  155 -- | Is this an acceptable alphanumeric constructor name, assuming it
  156 -- starts with an acceptable letter?
  157 okConIdOcc :: String -> Bool
  158 okConIdOcc str = okIdOcc str ||
  159                  is_tuple_name1 True  str ||
  160                    -- Is it a boxed tuple...
  161                  is_tuple_name1 False str ||
  162                    -- ...or an unboxed tuple (#12407)...
  163                  is_sum_name1 str
  164                    -- ...or an unboxed sum (#12514)?
  165   where
  166     -- check for tuple name, starting at the beginning
  167     is_tuple_name1 True  ('(' : rest)       = is_tuple_name2 True  rest
  168     is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest
  169     is_tuple_name1 _     _                  = False
  170 
  171     -- check for tuple tail
  172     is_tuple_name2 True  ")"          = True
  173     is_tuple_name2 False "#)"         = True
  174     is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest
  175     is_tuple_name2 boxed (ws  : rest)
  176       | isSpace ws                    = is_tuple_name2 boxed rest
  177     is_tuple_name2 _     _            = False
  178 
  179     -- check for sum name, starting at the beginning
  180     is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest
  181     is_sum_name1 _                  = False
  182 
  183     -- check for sum tail, only allowing at most one underscore
  184     is_sum_name2 _          "#)"         = True
  185     is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest
  186     is_sum_name2 False      ('_' : rest) = is_sum_name2 True rest
  187     is_sum_name2 underscore (ws  : rest)
  188       | isSpace ws                       = is_sum_name2 underscore rest
  189     is_sum_name2 _          _            = False
  190 
  191 -- | Is this an acceptable symbolic constructor name, assuming it
  192 -- starts with an acceptable character?
  193 okConSymOcc :: String -> Bool
  194 okConSymOcc ":" = True
  195 okConSymOcc str = all okSymChar str &&
  196                   not (str `Set.member` reservedOps)
  197 
  198 ----------------------
  199 -- Internal functions
  200 ----------------------
  201 
  202 -- | Is this string an acceptable id, possibly with a suffix of hashes,
  203 -- but not worrying about case or clashing with reserved words?
  204 okIdOcc :: String -> Bool
  205 okIdOcc str
  206   = let hashes = dropWhile okIdChar str in
  207     all (== '#') hashes   -- -XMagicHash allows a suffix of hashes
  208                           -- of course, `all` says "True" to an empty list
  209 
  210 -- | Is this character acceptable in an identifier (after the first letter)?
  211 -- See alexGetByte in GHC.Parser.Lexer
  212 okIdChar :: Char -> Bool
  213 okIdChar c = case generalCategory c of
  214   UppercaseLetter -> True
  215   LowercaseLetter -> True
  216   TitlecaseLetter -> True
  217   ModifierLetter  -> True -- See #10196
  218   OtherLetter     -> True -- See #1103
  219   NonSpacingMark  -> True -- See #7650
  220   DecimalNumber   -> True
  221   OtherNumber     -> True -- See #4373
  222   _               -> c == '\'' || c == '_'
  223 
  224 -- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
  225 reservedIds :: Set.Set String
  226 reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
  227                            , "do", "else", "foreign", "if", "import", "in"
  228                            , "infix", "infixl", "infixr", "instance", "let"
  229                            , "module", "newtype", "of", "then", "type", "where"
  230                            , "_" ]
  231 
  232 -- | All reserved operators. Taken from section 2.4 of the 2010 Report.
  233 reservedOps :: Set.Set String
  234 reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
  235                            , "@", "~", "=>" ]
  236 
  237 -- | Does this string contain only dashes and has at least 2 of them?
  238 isDashes :: String -> Bool
  239 isDashes ('-' : '-' : rest) = all (== '-') rest
  240 isDashes _                  = False