{-# LANGUAGE BangPatterns #-} module Presyntax.Lexer where import Data.Text (Text) import Data.Char import qualified Data.Text as T {- HLINT ignore -} data TokenClass = Tok_var Text | Tok_lambda | Tok_type | Tok_let | Tok_in -- Operations on equality | Tok_coe | Tok_cong | Tok_refl | Tok_sym | Tok_proj1 | Tok_proj2 | Tok_top | Tok_oparen | Tok_cparen | Tok_obrace | Tok_cbrace | Tok_arrow | Tok_times | Tok_colon | Tok_comma | Tok_semi | Tok_equal | Tok_under | Tok_equiv deriving (Eq, Show, Ord) data Token = Token { tokLine :: {-# UNPACK #-} !Int , tokCol :: {-# UNPACK #-} !Int , tokSOff :: {-# UNPACK #-} !Int , tokOff :: {-# UNPACK #-} !Int , tokClass :: !TokenClass } deriving (Eq, Show, Ord) data LexError = LexError { leChar :: {-# UNPACK #-} !Char , leLine :: {-# UNPACK #-} !Int , leCol :: {-# UNPACK #-} !Int } | EOFInComment { leLine :: {-# UNPACK #-} !Int , leCol :: {-# UNPACK #-} !Int } deriving (Eq, Show, Ord) lexString :: String -> Either LexError [Token] lexString = go 0 0 0 where go :: Int -> Int -> Int -> String -> Either LexError [Token] go !off !line !_ ('\n':xs) = go (off + 1) (line + 1) 0 xs go !off !line !col (' ':xs) = go (off + 1) line (col + 1) xs go !off !line !_ ('-':'-':xs) = let (a, b) = span (/= '\n') xs in go (off + length a) line 0 b go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs go !off !line !col ('(':cs) = Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs go !off !line !col (')':cs) = Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs go !off !line !col ('{':cs) = Token line col off (off + 1) Tok_obrace `yield` go (off + 1) line (col + 1) cs go !off !line !col ('}':cs) = Token line col off (off + 1) Tok_cbrace `yield` go (off + 1) line (col + 1) cs go !off !line !col (':':cs) = Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs go !off !line !col (';':cs) = Token line col off (off + 1) Tok_semi `yield` go (off + 1) line (col + 1) cs go !off !line !col ('⊤':cs) = Token line col off (off + 1) Tok_top `yield` go (off + 1) line (col + 1) cs go !off !line !col ('≡':cs) = Token line col off (off + 1) Tok_equiv `yield` go (off + 1) line (col + 1) cs go !off !line !col ('=':'=':cs) = Token line col off (off + 2) Tok_equiv `yield` go (off + 2) line (col + 2) cs go !off !line !col ('=':cs) = Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs go !off !line !col ('→':cs) = Token line col off (off + 1) Tok_arrow `yield` go (off + 1) line (col + 1) cs go !off !line !col (',':cs) = Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs go !off !line !col ('_':cs) = Token line col off (off + 1) Tok_under `yield` go (off + 1) line (col + 1) cs go !off !line !col ('*':cs) = Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs go !off !line !col ('×':cs) = Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs go !off !line !col ('\\':cs) = Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs go !off !line !col ('λ':cs) = Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs go !off !line !col ('-':'>':cs) = Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs go !off !line !col ('.':'1':cs) = Token line col off (off + 2) Tok_proj1 `yield` go (off + 2) line (col + 2) cs go !off !line !col ('.':'2':cs) = Token line col off (off + 2) Tok_proj2 `yield` go (off + 2) line (col + 2) cs go !off !line !col (c:cs) | isAlpha c = goIdent off off line col (T.singleton c) cs go !_ !line !col (c:_) = Left (LexError c line col) go _ _ _ [] = pure [] goIdent !soff !off !line !col !acc [] = pure [Token line col soff off (finishIdent acc)] goIdent !soff !off !line !col !acc (c:cs) | isAlphaNum c || c == '\'' = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs | otherwise = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs) skipComment off line col level ('-':'}':cs) | level == 1 = go off line col cs | otherwise = skipComment off line col (level - 1) cs skipComment off line col level ('{':'-':cs) = skipComment off line col (level + 1) cs skipComment _ line col _ [] = Left (EOFInComment line col) yield c = fmap (c:) finishIdent c | T.pack "let" == c = Tok_let | T.pack "Type" == c = Tok_type | T.pack "in" == c = Tok_in | T.pack "refl" == c = Tok_refl | T.pack "coe" == c = Tok_coe | T.pack "cong" == c = Tok_cong | T.pack "sym" == c = Tok_sym | otherwise = Tok_var c