{-# LANGUAGE BangPatterns #-} module Presyntax.Lexer where import Data.Text (Text) import Data.Char import qualified Data.Text as T {- HLINT ignore -} data TokenClass = Tok_var Text | Tok_lambda | Tok_type | Tok_path | Tok_phi | Tok_sub | Tok_comp | Tok_tr | Tok_I | Tok_I0 | Tok_I1 | Tok_oparen | Tok_cparen | Tok_osquare | Tok_csquare | Tok_colon | Tok_arrow | Tok_let | Tok_equal | Tok_in | Tok_and | Tok_not | Tok_or | Tok_fand | Tok_for | Tok_assume | Tok_p1 | Tok_p2 | Tok_comma | Tok_times deriving (Eq, Show, Ord) data Token = Token { tokLine :: {-# UNPACK #-} !Int , tokCol :: {-# UNPACK #-} !Int , tokSOff :: {-# UNPACK #-} !Int , tokOff :: {-# UNPACK #-} !Int , tokClass :: !TokenClass } deriving (Eq, Show, Ord) data LexError = LexError { leChar :: {-# UNPACK #-} !Char , leLine :: {-# UNPACK #-} !Int , leCol :: {-# UNPACK #-} !Int } | EOFInComment { leLine :: {-# UNPACK #-} !Int , leCol :: {-# UNPACK #-} !Int } deriving (Eq, Show, Ord) lexString :: String -> Either LexError [Token] lexString = go 0 0 0 where go :: Int -> Int -> Int -> String -> Either LexError [Token] go !off !line !_ ('\n':xs) = go (off + 1) (line + 1) 0 xs go !off !line !col (' ':xs) = go (off + 1) line (col + 1) xs go !off !line !_ ('-':'-':xs) = let (a, b) = span (/= '\n') xs in go (off + length a) line 0 b go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs go !off !line !col ('~':cs) = Token line col off (off + 1) Tok_not `yield` go (off + 1) line (col + 1) cs go !off !line !col ('(':cs) = Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs go !off !line !col (')':cs) = Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs go !off !line !col ('[':cs) = Token line col off (off + 1) Tok_osquare `yield` go (off + 1) line (col + 1) cs go !off !line !col (']':cs) = Token line col off (off + 1) Tok_csquare `yield` go (off + 1) line (col + 1) cs go !off !line !col (':':cs) = Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs go !off !line !col (',':cs) = Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs go !off !line !col ('=':cs) = Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs go !off !line !col ('\\':'/':cs) = Token line col off (off + 2) Tok_for `yield` go (off + 2) line (col + 2) cs go !off !line !col ('\\':cs) = Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs go !off !line !col ('*':cs) = Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs go !off !line !col ('λ':cs) = Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs go !off !line !col ('→':cs) = Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs go !off !line !col ('-':'>':cs) = Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs go !off !line !col ('&':'&':cs) = Token line col off (off + 2) Tok_and `yield` go (off + 2) line (col + 2) cs go !off !line !col ('&':cs) = Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs go !off !line !col ('/':'\\':cs) = Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs go !off !line !col ('|':'|':cs) = Token line col off (off + 2) Tok_or `yield` go (off + 2) line (col + 2) cs go !off !line !col ('|':cs) = Token line col off (off + 1) Tok_for `yield` go (off + 1) line (col + 1) cs go !off !line !col ('.':'1':cs) = Token line col off (off + 2) Tok_p1 `yield` go (off + 2) line (col + 2) cs go !off !line !col ('.':'2':cs) = Token line col off (off + 2) Tok_p2 `yield` go (off + 2) line (col + 2) cs go !off !line !col (c:cs) | isAlpha c = goIdent off off line col (T.singleton c) cs go !_ !line !col (c:_) = Left (LexError c line col) go _ _ _ [] = pure [] goIdent !soff !off !line !col !acc [] = pure [Token line col soff off (finishIdent acc)] goIdent !soff !off !line !col !acc (c:cs) | isAlphaNum c || c == '\'' = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs | otherwise = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs) skipComment off line col level ('-':'}':cs) | level == 1 = go (off + 2) line (col + 2) cs | otherwise = skipComment (off + 2) line (col + 2) (level - 1) cs skipComment off line col level ('{':'-':cs) = skipComment (off + 2) line (col + 2) (level + 1) cs skipComment off line col level ('\n':cs) = skipComment (off + 1) (line + 1) 0 level cs skipComment off line col level (c:cs) = skipComment (off + 1) line (col + 1) level cs skipComment _ line col _ [] = Left (EOFInComment line col) yield c = fmap (c:) finishIdent c | T.pack "Type" == c = Tok_type | T.pack "Path" == c = Tok_path | T.pack "Partial" == c = Tok_phi | T.pack "Sub" == c = Tok_sub | T.pack "comp" == c = Tok_comp | T.pack "tr" == c = Tok_tr | T.pack "I" == c = Tok_I | T.pack "i0" == c = Tok_I0 | T.pack "i1" == c = Tok_I1 | T.pack "let" == c = Tok_let | T.pack "in" == c = Tok_in | T.pack "assume" == c = Tok_assume | otherwise = Tok_var c