|
|
- {-# LANGUAGE BangPatterns #-}
- module Presyntax.Lexer where
-
- import Data.Text (Text)
-
- import Data.Char
- import qualified Data.Text as T
-
- {- HLINT ignore -}
- data TokenClass
- = Tok_var Text
- | Tok_lambda
-
- | Tok_type
- | Tok_let
- | Tok_in
-
- -- Operations on equality
- | Tok_coe
- | Tok_cong
- | Tok_refl
- | Tok_sym
-
- | Tok_proj1
- | Tok_proj2
-
- | Tok_top
-
- | Tok_oparen
- | Tok_cparen
-
- | Tok_obrace
- | Tok_cbrace
-
- | Tok_arrow
- | Tok_times
- | Tok_colon
- | Tok_comma
- | Tok_semi
- | Tok_equal
- | Tok_under
- | Tok_equiv
- deriving (Eq, Show, Ord)
-
- data Token
- = Token { tokLine :: {-# UNPACK #-} !Int
- , tokCol :: {-# UNPACK #-} !Int
- , tokSOff :: {-# UNPACK #-} !Int
- , tokOff :: {-# UNPACK #-} !Int
- , tokClass :: !TokenClass
- }
- deriving (Eq, Show, Ord)
-
- data LexError
- = LexError { leChar :: {-# UNPACK #-} !Char
- , leLine :: {-# UNPACK #-} !Int
- , leCol :: {-# UNPACK #-} !Int
- }
- | EOFInComment { leLine :: {-# UNPACK #-} !Int
- , leCol :: {-# UNPACK #-} !Int
- }
- deriving (Eq, Show, Ord)
-
- lexString :: String -> Either LexError [Token]
- lexString = go 0 0 0 where
- go :: Int -> Int -> Int -> String -> Either LexError [Token]
- go !off !line !_ ('\n':xs) =
- go (off + 1) (line + 1) 0 xs
-
- go !off !line !col (' ':xs) =
- go (off + 1) line (col + 1) xs
-
- go !off !line !_ ('-':'-':xs) =
- let (a, b) = span (/= '\n') xs
- in go (off + length a) line 0 b
-
- go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
-
- go !off !line !col ('(':cs) =
- Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (')':cs) =
- Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('{':cs) =
- Token line col off (off + 1) Tok_obrace `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('}':cs) =
- Token line col off (off + 1) Tok_cbrace `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (':':cs) =
- Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (';':cs) =
- Token line col off (off + 1) Tok_semi `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('⊤':cs) =
- Token line col off (off + 1) Tok_top `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('≡':cs) =
- Token line col off (off + 1) Tok_equiv `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('=':'=':cs) =
- Token line col off (off + 2) Tok_equiv `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('=':cs) =
- Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('→':cs) =
- Token line col off (off + 1) Tok_arrow `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (',':cs) =
- Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('_':cs) =
- Token line col off (off + 1) Tok_under `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('*':cs) =
- Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('×':cs) =
- Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('\\':cs) =
- Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('λ':cs) =
- Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('-':'>':cs) =
- Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('.':'1':cs) =
- Token line col off (off + 2) Tok_proj1 `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('.':'2':cs) =
- Token line col off (off + 2) Tok_proj2 `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col (c:cs)
- | isAlpha c = goIdent off off line col (T.singleton c) cs
-
- go !_ !line !col (c:_) = Left (LexError c line col)
-
- go _ _ _ [] = pure []
-
- goIdent !soff !off !line !col !acc [] =
- pure [Token line col soff off (finishIdent acc)]
-
- goIdent !soff !off !line !col !acc (c:cs)
- | isAlphaNum c || c == '\''
- = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
- | otherwise
- = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
-
- skipComment off line col level ('-':'}':cs)
- | level == 1 = go off line col cs
- | otherwise = skipComment off line col (level - 1) cs
- skipComment off line col level ('{':'-':cs) =
- skipComment off line col (level + 1) cs
- skipComment _ line col _ [] = Left (EOFInComment line col)
-
- yield c = fmap (c:)
-
- finishIdent c
- | T.pack "let" == c = Tok_let
- | T.pack "Type" == c = Tok_type
- | T.pack "in" == c = Tok_in
- | T.pack "refl" == c = Tok_refl
- | T.pack "coe" == c = Tok_coe
- | T.pack "cong" == c = Tok_cong
- | T.pack "sym" == c = Tok_sym
- | otherwise = Tok_var c
|