|
|
- {-# LANGUAGE BangPatterns #-}
- module Presyntax.Lexer where
-
- import qualified Data.Text as T
- import Data.Text (Text)
- import Data.Char
-
- {- HLINT ignore -}
- data TokenClass
- = Tok_var Text
- | Tok_lambda
-
- | Tok_type
- | Tok_typeω
- | Tok_path
- | Tok_Partial
- | Tok_PartialP
- | Tok_sub
- | Tok_comp
- | Tok_tr
-
- | Tok_I
- | Tok_I0
- | Tok_I1
-
- | Tok_Glue
- | Tok_glue
- | Tok_unglue
-
- | Tok_bool
- | Tok_tt
- | Tok_ff
- | Tok_if
-
- | Tok_oparen
- | Tok_cparen
-
- | Tok_osquare
- | Tok_csquare
-
- | Tok_colon
- | Tok_arrow
-
- | Tok_let
- | Tok_equal
- | Tok_in
-
- | Tok_and
- | Tok_not
- | Tok_or
-
- | Tok_fand
- | Tok_for
-
- | Tok_assume
-
- | Tok_p1
- | Tok_p2
- | Tok_comma
- | Tok_times
- deriving (Eq, Show, Ord)
-
- data Token
- = Token { tokLine :: {-# UNPACK #-} !Int
- , tokCol :: {-# UNPACK #-} !Int
- , tokSOff :: {-# UNPACK #-} !Int
- , tokOff :: {-# UNPACK #-} !Int
- , tokClass :: !TokenClass
- }
- deriving (Eq, Show, Ord)
-
- data LexError
- = LexError { leChar :: {-# UNPACK #-} !Char
- , leLine :: {-# UNPACK #-} !Int
- , leCol :: {-# UNPACK #-} !Int
- }
- | EOFInComment { leLine :: {-# UNPACK #-} !Int
- , leCol :: {-# UNPACK #-} !Int
- }
- deriving (Eq, Show, Ord)
-
- lexString :: String -> Either LexError [Token]
- lexString = go 0 0 0 where
- go :: Int -> Int -> Int -> String -> Either LexError [Token]
- go !off !line !_ ('\n':xs) =
- go (off + 1) (line + 1) 0 xs
-
- go !off !line !col (' ':xs) =
- go (off + 1) line (col + 1) xs
-
- go !off !line !_ ('-':'-':xs) =
- let (a, b) = span (/= '\n') xs
- in go (off + length a) line 0 b
-
- go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
-
- go !off !line !col ('~':cs) =
- Token line col off (off + 1) Tok_not `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('(':cs) =
- Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (')':cs) =
- Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('[':cs) =
- Token line col off (off + 1) Tok_osquare `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (']':cs) =
- Token line col off (off + 1) Tok_csquare `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (':':cs) =
- Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col (',':cs) =
- Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('=':cs) =
- Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('\\':'/':cs) =
- Token line col off (off + 2) Tok_for `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('\\':cs) =
- Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('*':cs) =
- Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('λ':cs) =
- Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('→':cs) =
- Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('-':'>':cs) =
- Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('&':'&':cs) =
- Token line col off (off + 2) Tok_and `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('&':cs) =
- Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('/':'\\':cs) =
- Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('|':'|':cs) =
- Token line col off (off + 2) Tok_or `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('|':cs) =
- Token line col off (off + 1) Tok_for `yield` go (off + 1) line (col + 1) cs
-
- go !off !line !col ('.':'1':cs) =
- Token line col off (off + 2) Tok_p1 `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col ('.':'2':cs) =
- Token line col off (off + 2) Tok_p2 `yield` go (off + 2) line (col + 2) cs
-
- go !off !line !col (c:cs)
- | isAlpha c = goIdent off off line col (T.singleton c) cs
-
- go !_ !line !col (c:_) = Left (LexError c line col)
-
- go _ _ _ [] = pure []
-
- goIdent !soff !off !line !col !acc [] =
- pure [Token line col soff off (finishIdent acc)]
-
- goIdent !soff !off !line !col !acc (c:cs)
- | isAlphaNum c || c == '\''
- = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
- | otherwise
- = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
-
- skipComment off line col level ('-':'}':cs)
- | level == 1 = go (off + 2) line (col + 2) cs
- | otherwise = skipComment (off + 2) line (col + 2) (level - 1) cs
- skipComment off line col level ('{':'-':cs) =
- skipComment (off + 2) line (col + 2) (level + 1) cs
- skipComment off line col level ('\n':cs) =
- skipComment (off + 1) (line + 1) 0 level cs
- skipComment off line col level (c:cs) =
- skipComment (off + 1) line (col + 1) level cs
- skipComment _ line col _ [] = Left (EOFInComment line col)
-
- yield c = fmap (c:)
-
- finishIdent c
- | T.pack "Type" == c = Tok_type
- | T.pack "Typeω" == c || T.pack "Pretype" == c = Tok_typeω
- | T.pack "Path" == c = Tok_path
- | T.pack "Partial" == c = Tok_Partial
- | T.pack "PartialP" == c = Tok_PartialP
- | T.pack "Sub" == c = Tok_sub
- | T.pack "comp" == c = Tok_comp
-
- | T.pack "Glue" == c = Tok_Glue
- | T.pack "glue" == c = Tok_glue
- | T.pack "unglue" == c = Tok_unglue
-
- | T.pack "Bool" == c = Tok_bool
- | T.pack "tt" == c = Tok_tt
- | T.pack "ff" == c = Tok_ff
- | T.pack "if" == c = Tok_if
-
- | T.pack "tr" == c = Tok_tr
- | T.pack "I" == c = Tok_I
- | T.pack "i0" == c = Tok_I0
- | T.pack "i1" == c = Tok_I1
- | T.pack "let" == c = Tok_let
- | T.pack "in" == c = Tok_in
- | T.pack "assume" == c = Tok_assume
- | otherwise = Tok_var c
|