|
{-# LANGUAGE BangPatterns #-}
|
|
module Presyntax.Lexer where
|
|
|
|
import Data.Text (Text)
|
|
|
|
import Data.Char
|
|
import qualified Data.Text as T
|
|
|
|
{- HLINT ignore -}
|
|
data TokenClass
|
|
= Tok_var Text
|
|
| Tok_lambda
|
|
|
|
| Tok_type
|
|
| Tok_let
|
|
| Tok_in
|
|
|
|
-- Operations on equality
|
|
| Tok_coe
|
|
| Tok_cong
|
|
| Tok_refl
|
|
| Tok_sym
|
|
|
|
| Tok_proj1
|
|
| Tok_proj2
|
|
|
|
| Tok_top
|
|
|
|
| Tok_oparen
|
|
| Tok_cparen
|
|
|
|
| Tok_obrace
|
|
| Tok_cbrace
|
|
|
|
| Tok_arrow
|
|
| Tok_times
|
|
| Tok_colon
|
|
| Tok_comma
|
|
| Tok_semi
|
|
| Tok_equal
|
|
| Tok_under
|
|
| Tok_equiv
|
|
deriving (Eq, Show, Ord)
|
|
|
|
data Token
|
|
= Token { tokLine :: {-# UNPACK #-} !Int
|
|
, tokCol :: {-# UNPACK #-} !Int
|
|
, tokSOff :: {-# UNPACK #-} !Int
|
|
, tokOff :: {-# UNPACK #-} !Int
|
|
, tokClass :: !TokenClass
|
|
}
|
|
deriving (Eq, Show, Ord)
|
|
|
|
data LexError
|
|
= LexError { leChar :: {-# UNPACK #-} !Char
|
|
, leLine :: {-# UNPACK #-} !Int
|
|
, leCol :: {-# UNPACK #-} !Int
|
|
}
|
|
| EOFInComment { leLine :: {-# UNPACK #-} !Int
|
|
, leCol :: {-# UNPACK #-} !Int
|
|
}
|
|
deriving (Eq, Show, Ord)
|
|
|
|
lexString :: String -> Either LexError [Token]
|
|
lexString = go 0 0 0 where
|
|
go :: Int -> Int -> Int -> String -> Either LexError [Token]
|
|
go !off !line !_ ('\n':xs) =
|
|
go (off + 1) (line + 1) 0 xs
|
|
|
|
go !off !line !col (' ':xs) =
|
|
go (off + 1) line (col + 1) xs
|
|
|
|
go !off !line !_ ('-':'-':xs) =
|
|
let (a, b) = span (/= '\n') xs
|
|
in go (off + length a) line 0 b
|
|
|
|
go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
|
|
|
|
go !off !line !col ('(':cs) =
|
|
Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (')':cs) =
|
|
Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('{':cs) =
|
|
Token line col off (off + 1) Tok_obrace `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('}':cs) =
|
|
Token line col off (off + 1) Tok_cbrace `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (':':cs) =
|
|
Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (';':cs) =
|
|
Token line col off (off + 1) Tok_semi `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('⊤':cs) =
|
|
Token line col off (off + 1) Tok_top `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('≡':cs) =
|
|
Token line col off (off + 1) Tok_equiv `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('=':'=':cs) =
|
|
Token line col off (off + 2) Tok_equiv `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('=':cs) =
|
|
Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('→':cs) =
|
|
Token line col off (off + 1) Tok_arrow `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (',':cs) =
|
|
Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('_':cs) =
|
|
Token line col off (off + 1) Tok_under `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('*':cs) =
|
|
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('×':cs) =
|
|
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('\\':cs) =
|
|
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('λ':cs) =
|
|
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('-':'>':cs) =
|
|
Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('.':'1':cs) =
|
|
Token line col off (off + 2) Tok_proj1 `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('.':'2':cs) =
|
|
Token line col off (off + 2) Tok_proj2 `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col (c:cs)
|
|
| isAlpha c = goIdent off off line col (T.singleton c) cs
|
|
|
|
go !_ !line !col (c:_) = Left (LexError c line col)
|
|
|
|
go _ _ _ [] = pure []
|
|
|
|
goIdent !soff !off !line !col !acc [] =
|
|
pure [Token line col soff off (finishIdent acc)]
|
|
|
|
goIdent !soff !off !line !col !acc (c:cs)
|
|
| isAlphaNum c || c == '\''
|
|
= goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
|
|
| otherwise
|
|
= Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
|
|
|
|
skipComment off line col level ('-':'}':cs)
|
|
| level == 1 = go off line col cs
|
|
| otherwise = skipComment off line col (level - 1) cs
|
|
skipComment off line col level ('{':'-':cs) =
|
|
skipComment off line col (level + 1) cs
|
|
skipComment _ line col _ [] = Left (EOFInComment line col)
|
|
|
|
yield c = fmap (c:)
|
|
|
|
finishIdent c
|
|
| T.pack "let" == c = Tok_let
|
|
| T.pack "Type" == c = Tok_type
|
|
| T.pack "in" == c = Tok_in
|
|
| T.pack "refl" == c = Tok_refl
|
|
| T.pack "coe" == c = Tok_coe
|
|
| T.pack "cong" == c = Tok_cong
|
|
| T.pack "sym" == c = Tok_sym
|
|
| otherwise = Tok_var c
|