a type theory with equality based on setoids
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

172 lines
4.9 KiB

{-# LANGUAGE BangPatterns #-}
module Presyntax.Lexer where
import Data.Text (Text)
import Data.Char
import qualified Data.Text as T
{- HLINT ignore -}
data TokenClass
= Tok_var Text
| Tok_lambda
| Tok_type
| Tok_let
| Tok_in
-- Operations on equality
| Tok_coe
| Tok_cong
| Tok_refl
| Tok_sym
| Tok_proj1
| Tok_proj2
| Tok_top
| Tok_oparen
| Tok_cparen
| Tok_obrace
| Tok_cbrace
| Tok_arrow
| Tok_times
| Tok_colon
| Tok_comma
| Tok_semi
| Tok_equal
| Tok_under
| Tok_equiv
deriving (Eq, Show, Ord)
data Token
= Token { tokLine :: {-# UNPACK #-} !Int
, tokCol :: {-# UNPACK #-} !Int
, tokSOff :: {-# UNPACK #-} !Int
, tokOff :: {-# UNPACK #-} !Int
, tokClass :: !TokenClass
}
deriving (Eq, Show, Ord)
data LexError
= LexError { leChar :: {-# UNPACK #-} !Char
, leLine :: {-# UNPACK #-} !Int
, leCol :: {-# UNPACK #-} !Int
}
| EOFInComment { leLine :: {-# UNPACK #-} !Int
, leCol :: {-# UNPACK #-} !Int
}
deriving (Eq, Show, Ord)
lexString :: String -> Either LexError [Token]
lexString = go 0 0 0 where
go :: Int -> Int -> Int -> String -> Either LexError [Token]
go !off !line !_ ('\n':xs) =
go (off + 1) (line + 1) 0 xs
go !off !line !col (' ':xs) =
go (off + 1) line (col + 1) xs
go !off !line !_ ('-':'-':xs) =
let (a, b) = span (/= '\n') xs
in go (off + length a) line 0 b
go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
go !off !line !col ('(':cs) =
Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
go !off !line !col (')':cs) =
Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('{':cs) =
Token line col off (off + 1) Tok_obrace `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('}':cs) =
Token line col off (off + 1) Tok_cbrace `yield` go (off + 1) line (col + 1) cs
go !off !line !col (':':cs) =
Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
go !off !line !col (';':cs) =
Token line col off (off + 1) Tok_semi `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('⊤':cs) =
Token line col off (off + 1) Tok_top `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('≡':cs) =
Token line col off (off + 1) Tok_equiv `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('=':'=':cs) =
Token line col off (off + 2) Tok_equiv `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('=':cs) =
Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('→':cs) =
Token line col off (off + 1) Tok_arrow `yield` go (off + 1) line (col + 1) cs
go !off !line !col (',':cs) =
Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('_':cs) =
Token line col off (off + 1) Tok_under `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('*':cs) =
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('×':cs) =
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('\\':cs) =
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('λ':cs) =
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('-':'>':cs) =
Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('.':'1':cs) =
Token line col off (off + 2) Tok_proj1 `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('.':'2':cs) =
Token line col off (off + 2) Tok_proj2 `yield` go (off + 2) line (col + 2) cs
go !off !line !col (c:cs)
| isAlpha c = goIdent off off line col (T.singleton c) cs
go !_ !line !col (c:_) = Left (LexError c line col)
go _ _ _ [] = pure []
goIdent !soff !off !line !col !acc [] =
pure [Token line col soff off (finishIdent acc)]
goIdent !soff !off !line !col !acc (c:cs)
| isAlphaNum c || c == '\''
= goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
| otherwise
= Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
skipComment off line col level ('-':'}':cs)
| level == 1 = go off line col cs
| otherwise = skipComment off line col (level - 1) cs
skipComment off line col level ('{':'-':cs) =
skipComment off line col (level + 1) cs
skipComment _ line col _ [] = Left (EOFInComment line col)
yield c = fmap (c:)
finishIdent c
| T.pack "let" == c = Tok_let
| T.pack "Type" == c = Tok_type
| T.pack "in" == c = Tok_in
| T.pack "refl" == c = Tok_refl
| T.pack "coe" == c = Tok_coe
| T.pack "cong" == c = Tok_cong
| T.pack "sym" == c = Tok_sym
| otherwise = Tok_var c