Prototype, extremely bad code implementation of CCHM Cubical Type Theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

192 lines
5.5 KiB

{-# LANGUAGE BangPatterns #-}
module Presyntax.Lexer where
import Data.Text (Text)
import Data.Char
import qualified Data.Text as T
{- HLINT ignore -}
data TokenClass
= Tok_var Text
| Tok_lambda
| Tok_type
| Tok_path
| Tok_phi
| Tok_sub
| Tok_comp
| Tok_tr
| Tok_I
| Tok_I0
| Tok_I1
| Tok_oparen
| Tok_cparen
| Tok_osquare
| Tok_csquare
| Tok_colon
| Tok_arrow
| Tok_let
| Tok_equal
| Tok_in
| Tok_and
| Tok_not
| Tok_or
| Tok_fand
| Tok_for
| Tok_assume
| Tok_p1
| Tok_p2
| Tok_comma
| Tok_times
deriving (Eq, Show, Ord)
data Token
= Token { tokLine :: {-# UNPACK #-} !Int
, tokCol :: {-# UNPACK #-} !Int
, tokSOff :: {-# UNPACK #-} !Int
, tokOff :: {-# UNPACK #-} !Int
, tokClass :: !TokenClass
}
deriving (Eq, Show, Ord)
data LexError
= LexError { leChar :: {-# UNPACK #-} !Char
, leLine :: {-# UNPACK #-} !Int
, leCol :: {-# UNPACK #-} !Int
}
| EOFInComment { leLine :: {-# UNPACK #-} !Int
, leCol :: {-# UNPACK #-} !Int
}
deriving (Eq, Show, Ord)
lexString :: String -> Either LexError [Token]
lexString = go 0 0 0 where
go :: Int -> Int -> Int -> String -> Either LexError [Token]
go !off !line !_ ('\n':xs) =
go (off + 1) (line + 1) 0 xs
go !off !line !col (' ':xs) =
go (off + 1) line (col + 1) xs
go !off !line !_ ('-':'-':xs) =
let (a, b) = span (/= '\n') xs
in go (off + length a) line 0 b
go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
go !off !line !col ('~':cs) =
Token line col off (off + 1) Tok_not `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('(':cs) =
Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
go !off !line !col (')':cs) =
Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('[':cs) =
Token line col off (off + 1) Tok_osquare `yield` go (off + 1) line (col + 1) cs
go !off !line !col (']':cs) =
Token line col off (off + 1) Tok_csquare `yield` go (off + 1) line (col + 1) cs
go !off !line !col (':':cs) =
Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
go !off !line !col (',':cs) =
Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('=':cs) =
Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('\\':'/':cs) =
Token line col off (off + 2) Tok_for `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('\\':cs) =
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('*':cs) =
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('λ':cs) =
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('→':cs) =
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('-':'>':cs) =
Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('&':'&':cs) =
Token line col off (off + 2) Tok_and `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('&':cs) =
Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('/':'\\':cs) =
Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('|':'|':cs) =
Token line col off (off + 2) Tok_or `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('|':cs) =
Token line col off (off + 1) Tok_for `yield` go (off + 1) line (col + 1) cs
go !off !line !col ('.':'1':cs) =
Token line col off (off + 2) Tok_p1 `yield` go (off + 2) line (col + 2) cs
go !off !line !col ('.':'2':cs) =
Token line col off (off + 2) Tok_p2 `yield` go (off + 2) line (col + 2) cs
go !off !line !col (c:cs)
| isAlpha c = goIdent off off line col (T.singleton c) cs
go !_ !line !col (c:_) = Left (LexError c line col)
go _ _ _ [] = pure []
goIdent !soff !off !line !col !acc [] =
pure [Token line col soff off (finishIdent acc)]
goIdent !soff !off !line !col !acc (c:cs)
| isAlphaNum c || c == '\''
= goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
| otherwise
= Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
skipComment off line col level ('-':'}':cs)
| level == 1 = go (off + 2) line (col + 2) cs
| otherwise = skipComment (off + 2) line (col + 2) (level - 1) cs
skipComment off line col level ('{':'-':cs) =
skipComment (off + 2) line (col + 2) (level + 1) cs
skipComment off line col level ('\n':cs) =
skipComment (off + 1) (line + 1) 0 level cs
skipComment off line col level (c:cs) =
skipComment (off + 1) line (col + 1) level cs
skipComment _ line col _ [] = Left (EOFInComment line col)
yield c = fmap (c:)
finishIdent c
| T.pack "Type" == c = Tok_type
| T.pack "Path" == c = Tok_path
| T.pack "Partial" == c = Tok_phi
| T.pack "Sub" == c = Tok_sub
| T.pack "comp" == c = Tok_comp
| T.pack "tr" == c = Tok_tr
| T.pack "I" == c = Tok_I
| T.pack "i0" == c = Tok_I0
| T.pack "i1" == c = Tok_I1
| T.pack "let" == c = Tok_let
| T.pack "in" == c = Tok_in
| T.pack "assume" == c = Tok_assume
| otherwise = Tok_var c