|
{-# LANGUAGE BangPatterns #-}
|
|
module Presyntax.Lexer where
|
|
|
|
import Data.Text (Text)
|
|
|
|
import Data.Char
|
|
import qualified Data.Text as T
|
|
|
|
{- HLINT ignore -}
|
|
data TokenClass
|
|
= Tok_var Text
|
|
| Tok_lambda
|
|
|
|
| Tok_type
|
|
| Tok_path
|
|
| Tok_phi
|
|
| Tok_sub
|
|
| Tok_comp
|
|
| Tok_tr
|
|
|
|
| Tok_I
|
|
| Tok_I0
|
|
| Tok_I1
|
|
|
|
| Tok_oparen
|
|
| Tok_cparen
|
|
|
|
| Tok_osquare
|
|
| Tok_csquare
|
|
|
|
| Tok_colon
|
|
| Tok_arrow
|
|
|
|
| Tok_let
|
|
| Tok_equal
|
|
| Tok_in
|
|
|
|
| Tok_and
|
|
| Tok_not
|
|
| Tok_or
|
|
|
|
| Tok_fand
|
|
| Tok_for
|
|
|
|
| Tok_assume
|
|
|
|
| Tok_p1
|
|
| Tok_p2
|
|
| Tok_comma
|
|
| Tok_times
|
|
deriving (Eq, Show, Ord)
|
|
|
|
data Token
|
|
= Token { tokLine :: {-# UNPACK #-} !Int
|
|
, tokCol :: {-# UNPACK #-} !Int
|
|
, tokSOff :: {-# UNPACK #-} !Int
|
|
, tokOff :: {-# UNPACK #-} !Int
|
|
, tokClass :: !TokenClass
|
|
}
|
|
deriving (Eq, Show, Ord)
|
|
|
|
data LexError
|
|
= LexError { leChar :: {-# UNPACK #-} !Char
|
|
, leLine :: {-# UNPACK #-} !Int
|
|
, leCol :: {-# UNPACK #-} !Int
|
|
}
|
|
| EOFInComment { leLine :: {-# UNPACK #-} !Int
|
|
, leCol :: {-# UNPACK #-} !Int
|
|
}
|
|
deriving (Eq, Show, Ord)
|
|
|
|
lexString :: String -> Either LexError [Token]
|
|
lexString = go 0 0 0 where
|
|
go :: Int -> Int -> Int -> String -> Either LexError [Token]
|
|
go !off !line !_ ('\n':xs) =
|
|
go (off + 1) (line + 1) 0 xs
|
|
|
|
go !off !line !col (' ':xs) =
|
|
go (off + 1) line (col + 1) xs
|
|
|
|
go !off !line !_ ('-':'-':xs) =
|
|
let (a, b) = span (/= '\n') xs
|
|
in go (off + length a) line 0 b
|
|
|
|
go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
|
|
|
|
go !off !line !col ('~':cs) =
|
|
Token line col off (off + 1) Tok_not `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('(':cs) =
|
|
Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (')':cs) =
|
|
Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('[':cs) =
|
|
Token line col off (off + 1) Tok_osquare `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (']':cs) =
|
|
Token line col off (off + 1) Tok_csquare `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (':':cs) =
|
|
Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col (',':cs) =
|
|
Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('=':cs) =
|
|
Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('\\':'/':cs) =
|
|
Token line col off (off + 2) Tok_for `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('\\':cs) =
|
|
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('*':cs) =
|
|
Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('λ':cs) =
|
|
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('→':cs) =
|
|
Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('-':'>':cs) =
|
|
Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('&':'&':cs) =
|
|
Token line col off (off + 2) Tok_and `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('&':cs) =
|
|
Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('/':'\\':cs) =
|
|
Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('|':'|':cs) =
|
|
Token line col off (off + 2) Tok_or `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('|':cs) =
|
|
Token line col off (off + 1) Tok_for `yield` go (off + 1) line (col + 1) cs
|
|
|
|
go !off !line !col ('.':'1':cs) =
|
|
Token line col off (off + 2) Tok_p1 `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col ('.':'2':cs) =
|
|
Token line col off (off + 2) Tok_p2 `yield` go (off + 2) line (col + 2) cs
|
|
|
|
go !off !line !col (c:cs)
|
|
| isAlpha c = goIdent off off line col (T.singleton c) cs
|
|
|
|
go !_ !line !col (c:_) = Left (LexError c line col)
|
|
|
|
go _ _ _ [] = pure []
|
|
|
|
goIdent !soff !off !line !col !acc [] =
|
|
pure [Token line col soff off (finishIdent acc)]
|
|
|
|
goIdent !soff !off !line !col !acc (c:cs)
|
|
| isAlphaNum c || c == '\''
|
|
= goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
|
|
| otherwise
|
|
= Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
|
|
|
|
skipComment off line col level ('-':'}':cs)
|
|
| level == 1 = go (off + 2) line (col + 2) cs
|
|
| otherwise = skipComment (off + 2) line (col + 2) (level - 1) cs
|
|
skipComment off line col level ('{':'-':cs) =
|
|
skipComment (off + 2) line (col + 2) (level + 1) cs
|
|
skipComment off line col level ('\n':cs) =
|
|
skipComment (off + 1) (line + 1) 0 level cs
|
|
skipComment off line col level (c:cs) =
|
|
skipComment (off + 1) line (col + 1) level cs
|
|
skipComment _ line col _ [] = Left (EOFInComment line col)
|
|
|
|
yield c = fmap (c:)
|
|
|
|
finishIdent c
|
|
| T.pack "Type" == c = Tok_type
|
|
| T.pack "Path" == c = Tok_path
|
|
| T.pack "Partial" == c = Tok_phi
|
|
| T.pack "Sub" == c = Tok_sub
|
|
| T.pack "comp" == c = Tok_comp
|
|
| T.pack "tr" == c = Tok_tr
|
|
| T.pack "I" == c = Tok_I
|
|
| T.pack "i0" == c = Tok_I0
|
|
| T.pack "i1" == c = Tok_I1
|
|
| T.pack "let" == c = Tok_let
|
|
| T.pack "in" == c = Tok_in
|
|
| T.pack "assume" == c = Tok_assume
|
|
| otherwise = Tok_var c
|