|
|
- {
- module Presyntax.Lexer where
-
- import qualified Data.ByteString.Lazy as Lbs
- import qualified Data.Text.Encoding as T
- import qualified Data.Text as T
-
- import Presyntax.Tokens
-
- }
-
- %wrapper "monadUserState-bytestring"
-
- $alpha = [a-zA-Z]
- $digit = [0-9]
- $white_nol = $white # \n
-
- tokens :-
- $white_nol+ ;
- "--" .* \n ;
-
- <0,prtext> $alpha [$alpha $digit \_ \']* { variableOrKeyword }
-
- -- zero state: normal lexing
- <0> \= { always TokEqual }
- <0> \: { always TokColon }
- <0> \, { always TokComma }
- <0> \* { always TokStar }
-
- <0> ".1" { always TokPi1 }
- <0> ".2" { always TokPi2 }
-
- <0> \\ { always TokLambda }
- <0> "->" { always TokArrow }
-
- <0> \( { always TokOParen }
- <0> \{ { always TokOBrace }
- <0> \[ { always TokOSquare }
-
- <0> \) { always TokCParen }
- <0> \} { always TokCBrace }
- <0> \] { always TokCSquare }
-
- <0> \; { always TokSemi }
-
- <0> \n { just $ pushStartCode newline }
-
- <0> "&&" { always TokAnd }
- <0> "||" { always TokOr }
-
- <0> "{-" { just $ pushStartCode comment }
-
- <comment> {
- "-}" { \i l -> popStartCode *> skip i l }
- . ;
- }
-
- <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l }
- <prkw> "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l }
- <prtext> "#-}" { \i l -> popStartCode *> always TokCPragma i l }
-
- <0> ":let" { always TokReplLet }
- <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT }
-
- -- newline: emit a semicolon when de-denting
- <newline> {
- \n ;
- () { offsideRule }
- }
-
- -- layout: indentation of the next token is context for offside rule
- <layout> {
- \n ;
- () { startLayout }
- }
-
- <empty_layout> () { emptyLayout }
-
- {
- alexEOF :: Alex Token
- alexEOF = do
- (AlexPn _ l c, _, _, _) <- alexGetInput
- pure $ Token TokEof l c
-
- yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
-
- always :: TokenClass -> AlexInput -> Int64 -> Alex Token
- always k x i = yield (const k) x i
-
- data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int], leastColumn :: Int }
- alexInitUserState = AlexUserState [1] [] 0
-
- just :: Alex a -> AlexAction Token
- just k _ _ = k *> alexMonadScan
-
- getUserState :: Alex AlexUserState
- getUserState = Alex $ \s -> Right (s, alex_ust s)
-
- mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
- mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
-
- pushStartCode :: Int -> Alex ()
- pushStartCode c = do
- sc <- alexGetStartCode
- mapUserState $ \s -> s { startCodes = sc:startCodes s }
- alexSetStartCode c
-
- popStartCode :: Alex ()
- popStartCode = do
- sc <- startCodes <$> getUserState
- case sc of
- [] -> alexSetStartCode 0
- (x:xs) -> do
- mapUserState $ \s -> s { startCodes = xs }
- alexSetStartCode x
-
- offsideRule :: AlexInput -> Int64 -> Alex Token
- offsideRule (AlexPn _ line col, _, _, _) _
- -- | Lbs.null s = pure (Token TokEof line col)
- | otherwise = do
- ~(col':ctx) <- layoutColumns <$> getUserState
- case col `compare` col' of
- EQ -> do
- popStartCode
- pure (Token TokSemi line col)
- GT -> do
- popStartCode
- alexMonadScan
- LT -> do
- mapUserState $ \s -> s { layoutColumns = ctx }
- pure (Token TokLEnd line col)
-
- emptyLayout :: AlexInput -> Int64 -> Alex Token
- emptyLayout (AlexPn _ line col, _, _, _) _ = do
- popStartCode
- pushStartCode newline
- pure (Token TokLEnd line col)
-
- startLayout :: AlexInput -> Int64 -> Alex Token
- startLayout (AlexPn _ line col, _, _, _) _ = do
- popStartCode
- ~(col':_) <- layoutColumns <$> getUserState
- if col' >= col
- then pushStartCode empty_layout
- else mapUserState $ \s -> s { layoutColumns = col:layoutColumns s }
- pure (Token TokLStart line col)
-
- variableOrKeyword :: AlexAction Token
- variableOrKeyword (AlexPn _ l c, _, s, _) size =
- let text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) in
- case T.unpack text of
- "as" -> pure (Token TokAs l c)
- "in" -> pure (Token TokIn l c)
- "data" -> pure (Token TokData l c)
-
- "postulate" -> laidOut TokPostulate l c
- "let" -> laidOut TokLet l c
- "where" -> laidOut TokWhere l c
- "case" -> laidOut TokCase l c
-
- _ -> pure (Token (TokVar text) l c)
-
- laidOut x l c = do
- pushStartCode layout
- mapUserState $ \s -> s { leastColumn = c }
- pure (Token x l c)
-
- }
|