{ module Presyntax.Lexer where import qualified Data.ByteString.Lazy as Lbs import qualified Data.Text.Encoding as T import qualified Data.Text as T import Presyntax.Tokens import Debug.Trace } %wrapper "monadUserState-bytestring" $alpha = [a-zA-Z] $digit = [0-9] $white_nol = $white # \n tokens :- $white_nol+ ; "--" .* \n ; <0,prtext> $alpha [$alpha $digit \_ \']* { variableOrKeyword } -- zero state: normal lexing <0> \= { always TokEqual } <0> \: { always TokColon } <0> \, { always TokComma } <0> \* { always TokStar } <0> ".1" { always TokPi1 } <0> ".2" { always TokPi2 } <0> \\ { always TokLambda } <0> "->" { always TokArrow } <0> "_" { always TokUnder } <0> \( { always TokOParen } <0> \{ { always TokOBrace } <0> \[ { always TokOSquare } <0> \) { always TokCParen } <0> \} { closeBrace } <0> \] { always TokCSquare } <0> \; { always TokSemi } <0> \n { just $ pushStartCode newline } <0> "&&" { always TokAnd } <0> "||" { always TokOr } <0> "{-" { just $ pushStartCode comment } { "-}" { \i l -> popStartCode *> skip i l } . ; } <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l } "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l } "#-}" { \i l -> popStartCode *> always TokCPragma i l } <0> ":let" { always TokReplLet } <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT } -- newline: emit a semicolon when de-denting { \n ; () { offsideRule } } -- layout: indentation of the next token is context for offside rule { \n ; \{ { openBrace } () { startLayout } } () { emptyLayout } { alexEOF :: Alex Token alexEOF = do (AlexPn _ l c, _, _, _) <- alexGetInput pure $ Token TokEof l c yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) always :: TokenClass -> AlexInput -> Int64 -> Alex Token always k x i = yield (const k) x i data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int], leastColumn :: Int } alexInitUserState = AlexUserState [1] [] 0 just :: Alex a -> AlexAction Token just k _ _ = k *> alexMonadScan getUserState :: Alex AlexUserState getUserState = Alex $ \s -> Right (s, alex_ust s) mapUserState :: (AlexUserState -> AlexUserState) -> Alex () mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ()) pushStartCode :: Int -> Alex () pushStartCode c = do sc <- alexGetStartCode mapUserState $ \s -> s { startCodes = sc:startCodes s } alexSetStartCode c popStartCode :: Alex () popStartCode = do sc <- startCodes <$> getUserState case sc of [] -> alexSetStartCode 0 (x:xs) -> do mapUserState $ \s -> s { startCodes = xs } alexSetStartCode x offsideRule :: AlexInput -> Int64 -> Alex Token offsideRule (AlexPn _ line col, _, _, _) _ -- | Lbs.null s = pure (Token TokEof line col) | otherwise = do ~(col':ctx) <- layoutColumns <$> getUserState case col `compare` col' of EQ -> do popStartCode pure (Token TokSemi line col) GT -> do popStartCode alexMonadScan LT -> do mapUserState $ \s -> s { layoutColumns = ctx } pure (Token TokLEnd line col) emptyLayout :: AlexInput -> Int64 -> Alex Token emptyLayout (AlexPn _ line col, _, _, _) _ = do popStartCode pushStartCode newline pure (Token TokLEnd line col) startLayout :: AlexInput -> Int64 -> Alex Token startLayout (AlexPn _ line col, _, _, _) _ = do popStartCode ~(col':_) <- layoutColumns <$> getUserState if col' >= col then pushStartCode empty_layout else mapUserState $ \s -> s { layoutColumns = col:layoutColumns s } pure (Token TokLStart line col) openBrace :: AlexInput -> Int64 -> Alex Token openBrace (AlexPn _ line col, _, _, _) _ = do popStartCode mapUserState $ \s -> s { layoutColumns = minBound:layoutColumns s } pure (Token TokOBrace line col) closeBrace :: AlexInput -> Int64 -> Alex Token closeBrace (AlexPn _ line col, _, _, _) _ = do ~(col':tail) <- layoutColumns <$> getUserState if col' < 0 then mapUserState $ \s -> s { layoutColumns = tail } else pure () pure (Token TokCBrace line col) variableOrKeyword :: AlexAction Token variableOrKeyword (AlexPn _ l c, _, s, _) size = let text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) in case T.unpack text of "as" -> pure (Token TokAs l c) "in" -> pure (Token TokIn l c) "data" -> pure (Token TokData l c) "postulate" -> laidOut TokPostulate l c "let" -> laidOut TokLet l c "where" -> laidOut TokWhere l c "case" -> laidOut TokCase l c _ -> pure (Token (TokVar text) l c) laidOut x l c = do pushStartCode layout mapUserState $ \s -> s { leastColumn = c } pure (Token x l c) }