{ module Presyntax.Lexer where import qualified Data.ByteString.Lazy as Lbs import qualified Data.Text.Encoding as T import qualified Data.Text as T import Presyntax.Tokens } %wrapper "monadUserState-bytestring" $alpha = [a-zA-Z] $digit = [0-9] $white_nol = $white # \n tokens :- $white_nol+ ; "--" .* \n ; <0,prtext> $alpha [$alpha $digit \_ \']* { yield tokVar } -- zero state: normal lexing <0> \= { always TokEqual } <0> \: { always TokColon } <0> \, { always TokComma } <0> \* { always TokStar } <0> ".1" { always TokPi1 } <0> ".2" { always TokPi2 } <0> \\ { always TokLambda } <0> "->" { always TokArrow } <0> \( { always TokOParen } <0> \{ { always TokOBrace } <0> \[ { always TokOSquare } <0> \) { always TokCParen } <0> \} { always TokCBrace } <0> \] { always TokCSquare } <0> \; { always TokSemi } <0> \n { just $ pushStartCode newline } <0> "&&" { always TokAnd } <0> "||" { always TokOr } <0> "{-" { just $ pushStartCode comment } { "-}" { \i l -> popStartCode *> skip i l } . ; } <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l } "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l } "#-}" { \i l -> popStartCode *> always TokCPragma i l } <0> ":let" { always TokReplLet } <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT } -- newline: emit a semicolon when de-denting { \n ; () { offsideRule } } { alexEOF :: Alex Token alexEOF = do (AlexPn _ l c, _, _, _) <- alexGetInput pure $ Token TokEof l c yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) always :: TokenClass -> AlexInput -> Int64 -> Alex Token always k x i = yield (const k) x i data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int] } alexInitUserState = AlexUserState [1] [] just :: Alex a -> AlexAction Token just k _ _ = k *> alexMonadScan getUserState :: Alex AlexUserState getUserState = Alex $ \s -> Right (s, alex_ust s) mapUserState :: (AlexUserState -> AlexUserState) -> Alex () mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ()) pushStartCode :: Int -> Alex () pushStartCode c = do sc <- alexGetStartCode mapUserState $ \s -> s { startCodes = sc:startCodes s } alexSetStartCode c popStartCode :: Alex () popStartCode = do sc <- startCodes <$> getUserState case sc of [] -> alexSetStartCode 0 (x:xs) -> do mapUserState $ \s -> s { startCodes = xs } alexSetStartCode x offsideRule :: AlexInput -> Int64 -> Alex Token offsideRule (AlexPn _ line col, _, s, _) _ | Lbs.null s = pure (Token TokEof line col) | otherwise = do ~(col':_) <- layoutColumns <$> getUserState case col `compare` col' of EQ -> do popStartCode pure (Token TokSemi line col) GT -> do popStartCode alexMonadScan LT -> alexError "wrong ass indentation" tokVar :: T.Text -> TokenClass tokVar text = case T.unpack text of "as" -> TokAs _ -> TokVar text }