{ module Frontend.Autogen.Lexer where import qualified Data.ByteString.Lazy as Lbs import qualified Data.Text.Encoding as T import qualified Data.Text as T import Frontend.Lexer.Tokens import Control.Monad import Debug.Trace } %wrapper "monadUserState-bytestring" $alpha = [a-zA-Z] $digit = [0-9] $white_nol = $white # \n tokens :- $white_nol+ ; <0,module_header> "--" .* \n { just $ pushStartCode newline } <0,module_header,import_> $alpha [$alpha $digit \_ \']* { variableOrKeyword } <0> \= { always TokEqual } <0> \: \: { always TokDoubleColon } <0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l } <0> "->" { always TokArrow } <0> "_" { always TokUnder } <0> \{ { always TokOBrace } <0> \[ { always TokOSquare } <0,module_header,import_> { \, { always TokComma } \( { always TokOParen } \) { always TokCParen } } <0> \} { closeBrace } <0> \] { always TokCSquare } <0> \;+ { always TokSemi } <0> \n { just $ pushStartCode newline } <0> \" { just startString } { \\ \" { stringSeg (T.singleton '"') } \\ \\ { stringSeg (T.singleton '\\') } \\ a { stringSeg (T.singleton '\a') } \\ b { stringSeg (T.singleton '\b') } \\ f { stringSeg (T.singleton '\f') } \\ n { stringSeg (T.singleton '\n') } \\ \n { stringSeg (T.singleton '\n') } \\ r { stringSeg (T.singleton '\r') } \\ v { stringSeg (T.singleton '\v') } \\ t { stringSeg (T.singleton '\t') } \" { endString } [^\\\"]+ { stringChar } } <0,newline,comment,import_,module_header> "{-" { just $ pushStartCode comment } { "-}" { \i l -> popStartCode *> skip i l } . ; } -- newline: emit a semicolon when de-denting { \n ; "--" .* \n ; () { offsideRule } } -- layout: indentation of the next token is context for offside rule { \n ; "--" .* \n ; \{ { openBrace } () { startLayout } } { \n { just $ pushStartCode newline } "--" .* \n { just $ pushStartCode newline } } () { emptyLayout } () { emitPendingToken } { \n ; } { alexEOF :: Alex Token alexEOF = do (AlexPn _ l c, _, _, _) <- alexGetInput maybePopImportSC state <- getUserState unless (T.null (stringBuffer state)) $ do alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c case layoutColumns state of -- EOF is allowed to close as many layout contexts as there are -- pending (number of pending layout contexts is the length of the -- list minus one, since there's the one initial layout context.) _:tail -> do mapUserState $ \s -> s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c] , layoutColumns = [] } pushStartCode pending pure (Token TokLEnd l c) _ -> pure $ Token TokEof l c yield k inp i = clearPendingLC *> yield' k inp i yield' k (AlexPn _ l c, _, s, _) i = do pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False } always :: TokenClass -> AlexInput -> Int64 -> Alex Token always k x i = yield (const k) x i startString = do mapUserState $ \s -> s { stringBuffer = T.empty } pushStartCode string endString (AlexPn _ l c, _, _, _) _i = do text <- stringBuffer <$> getUserState mapUserState $ \s -> s { stringBuffer = T.empty } popStartCode pure (Token (TokString text) l c) stringChar input@(AlexPn _ _ _, _, buf, _) i = do mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } alexMonadScan stringSeg text _ _ = do mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text } alexMonadScan data LayoutState = LetLayout { layoutCol :: Int } | Layout { layoutCol :: Int } | ModLayout { layoutCol :: Int } deriving (Show) data AlexUserState = AlexUserState { layoutColumns :: [LayoutState] , startCodes :: [Int] , leastColumn :: Int , pendingLayoutKw :: Maybe (Int -> LayoutState) , pendingTokens :: [Token] , pendingLambdaCase :: Bool , haveModuleHeader :: Bool , stringBuffer :: T.Text } alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty emitPendingToken :: AlexAction Token emitPendingToken _ _ = do t <- getUserState case pendingTokens t of [] -> do popStartCode alexMonadScan (x:xs) -> do mapUserState $ \s -> s { pendingTokens = xs } pure x delayToken :: Token -> Alex () delayToken t = do mapUserState $ \s -> s { pendingTokens = t:pendingTokens s } pushStartCode pending just :: Alex a -> AlexAction Token just k _ _ = k *> alexMonadScan getUserState :: Alex AlexUserState getUserState = Alex $ \s -> Right (s, alex_ust s) mapUserState :: (AlexUserState -> AlexUserState) -> Alex () mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ()) pushStartCode :: Int -> Alex () pushStartCode c = do sc <- alexGetStartCode mapUserState $ \s -> s { startCodes = sc:startCodes s } alexSetStartCode c popStartCode :: Alex () popStartCode = do sc <- startCodes <$> getUserState case sc of [] -> alexSetStartCode 0 (x:xs) -> do mapUserState $ \s -> s { startCodes = xs } alexSetStartCode x offsideRule :: AlexInput -> Int64 -> Alex Token offsideRule (AlexPn _ line col, _, s, _) _ = do ~(col':ctx) <- layoutColumns <$> getUserState case col `compare` layoutCol col' of EQ -> do popStartCode maybePopImportSC pure (Token TokSemi line col) GT -> do popStartCode alexMonadScan LT -> do mapUserState $ \s -> s { layoutColumns = ctx } pure (Token TokLEnd line col) maybePopImportSC :: Alex () maybePopImportSC = do startcode <- alexGetStartCode when (startcode == import_) popStartCode emptyLayout :: AlexInput -> Int64 -> Alex Token emptyLayout (AlexPn _ line col, _, _, _) _ = do popStartCode pushStartCode newline pure (Token TokLEnd line col) startLayout :: AlexInput -> Int64 -> Alex Token startLayout (AlexPn _ line col, _, _, _) _ = do state <- getUserState popStartCode let col' = case layoutColumns state of [] -> 0 (x:_) -> layoutCol x layoutKind = case pendingLayoutKw state of Just s -> s Nothing -> Layout if col < col' then pushStartCode empty_layout else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s } pure (Token TokLStart line col) getLayout :: Alex LayoutState getLayout = do t <- getUserState case layoutColumns t of (x:_) -> pure x _ -> error "No layout?" openBrace :: AlexInput -> Int64 -> Alex Token openBrace (AlexPn _ line col, _, _, _) _ = do popStartCode mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s } pure (Token TokOBrace line col) popLayoutContext :: Alex () popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) } closeBrace :: AlexInput -> Int64 -> Alex Token closeBrace (AlexPn _ line col, _, _, _) _ = do ~(col':_) <- layoutColumns <$> getUserState if layoutCol col' < 0 then popLayoutContext else pure () pure (Token TokCBrace line col) variableOrKeyword :: AlexAction Token variableOrKeyword (AlexPn _ l c, _, s, _) size = do sc <- alexGetStartCode state <- getUserState clearPendingLC let text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) col = layoutCol (head (layoutColumns state)) case T.unpack text of "as" | sc == import_, c > col -> pure (Token TokAs l c) | sc == import_ -> offsideKeyword (TokVar text) l c | otherwise -> pure (Token (TokVar text) l c) "qualified" | sc == import_, c > col -> pure (Token TokQualified l c) | sc == import_ -> offsideKeyword (TokVar text) l c | otherwise -> pure (Token (TokVar text) l c) "let" -> laidOut' (Just LetLayout) TokLet l c "in" -> do laidout <- getLayout case laidout of -- let .. in critical pair: -- 'in' is allowed to close a layout context before the offside rule would apply. LetLayout _ -> earlyEnd TokIn l c _ -> pure (Token TokIn l c) "data" -> pure (Token TokData l c) "where" -> do -- if this is the where in the module_header, then -- pop the start code so that the offside rule applies again when (sc == module_header) popStartCode laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c "case" -- "case" is a layout token if it's immediately following a \\ | pendingLambdaCase state -> laidOut TokCase l c | otherwise -> pure (Token TokCase l c) "import" -> do pushStartCode import_ pure (Token TokImport l c) "of" -> laidOut TokOf l c "module" -> do unless (haveModuleHeader state) $ do mapUserState $ \s -> s { haveModuleHeader = True } pushStartCode module_header pure (Token TokModule l c) (x:_) | Data.Char.isUpper x -> pure (Token (TokCon text) l c) | otherwise -> pure (Token (TokVar text) l c) [] -> error "empty keyword/identifier" earlyEnd tok l c = do popLayoutContext delayToken (Token tok l c) pure (Token TokLEnd l c) offsideKeyword tok l c = do popLayoutContext delayToken (Token tok l c) pure (Token TokSemi l c) laidOut' n x l c = do pushStartCode layout mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n } pure (Token x l c) laidOut = laidOut' Nothing }