|
|
- {
- module Frontend.Autogen.Lexer where
-
- import qualified Data.ByteString.Lazy as Lbs
- import qualified Data.Text.Encoding as T
- import qualified Data.Text as T
-
- import Frontend.Lexer.Tokens
-
- import Control.Monad
-
- import Debug.Trace
- }
-
- %wrapper "monadUserState-bytestring"
-
- $alpha = [a-zA-Z]
- $digit = [0-9]
- $white_nol = $white # \n
-
- tokens :-
- $white_nol+ ;
-
- <0,module_header> "--" .* \n
- { just $ pushStartCode newline }
-
- <0,module_header,import_>
- $alpha [$alpha $digit \_ \']* { variableOrKeyword }
-
- <0> \= { always TokEqual }
- <0> \: \: { always TokDoubleColon }
-
- <0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
- <0> "->" { always TokArrow }
- <0> "_" { always TokUnder }
-
- <0> \{ { always TokOBrace }
- <0> \[ { always TokOSquare }
-
- <0,module_header,import_> {
- \, { always TokComma }
- \( { always TokOParen }
- \) { always TokCParen }
- }
-
- <0> \} { closeBrace }
- <0> \] { always TokCSquare }
-
- <0> \;+ { always TokSemi }
-
- <0> \n { just $ pushStartCode newline }
-
- <0> \" { just startString }
-
- <string> {
- \\ \" { stringSeg (T.singleton '"') }
- \\ \\ { stringSeg (T.singleton '\\') }
-
- \\ a { stringSeg (T.singleton '\a') }
- \\ b { stringSeg (T.singleton '\b') }
- \\ f { stringSeg (T.singleton '\f') }
- \\ n { stringSeg (T.singleton '\n') }
- \\ \n { stringSeg (T.singleton '\n') }
- \\ r { stringSeg (T.singleton '\r') }
- \\ v { stringSeg (T.singleton '\v') }
- \\ t { stringSeg (T.singleton '\t') }
-
- \" { endString }
-
- [^\\\"]+ { stringChar }
- }
-
- <0,newline,comment,import_,module_header>
- "{-" { just $ pushStartCode comment }
-
- <comment> {
- "-}" { \i l -> popStartCode *> skip i l }
- . ;
- }
-
- -- newline: emit a semicolon when de-denting
- <newline> {
- \n ;
- "--" .* \n ;
-
- () { offsideRule }
- }
-
- -- layout: indentation of the next token is context for offside rule
- <layout> {
- \n ;
- "--" .* \n ;
-
- \{ { openBrace }
- () { startLayout }
- }
-
- <import_> {
- \n { just $ pushStartCode newline }
- "--" .* \n { just $ pushStartCode newline }
- }
-
- <empty_layout> () { emptyLayout }
-
- <pending> () { emitPendingToken }
-
- <module_header> {
- \n ;
- }
-
- {
- alexEOF :: Alex Token
- alexEOF = do
- (AlexPn _ l c, _, _, _) <- alexGetInput
-
- maybePopImportSC
-
- state <- getUserState
-
- unless (T.null (stringBuffer state)) $ do
- alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
-
- case layoutColumns state of
- -- EOF is allowed to close as many layout contexts as there are
- -- pending (number of pending layout contexts is the length of the
- -- list minus one, since there's the one initial layout context.)
- _:tail -> do
- mapUserState $ \s ->
- s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]
- , layoutColumns = []
- }
- pushStartCode pending
- pure (Token TokLEnd l c)
- _ -> pure $ Token TokEof l c
-
- yield k inp i = clearPendingLC *> yield' k inp i
-
- yield' k (AlexPn _ l c, _, s, _) i = do
- pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
-
- setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
- clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
-
- always :: TokenClass -> AlexInput -> Int64 -> Alex Token
- always k x i = yield (const k) x i
-
- startString = do
- mapUserState $ \s -> s { stringBuffer = T.empty }
- pushStartCode string
-
- endString (AlexPn _ l c, _, _, _) _i = do
- text <- stringBuffer <$> getUserState
- mapUserState $ \s -> s { stringBuffer = T.empty }
- popStartCode
- pure (Token (TokString text) l c)
-
- stringChar input@(AlexPn _ _ _, _, buf, _) i = do
- mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
- alexMonadScan
-
- stringSeg text _ _ = do
- mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
- alexMonadScan
-
- data LayoutState
- = LetLayout { layoutCol :: Int }
- | Layout { layoutCol :: Int }
- | ModLayout { layoutCol :: Int }
- deriving (Show)
-
- data AlexUserState =
- AlexUserState { layoutColumns :: [LayoutState]
- , startCodes :: [Int]
- , leastColumn :: Int
-
- , pendingLayoutKw :: Maybe (Int -> LayoutState)
- , pendingTokens :: [Token]
- , pendingLambdaCase :: Bool
- , haveModuleHeader :: Bool
-
- , stringBuffer :: T.Text
- }
-
- alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
-
- emitPendingToken :: AlexAction Token
- emitPendingToken _ _ = do
- t <- getUserState
- case pendingTokens t of
- [] -> do
- popStartCode
- alexMonadScan
- (x:xs) -> do
- mapUserState $ \s -> s { pendingTokens = xs }
- pure x
-
- delayToken :: Token -> Alex ()
- delayToken t = do
- mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }
- pushStartCode pending
-
- just :: Alex a -> AlexAction Token
- just k _ _ = k *> alexMonadScan
-
- getUserState :: Alex AlexUserState
- getUserState = Alex $ \s -> Right (s, alex_ust s)
-
- mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
- mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
-
- pushStartCode :: Int -> Alex ()
- pushStartCode c = do
- sc <- alexGetStartCode
- mapUserState $ \s -> s { startCodes = sc:startCodes s }
- alexSetStartCode c
-
- popStartCode :: Alex ()
- popStartCode = do
- sc <- startCodes <$> getUserState
- case sc of
- [] -> alexSetStartCode 0
- (x:xs) -> do
- mapUserState $ \s -> s { startCodes = xs }
- alexSetStartCode x
-
- offsideRule :: AlexInput -> Int64 -> Alex Token
- offsideRule (AlexPn _ line col, _, s, _) _ = do
- ~(col':ctx) <- layoutColumns <$> getUserState
- case col `compare` layoutCol col' of
- EQ -> do
- popStartCode
- maybePopImportSC
- pure (Token TokSemi line col)
- GT -> do
- popStartCode
- alexMonadScan
- LT -> do
- mapUserState $ \s -> s { layoutColumns = ctx }
- pure (Token TokLEnd line col)
-
- maybePopImportSC :: Alex ()
- maybePopImportSC = do
- startcode <- alexGetStartCode
- when (startcode == import_) popStartCode
-
- emptyLayout :: AlexInput -> Int64 -> Alex Token
- emptyLayout (AlexPn _ line col, _, _, _) _ = do
- popStartCode
- pushStartCode newline
- pure (Token TokLEnd line col)
-
- startLayout :: AlexInput -> Int64 -> Alex Token
- startLayout (AlexPn _ line col, _, _, _) _ = do
- state <- getUserState
- popStartCode
- let
- col' =
- case layoutColumns state of
- [] -> 0
- (x:_) -> layoutCol x
-
- layoutKind = case pendingLayoutKw state of
- Just s -> s
- Nothing -> Layout
-
- if col < col'
- then pushStartCode empty_layout
- else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
- pure (Token TokLStart line col)
-
- getLayout :: Alex LayoutState
- getLayout = do
- t <- getUserState
- case layoutColumns t of
- (x:_) -> pure x
- _ -> error "No layout?"
-
- openBrace :: AlexInput -> Int64 -> Alex Token
- openBrace (AlexPn _ line col, _, _, _) _ = do
- popStartCode
- mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
- pure (Token TokOBrace line col)
-
- popLayoutContext :: Alex ()
- popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
-
- closeBrace :: AlexInput -> Int64 -> Alex Token
- closeBrace (AlexPn _ line col, _, _, _) _ = do
- ~(col':_) <- layoutColumns <$> getUserState
- if layoutCol col' < 0
- then popLayoutContext
- else pure ()
- pure (Token TokCBrace line col)
-
- variableOrKeyword :: AlexAction Token
- variableOrKeyword (AlexPn _ l c, _, s, _) size = do
- sc <- alexGetStartCode
- state <- getUserState
-
- clearPendingLC
-
- let
- text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
- col = layoutCol (head (layoutColumns state))
-
- case T.unpack text of
- "as"
- | sc == import_, c > col -> pure (Token TokAs l c)
- | sc == import_ -> offsideKeyword (TokVar text) l c
- | otherwise -> pure (Token (TokVar text) l c)
-
- "qualified"
- | sc == import_, c > col -> pure (Token TokQualified l c)
- | sc == import_ -> offsideKeyword (TokVar text) l c
- | otherwise -> pure (Token (TokVar text) l c)
-
- "let" -> laidOut' (Just LetLayout) TokLet l c
- "in" -> do
- laidout <- getLayout
- case laidout of
- -- let .. in critical pair:
- -- 'in' is allowed to close a layout context before the offside rule would apply.
- LetLayout _ -> earlyEnd TokIn l c
- _ -> pure (Token TokIn l c)
-
- "data" -> pure (Token TokData l c)
-
- "where" -> do
- -- if this is the where in the module_header, then
- -- pop the start code so that the offside rule applies again
- when (sc == module_header) popStartCode
- laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
-
- "case"
- -- "case" is a layout token if it's immediately following a \\
- | pendingLambdaCase state -> laidOut TokCase l c
- | otherwise -> pure (Token TokCase l c)
-
- "import" -> do
- pushStartCode import_
- pure (Token TokImport l c)
-
- "of" -> laidOut TokOf l c
-
- "module" -> do
- unless (haveModuleHeader state) $ do
- mapUserState $ \s -> s { haveModuleHeader = True }
- pushStartCode module_header
- pure (Token TokModule l c)
-
- (x:_)
- | Data.Char.isUpper x -> pure (Token (TokCon text) l c)
- | otherwise -> pure (Token (TokVar text) l c)
-
- [] -> error "empty keyword/identifier"
-
- earlyEnd tok l c = do
- popLayoutContext
- delayToken (Token tok l c)
- pure (Token TokLEnd l c)
-
- offsideKeyword tok l c = do
- popLayoutContext
- delayToken (Token tok l c)
- pure (Token TokSemi l c)
-
- laidOut' n x l c = do
- pushStartCode layout
- mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
- pure (Token x l c)
-
- laidOut = laidOut' Nothing
- }
|