|
{
|
|
module Frontend.Autogen.Lexer where
|
|
|
|
import qualified Data.ByteString.Lazy as Lbs
|
|
import qualified Data.Text.Encoding as T
|
|
import qualified Data.Text as T
|
|
|
|
import Frontend.Lexer.Tokens
|
|
|
|
import Control.Monad
|
|
|
|
import Debug.Trace
|
|
}
|
|
|
|
%wrapper "monadUserState-bytestring"
|
|
|
|
$alpha = [a-zA-Z]
|
|
$digit = [0-9]
|
|
$white_nol = $white # \n
|
|
|
|
tokens :-
|
|
$white_nol+ ;
|
|
|
|
<0,module_header> "--" .* \n
|
|
{ just $ pushStartCode newline }
|
|
|
|
<0,module_header,import_>
|
|
$alpha [$alpha $digit \_ \']* { variableOrKeyword }
|
|
|
|
<0> \= { always TokEqual }
|
|
<0> \: \: { always TokDoubleColon }
|
|
|
|
<0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
|
|
<0> "->" { always TokArrow }
|
|
<0> "_" { always TokUnder }
|
|
|
|
<0> \{ { always TokOBrace }
|
|
<0> \[ { always TokOSquare }
|
|
|
|
<0,module_header,import_> {
|
|
\, { always TokComma }
|
|
\( { always TokOParen }
|
|
\) { always TokCParen }
|
|
}
|
|
|
|
<0> \} { closeBrace }
|
|
<0> \] { always TokCSquare }
|
|
|
|
<0> \;+ { always TokSemi }
|
|
|
|
<0> \n { just $ pushStartCode newline }
|
|
|
|
<0> \" { just startString }
|
|
|
|
<string> {
|
|
\\ \" { stringSeg (T.singleton '"') }
|
|
\\ \\ { stringSeg (T.singleton '\\') }
|
|
|
|
\\ a { stringSeg (T.singleton '\a') }
|
|
\\ b { stringSeg (T.singleton '\b') }
|
|
\\ f { stringSeg (T.singleton '\f') }
|
|
\\ n { stringSeg (T.singleton '\n') }
|
|
\\ \n { stringSeg (T.singleton '\n') }
|
|
\\ r { stringSeg (T.singleton '\r') }
|
|
\\ v { stringSeg (T.singleton '\v') }
|
|
\\ t { stringSeg (T.singleton '\t') }
|
|
|
|
\" { endString }
|
|
|
|
[^\\\"]+ { stringChar }
|
|
}
|
|
|
|
<0,newline,comment,import_,module_header>
|
|
"{-" { just $ pushStartCode comment }
|
|
|
|
<comment> {
|
|
"-}" { \i l -> popStartCode *> skip i l }
|
|
. ;
|
|
}
|
|
|
|
-- newline: emit a semicolon when de-denting
|
|
<newline> {
|
|
\n ;
|
|
"--" .* \n ;
|
|
|
|
() { offsideRule }
|
|
}
|
|
|
|
-- layout: indentation of the next token is context for offside rule
|
|
<layout> {
|
|
\n ;
|
|
"--" .* \n ;
|
|
|
|
\{ { openBrace }
|
|
() { startLayout }
|
|
}
|
|
|
|
<import_> {
|
|
\n { just $ pushStartCode newline }
|
|
"--" .* \n { just $ pushStartCode newline }
|
|
}
|
|
|
|
<empty_layout> () { emptyLayout }
|
|
|
|
<pending> () { emitPendingToken }
|
|
|
|
<module_header> {
|
|
\n ;
|
|
}
|
|
|
|
{
|
|
alexEOF :: Alex Token
|
|
alexEOF = do
|
|
(AlexPn _ l c, _, _, _) <- alexGetInput
|
|
|
|
maybePopImportSC
|
|
|
|
state <- getUserState
|
|
|
|
unless (T.null (stringBuffer state)) $ do
|
|
alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
|
|
|
|
case layoutColumns state of
|
|
-- EOF is allowed to close as many layout contexts as there are
|
|
-- pending (number of pending layout contexts is the length of the
|
|
-- list minus one, since there's the one initial layout context.)
|
|
_:tail -> do
|
|
mapUserState $ \s ->
|
|
s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]
|
|
, layoutColumns = []
|
|
}
|
|
pushStartCode pending
|
|
pure (Token TokLEnd l c)
|
|
_ -> pure $ Token TokEof l c
|
|
|
|
yield k inp i = clearPendingLC *> yield' k inp i
|
|
|
|
yield' k (AlexPn _ l c, _, s, _) i = do
|
|
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
|
|
|
|
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
|
|
clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
|
|
|
|
always :: TokenClass -> AlexInput -> Int64 -> Alex Token
|
|
always k x i = yield (const k) x i
|
|
|
|
startString = do
|
|
mapUserState $ \s -> s { stringBuffer = T.empty }
|
|
pushStartCode string
|
|
|
|
endString (AlexPn _ l c, _, _, _) _i = do
|
|
text <- stringBuffer <$> getUserState
|
|
mapUserState $ \s -> s { stringBuffer = T.empty }
|
|
popStartCode
|
|
pure (Token (TokString text) l c)
|
|
|
|
stringChar input@(AlexPn _ _ _, _, buf, _) i = do
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
|
|
alexMonadScan
|
|
|
|
stringSeg text _ _ = do
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
|
|
alexMonadScan
|
|
|
|
data LayoutState
|
|
= LetLayout { layoutCol :: Int }
|
|
| Layout { layoutCol :: Int }
|
|
| ModLayout { layoutCol :: Int }
|
|
deriving (Show)
|
|
|
|
data AlexUserState =
|
|
AlexUserState { layoutColumns :: [LayoutState]
|
|
, startCodes :: [Int]
|
|
, leastColumn :: Int
|
|
|
|
, pendingLayoutKw :: Maybe (Int -> LayoutState)
|
|
, pendingTokens :: [Token]
|
|
, pendingLambdaCase :: Bool
|
|
, haveModuleHeader :: Bool
|
|
|
|
, stringBuffer :: T.Text
|
|
}
|
|
|
|
alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
|
|
|
|
emitPendingToken :: AlexAction Token
|
|
emitPendingToken _ _ = do
|
|
t <- getUserState
|
|
case pendingTokens t of
|
|
[] -> do
|
|
popStartCode
|
|
alexMonadScan
|
|
(x:xs) -> do
|
|
mapUserState $ \s -> s { pendingTokens = xs }
|
|
pure x
|
|
|
|
delayToken :: Token -> Alex ()
|
|
delayToken t = do
|
|
mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }
|
|
pushStartCode pending
|
|
|
|
just :: Alex a -> AlexAction Token
|
|
just k _ _ = k *> alexMonadScan
|
|
|
|
getUserState :: Alex AlexUserState
|
|
getUserState = Alex $ \s -> Right (s, alex_ust s)
|
|
|
|
mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
|
|
mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
|
|
|
|
pushStartCode :: Int -> Alex ()
|
|
pushStartCode c = do
|
|
sc <- alexGetStartCode
|
|
mapUserState $ \s -> s { startCodes = sc:startCodes s }
|
|
alexSetStartCode c
|
|
|
|
popStartCode :: Alex ()
|
|
popStartCode = do
|
|
sc <- startCodes <$> getUserState
|
|
case sc of
|
|
[] -> alexSetStartCode 0
|
|
(x:xs) -> do
|
|
mapUserState $ \s -> s { startCodes = xs }
|
|
alexSetStartCode x
|
|
|
|
offsideRule :: AlexInput -> Int64 -> Alex Token
|
|
offsideRule (AlexPn _ line col, _, s, _) _ = do
|
|
~(col':ctx) <- layoutColumns <$> getUserState
|
|
case col `compare` layoutCol col' of
|
|
EQ -> do
|
|
popStartCode
|
|
maybePopImportSC
|
|
pure (Token TokSemi line col)
|
|
GT -> do
|
|
popStartCode
|
|
alexMonadScan
|
|
LT -> do
|
|
mapUserState $ \s -> s { layoutColumns = ctx }
|
|
pure (Token TokLEnd line col)
|
|
|
|
maybePopImportSC :: Alex ()
|
|
maybePopImportSC = do
|
|
startcode <- alexGetStartCode
|
|
when (startcode == import_) popStartCode
|
|
|
|
emptyLayout :: AlexInput -> Int64 -> Alex Token
|
|
emptyLayout (AlexPn _ line col, _, _, _) _ = do
|
|
popStartCode
|
|
pushStartCode newline
|
|
pure (Token TokLEnd line col)
|
|
|
|
startLayout :: AlexInput -> Int64 -> Alex Token
|
|
startLayout (AlexPn _ line col, _, _, _) _ = do
|
|
state <- getUserState
|
|
popStartCode
|
|
let
|
|
col' =
|
|
case layoutColumns state of
|
|
[] -> 0
|
|
(x:_) -> layoutCol x
|
|
|
|
layoutKind = case pendingLayoutKw state of
|
|
Just s -> s
|
|
Nothing -> Layout
|
|
|
|
if col < col'
|
|
then pushStartCode empty_layout
|
|
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
|
|
pure (Token TokLStart line col)
|
|
|
|
getLayout :: Alex LayoutState
|
|
getLayout = do
|
|
t <- getUserState
|
|
case layoutColumns t of
|
|
(x:_) -> pure x
|
|
_ -> error "No layout?"
|
|
|
|
openBrace :: AlexInput -> Int64 -> Alex Token
|
|
openBrace (AlexPn _ line col, _, _, _) _ = do
|
|
popStartCode
|
|
mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
|
|
pure (Token TokOBrace line col)
|
|
|
|
popLayoutContext :: Alex ()
|
|
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
|
|
|
|
closeBrace :: AlexInput -> Int64 -> Alex Token
|
|
closeBrace (AlexPn _ line col, _, _, _) _ = do
|
|
~(col':_) <- layoutColumns <$> getUserState
|
|
if layoutCol col' < 0
|
|
then popLayoutContext
|
|
else pure ()
|
|
pure (Token TokCBrace line col)
|
|
|
|
variableOrKeyword :: AlexAction Token
|
|
variableOrKeyword (AlexPn _ l c, _, s, _) size = do
|
|
sc <- alexGetStartCode
|
|
state <- getUserState
|
|
|
|
clearPendingLC
|
|
|
|
let
|
|
text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
|
|
col = layoutCol (head (layoutColumns state))
|
|
|
|
case T.unpack text of
|
|
"as"
|
|
| sc == import_, c > col -> pure (Token TokAs l c)
|
|
| sc == import_ -> offsideKeyword (TokVar text) l c
|
|
| otherwise -> pure (Token (TokVar text) l c)
|
|
|
|
"qualified"
|
|
| sc == import_, c > col -> pure (Token TokQualified l c)
|
|
| sc == import_ -> offsideKeyword (TokVar text) l c
|
|
| otherwise -> pure (Token (TokVar text) l c)
|
|
|
|
"let" -> laidOut' (Just LetLayout) TokLet l c
|
|
"in" -> do
|
|
laidout <- getLayout
|
|
case laidout of
|
|
-- let .. in critical pair:
|
|
-- 'in' is allowed to close a layout context before the offside rule would apply.
|
|
LetLayout _ -> earlyEnd TokIn l c
|
|
_ -> pure (Token TokIn l c)
|
|
|
|
"data" -> pure (Token TokData l c)
|
|
|
|
"where" -> do
|
|
-- if this is the where in the module_header, then
|
|
-- pop the start code so that the offside rule applies again
|
|
when (sc == module_header) popStartCode
|
|
laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
|
|
|
|
"case"
|
|
-- "case" is a layout token if it's immediately following a \\
|
|
| pendingLambdaCase state -> laidOut TokCase l c
|
|
| otherwise -> pure (Token TokCase l c)
|
|
|
|
"import" -> do
|
|
pushStartCode import_
|
|
pure (Token TokImport l c)
|
|
|
|
"of" -> laidOut TokOf l c
|
|
|
|
"module" -> do
|
|
unless (haveModuleHeader state) $ do
|
|
mapUserState $ \s -> s { haveModuleHeader = True }
|
|
pushStartCode module_header
|
|
pure (Token TokModule l c)
|
|
|
|
(x:_)
|
|
| Data.Char.isUpper x -> pure (Token (TokCon text) l c)
|
|
| otherwise -> pure (Token (TokVar text) l c)
|
|
|
|
[] -> error "empty keyword/identifier"
|
|
|
|
earlyEnd tok l c = do
|
|
popLayoutContext
|
|
delayToken (Token tok l c)
|
|
pure (Token TokLEnd l c)
|
|
|
|
offsideKeyword tok l c = do
|
|
popLayoutContext
|
|
delayToken (Token tok l c)
|
|
pure (Token TokSemi l c)
|
|
|
|
laidOut' n x l c = do
|
|
pushStartCode layout
|
|
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
|
|
pure (Token x l c)
|
|
|
|
laidOut = laidOut' Nothing
|
|
}
|