Abbie's Haskell compiler
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

373 lines
10 KiB

{
module Frontend.Autogen.Lexer where
import qualified Data.ByteString.Lazy as Lbs
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import Frontend.Lexer.Tokens
import Control.Monad
import Debug.Trace
}
%wrapper "monadUserState-bytestring"
$alpha = [a-zA-Z]
$digit = [0-9]
$white_nol = $white # \n
tokens :-
$white_nol+ ;
<0,module_header> "--" .* \n
{ just $ pushStartCode newline }
<0,module_header,import_>
$alpha [$alpha $digit \_ \']* { variableOrKeyword }
<0> \= { always TokEqual }
<0> \: \: { always TokDoubleColon }
<0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
<0> "->" { always TokArrow }
<0> "_" { always TokUnder }
<0> \{ { always TokOBrace }
<0> \[ { always TokOSquare }
<0,module_header,import_> {
\, { always TokComma }
\( { always TokOParen }
\) { always TokCParen }
}
<0> \} { closeBrace }
<0> \] { always TokCSquare }
<0> \;+ { always TokSemi }
<0> \n { just $ pushStartCode newline }
<0> \" { just startString }
<string> {
\\ \" { stringSeg (T.singleton '"') }
\\ \\ { stringSeg (T.singleton '\\') }
\\ a { stringSeg (T.singleton '\a') }
\\ b { stringSeg (T.singleton '\b') }
\\ f { stringSeg (T.singleton '\f') }
\\ n { stringSeg (T.singleton '\n') }
\\ \n { stringSeg (T.singleton '\n') }
\\ r { stringSeg (T.singleton '\r') }
\\ v { stringSeg (T.singleton '\v') }
\\ t { stringSeg (T.singleton '\t') }
\" { endString }
[^\\\"]+ { stringChar }
}
<0,newline,comment,import_,module_header>
"{-" { just $ pushStartCode comment }
<comment> {
"-}" { \i l -> popStartCode *> skip i l }
. ;
}
-- newline: emit a semicolon when de-denting
<newline> {
\n ;
"--" .* \n ;
() { offsideRule }
}
-- layout: indentation of the next token is context for offside rule
<layout> {
\n ;
"--" .* \n ;
\{ { openBrace }
() { startLayout }
}
<import_> {
\n { just $ pushStartCode newline }
"--" .* \n { just $ pushStartCode newline }
}
<empty_layout> () { emptyLayout }
<pending> () { emitPendingToken }
<module_header> {
\n ;
}
{
alexEOF :: Alex Token
alexEOF = do
(AlexPn _ l c, _, _, _) <- alexGetInput
maybePopImportSC
state <- getUserState
unless (T.null (stringBuffer state)) $ do
alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
case layoutColumns state of
-- EOF is allowed to close as many layout contexts as there are
-- pending (number of pending layout contexts is the length of the
-- list minus one, since there's the one initial layout context.)
_:tail -> do
mapUserState $ \s ->
s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]
, layoutColumns = []
}
pushStartCode pending
pure (Token TokLEnd l c)
_ -> pure $ Token TokEof l c
yield k inp i = clearPendingLC *> yield' k inp i
yield' k (AlexPn _ l c, _, s, _) i = do
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
always :: TokenClass -> AlexInput -> Int64 -> Alex Token
always k x i = yield (const k) x i
startString = do
mapUserState $ \s -> s { stringBuffer = T.empty }
pushStartCode string
endString (AlexPn _ l c, _, _, _) _i = do
text <- stringBuffer <$> getUserState
mapUserState $ \s -> s { stringBuffer = T.empty }
popStartCode
pure (Token (TokString text) l c)
stringChar input@(AlexPn _ _ _, _, buf, _) i = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
alexMonadScan
stringSeg text _ _ = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
alexMonadScan
data LayoutState
= LetLayout { layoutCol :: Int }
| Layout { layoutCol :: Int }
| ModLayout { layoutCol :: Int }
deriving (Show)
data AlexUserState =
AlexUserState { layoutColumns :: [LayoutState]
, startCodes :: [Int]
, leastColumn :: Int
, pendingLayoutKw :: Maybe (Int -> LayoutState)
, pendingTokens :: [Token]
, pendingLambdaCase :: Bool
, haveModuleHeader :: Bool
, stringBuffer :: T.Text
}
alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
emitPendingToken :: AlexAction Token
emitPendingToken _ _ = do
t <- getUserState
case pendingTokens t of
[] -> do
popStartCode
alexMonadScan
(x:xs) -> do
mapUserState $ \s -> s { pendingTokens = xs }
pure x
delayToken :: Token -> Alex ()
delayToken t = do
mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }
pushStartCode pending
just :: Alex a -> AlexAction Token
just k _ _ = k *> alexMonadScan
getUserState :: Alex AlexUserState
getUserState = Alex $ \s -> Right (s, alex_ust s)
mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
pushStartCode :: Int -> Alex ()
pushStartCode c = do
sc <- alexGetStartCode
mapUserState $ \s -> s { startCodes = sc:startCodes s }
alexSetStartCode c
popStartCode :: Alex ()
popStartCode = do
sc <- startCodes <$> getUserState
case sc of
[] -> alexSetStartCode 0
(x:xs) -> do
mapUserState $ \s -> s { startCodes = xs }
alexSetStartCode x
offsideRule :: AlexInput -> Int64 -> Alex Token
offsideRule (AlexPn _ line col, _, s, _) _ = do
~(col':ctx) <- layoutColumns <$> getUserState
case col `compare` layoutCol col' of
EQ -> do
popStartCode
maybePopImportSC
pure (Token TokSemi line col)
GT -> do
popStartCode
alexMonadScan
LT -> do
mapUserState $ \s -> s { layoutColumns = ctx }
pure (Token TokLEnd line col)
maybePopImportSC :: Alex ()
maybePopImportSC = do
startcode <- alexGetStartCode
when (startcode == import_) popStartCode
emptyLayout :: AlexInput -> Int64 -> Alex Token
emptyLayout (AlexPn _ line col, _, _, _) _ = do
popStartCode
pushStartCode newline
pure (Token TokLEnd line col)
startLayout :: AlexInput -> Int64 -> Alex Token
startLayout (AlexPn _ line col, _, _, _) _ = do
state <- getUserState
popStartCode
let
col' =
case layoutColumns state of
[] -> 0
(x:_) -> layoutCol x
layoutKind = case pendingLayoutKw state of
Just s -> s
Nothing -> Layout
if col < col'
then pushStartCode empty_layout
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
pure (Token TokLStart line col)
getLayout :: Alex LayoutState
getLayout = do
t <- getUserState
case layoutColumns t of
(x:_) -> pure x
_ -> error "No layout?"
openBrace :: AlexInput -> Int64 -> Alex Token
openBrace (AlexPn _ line col, _, _, _) _ = do
popStartCode
mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
pure (Token TokOBrace line col)
popLayoutContext :: Alex ()
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
closeBrace :: AlexInput -> Int64 -> Alex Token
closeBrace (AlexPn _ line col, _, _, _) _ = do
~(col':_) <- layoutColumns <$> getUserState
if layoutCol col' < 0
then popLayoutContext
else pure ()
pure (Token TokCBrace line col)
variableOrKeyword :: AlexAction Token
variableOrKeyword (AlexPn _ l c, _, s, _) size = do
sc <- alexGetStartCode
state <- getUserState
clearPendingLC
let
text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
col = layoutCol (head (layoutColumns state))
case T.unpack text of
"as"
| sc == import_, c > col -> pure (Token TokAs l c)
| sc == import_ -> offsideKeyword (TokVar text) l c
| otherwise -> pure (Token (TokVar text) l c)
"qualified"
| sc == import_, c > col -> pure (Token TokQualified l c)
| sc == import_ -> offsideKeyword (TokVar text) l c
| otherwise -> pure (Token (TokVar text) l c)
"let" -> laidOut' (Just LetLayout) TokLet l c
"in" -> do
laidout <- getLayout
case laidout of
-- let .. in critical pair:
-- 'in' is allowed to close a layout context before the offside rule would apply.
LetLayout _ -> earlyEnd TokIn l c
_ -> pure (Token TokIn l c)
"data" -> pure (Token TokData l c)
"where" -> do
-- if this is the where in the module_header, then
-- pop the start code so that the offside rule applies again
when (sc == module_header) popStartCode
laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
"case"
-- "case" is a layout token if it's immediately following a \\
| pendingLambdaCase state -> laidOut TokCase l c
| otherwise -> pure (Token TokCase l c)
"import" -> do
pushStartCode import_
pure (Token TokImport l c)
"of" -> laidOut TokOf l c
"module" -> do
unless (haveModuleHeader state) $ do
mapUserState $ \s -> s { haveModuleHeader = True }
pushStartCode module_header
pure (Token TokModule l c)
(x:_)
| Data.Char.isUpper x -> pure (Token (TokCon text) l c)
| otherwise -> pure (Token (TokVar text) l c)
[] -> error "empty keyword/identifier"
earlyEnd tok l c = do
popLayoutContext
delayToken (Token tok l c)
pure (Token TokLEnd l c)
offsideKeyword tok l c = do
popLayoutContext
delayToken (Token tok l c)
pure (Token TokSemi l c)
laidOut' n x l c = do
pushStartCode layout
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
pure (Token x l c)
laidOut = laidOut' Nothing
}