|
@ -1,30 +1,33 @@ |
|
|
{ |
|
|
{ |
|
|
module Frontend.Autogen.Lexer where |
|
|
module Frontend.Autogen.Lexer where |
|
|
|
|
|
|
|
|
|
|
|
import Control.Monad |
|
|
|
|
|
|
|
|
import qualified Data.ByteString.Lazy as Lbs |
|
|
import qualified Data.ByteString.Lazy as Lbs |
|
|
import qualified Data.Text.Encoding as T |
|
|
import qualified Data.Text.Encoding as T |
|
|
import qualified Data.Text as T |
|
|
import qualified Data.Text as T |
|
|
|
|
|
import qualified Data.Char |
|
|
|
|
|
import Data.Int (Int64) |
|
|
|
|
|
|
|
|
|
|
|
import Frontend.Lexer.Wrapper |
|
|
import Frontend.Lexer.Tokens |
|
|
import Frontend.Lexer.Tokens |
|
|
|
|
|
|
|
|
import Control.Monad |
|
|
|
|
|
|
|
|
|
|
|
import Debug.Trace |
|
|
|
|
|
|
|
|
import Frontend.Parser.Posn |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
%wrapper "monadUserState-bytestring" |
|
|
|
|
|
|
|
|
-- %wrapper "monadUserState-bytestring" |
|
|
|
|
|
|
|
|
$alpha = [a-zA-Z] |
|
|
$alpha = [a-zA-Z] |
|
|
$digit = [0-9] |
|
|
$digit = [0-9] |
|
|
$white_nol = $white # \n |
|
|
|
|
|
|
|
|
$white_nol = $white # [\n\t] |
|
|
|
|
|
|
|
|
tokens :- |
|
|
tokens :- |
|
|
$white_nol+ ; |
|
|
$white_nol+ ; |
|
|
|
|
|
\t { \_ _ -> alexError "tab character in source code" } |
|
|
|
|
|
|
|
|
<0,module_header> "--" .* \n |
|
|
|
|
|
|
|
|
<0,import_> "--" .* \n |
|
|
{ just $ pushStartCode newline } |
|
|
{ just $ pushStartCode newline } |
|
|
|
|
|
|
|
|
<0,module_header,import_> |
|
|
|
|
|
|
|
|
<0,import_> |
|
|
$alpha [$alpha $digit \_ \' \.]* { variableOrKeyword } |
|
|
$alpha [$alpha $digit \_ \' \.]* { variableOrKeyword } |
|
|
|
|
|
|
|
|
<0> \= { always TokEqual } |
|
|
<0> \= { always TokEqual } |
|
@ -37,7 +40,7 @@ tokens :- |
|
|
<0> \{ { always TokOBrace } |
|
|
<0> \{ { always TokOBrace } |
|
|
<0> \[ { always TokOSquare } |
|
|
<0> \[ { always TokOSquare } |
|
|
|
|
|
|
|
|
<0,module_header,import_> { |
|
|
|
|
|
|
|
|
<0,import_> { |
|
|
\, { always TokComma } |
|
|
\, { always TokComma } |
|
|
\( { always TokOParen } |
|
|
\( { always TokOParen } |
|
|
\) { always TokCParen } |
|
|
\) { always TokCParen } |
|
@ -48,33 +51,33 @@ tokens :- |
|
|
|
|
|
|
|
|
<0> \;+ { always TokSemi } |
|
|
<0> \;+ { always TokSemi } |
|
|
|
|
|
|
|
|
<0> \n { just $ pushStartCode newline } |
|
|
|
|
|
|
|
|
<0,import_> \n { just $ pushStartCode newline } |
|
|
|
|
|
|
|
|
<0> \" { just startString } |
|
|
|
|
|
|
|
|
<0> \" { startString } |
|
|
|
|
|
|
|
|
<string> { |
|
|
<string> { |
|
|
\\ \" { stringSeg (T.singleton '"') } |
|
|
|
|
|
\\ \\ { stringSeg (T.singleton '\\') } |
|
|
|
|
|
|
|
|
|
|
|
\\ a { stringSeg (T.singleton '\a') } |
|
|
|
|
|
\\ b { stringSeg (T.singleton '\b') } |
|
|
|
|
|
\\ f { stringSeg (T.singleton '\f') } |
|
|
|
|
|
\\ n { stringSeg (T.singleton '\n') } |
|
|
|
|
|
\\ \n { stringSeg (T.singleton '\n') } |
|
|
|
|
|
\\ r { stringSeg (T.singleton '\r') } |
|
|
|
|
|
\\ v { stringSeg (T.singleton '\v') } |
|
|
|
|
|
\\ t { stringSeg (T.singleton '\t') } |
|
|
|
|
|
|
|
|
\\ \" { stringAppend (T.singleton '"') } |
|
|
|
|
|
\\ \\ { stringAppend (T.singleton '\\') } |
|
|
|
|
|
|
|
|
|
|
|
\\ a { stringAppend (T.singleton '\a') } |
|
|
|
|
|
\\ b { stringAppend (T.singleton '\b') } |
|
|
|
|
|
\\ f { stringAppend (T.singleton '\f') } |
|
|
|
|
|
\\ n { stringAppend (T.singleton '\n') } |
|
|
|
|
|
\\ \n { stringAppend (T.singleton '\n') } |
|
|
|
|
|
\\ r { stringAppend (T.singleton '\r') } |
|
|
|
|
|
\\ v { stringAppend (T.singleton '\v') } |
|
|
|
|
|
\\ t { stringAppend (T.singleton '\t') } |
|
|
|
|
|
|
|
|
\" { endString } |
|
|
\" { endString } |
|
|
|
|
|
|
|
|
[^\\\"]+ { stringChar } |
|
|
|
|
|
|
|
|
[^\\\"]+ { stringSegment } |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
<0,newline,comment,import_,module_header> |
|
|
|
|
|
|
|
|
<0,newline,comment,import_> |
|
|
"{-" { just $ pushStartCode comment } |
|
|
"{-" { just $ pushStartCode comment } |
|
|
|
|
|
|
|
|
<comment> { |
|
|
<comment> { |
|
|
"-}" { \i l -> popStartCode *> skip i l } |
|
|
|
|
|
|
|
|
"-}" { \_ _ -> popStartCode *> alexMonadScan } |
|
|
. ; |
|
|
. ; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -104,14 +107,10 @@ tokens :- |
|
|
|
|
|
|
|
|
<pending> () { emitPendingToken } |
|
|
<pending> () { emitPendingToken } |
|
|
|
|
|
|
|
|
<module_header> { |
|
|
|
|
|
\n ; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
{ |
|
|
{ |
|
|
alexEOF :: Alex Token |
|
|
alexEOF :: Alex Token |
|
|
alexEOF = do |
|
|
alexEOF = do |
|
|
(AlexPn _ l c, _, _, _) <- alexGetInput |
|
|
|
|
|
|
|
|
(Posn l c, _, _, _) <- alexGetInput |
|
|
|
|
|
|
|
|
maybePopImportSC |
|
|
maybePopImportSC |
|
|
|
|
|
|
|
@ -135,7 +134,7 @@ alexEOF = do |
|
|
|
|
|
|
|
|
yield k inp i = clearPendingLC *> yield' k inp i |
|
|
yield k inp i = clearPendingLC *> yield' k inp i |
|
|
|
|
|
|
|
|
yield' k (AlexPn _ l c, _, s, _) i = do |
|
|
|
|
|
|
|
|
yield' k (Posn l c, _, s, _) i = do |
|
|
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) |
|
|
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) |
|
|
|
|
|
|
|
|
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } |
|
|
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } |
|
@ -144,45 +143,32 @@ clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False } |
|
|
always :: TokenClass -> AlexInput -> Int64 -> Alex Token |
|
|
always :: TokenClass -> AlexInput -> Int64 -> Alex Token |
|
|
always k x i = yield (const k) x i |
|
|
always k x i = yield (const k) x i |
|
|
|
|
|
|
|
|
startString = do |
|
|
|
|
|
mapUserState $ \s -> s { stringBuffer = T.empty } |
|
|
|
|
|
|
|
|
-- reset the string buffer and push the string start code |
|
|
|
|
|
startString (p, _, _, _) _ = do |
|
|
|
|
|
mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p } |
|
|
pushStartCode string |
|
|
pushStartCode string |
|
|
|
|
|
alexMonadScan |
|
|
|
|
|
|
|
|
endString (AlexPn _ l c, _, _, _) _i = do |
|
|
|
|
|
text <- stringBuffer <$> getUserState |
|
|
|
|
|
mapUserState $ \s -> s { stringBuffer = T.empty } |
|
|
|
|
|
|
|
|
-- pop the string start code, and emit the string buffer as a token. |
|
|
|
|
|
endString (Posn l c, _, _, _) _i = do |
|
|
|
|
|
state <- getUserState |
|
|
|
|
|
|
|
|
|
|
|
mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing } |
|
|
popStartCode |
|
|
popStartCode |
|
|
pure (Token (TokString text) l c) |
|
|
|
|
|
|
|
|
|
|
|
stringChar input@(AlexPn _ _ _, _, buf, _) i = do |
|
|
|
|
|
|
|
|
let (Just (Posn l c)) = stringStartPosn state |
|
|
|
|
|
pure (Token (TokString (stringBuffer state)) l c) |
|
|
|
|
|
|
|
|
|
|
|
-- append a /lexed/ region to the string buffer |
|
|
|
|
|
stringSegment (Posn _ _, _, buf, _) i = do |
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } |
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } |
|
|
alexMonadScan |
|
|
alexMonadScan |
|
|
|
|
|
|
|
|
stringSeg text _ _ = do |
|
|
|
|
|
|
|
|
-- append a constant fragment to the string buffer. |
|
|
|
|
|
stringAppend text _ _ = do |
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text } |
|
|
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text } |
|
|
alexMonadScan |
|
|
alexMonadScan |
|
|
|
|
|
|
|
|
data LayoutState |
|
|
|
|
|
= LetLayout { layoutCol :: Int } |
|
|
|
|
|
| Layout { layoutCol :: Int } |
|
|
|
|
|
| ModLayout { layoutCol :: Int } |
|
|
|
|
|
deriving (Show) |
|
|
|
|
|
|
|
|
|
|
|
data AlexUserState = |
|
|
|
|
|
AlexUserState { layoutColumns :: ![LayoutState] |
|
|
|
|
|
, startCodes :: ![Int] |
|
|
|
|
|
, leastColumn :: !Int |
|
|
|
|
|
|
|
|
|
|
|
, pendingLayoutKw :: Maybe (Int -> LayoutState) |
|
|
|
|
|
, pendingTokens :: ![Token] |
|
|
|
|
|
, pendingLambdaCase :: !Bool |
|
|
|
|
|
, haveModuleHeader :: !Bool |
|
|
|
|
|
|
|
|
|
|
|
, stringBuffer :: !T.Text |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty |
|
|
|
|
|
|
|
|
|
|
|
emitPendingToken :: AlexAction Token |
|
|
emitPendingToken :: AlexAction Token |
|
|
emitPendingToken _ _ = do |
|
|
emitPendingToken _ _ = do |
|
|
t <- getUserState |
|
|
t <- getUserState |
|
@ -224,19 +210,68 @@ popStartCode = do |
|
|
alexSetStartCode x |
|
|
alexSetStartCode x |
|
|
|
|
|
|
|
|
offsideRule :: AlexInput -> Int64 -> Alex Token |
|
|
offsideRule :: AlexInput -> Int64 -> Alex Token |
|
|
offsideRule (AlexPn _ line col, _, s, _) _ = do |
|
|
|
|
|
~(col':ctx) <- layoutColumns <$> getUserState |
|
|
|
|
|
case col `compare` layoutCol col' of |
|
|
|
|
|
EQ -> do |
|
|
|
|
|
popStartCode |
|
|
|
|
|
maybePopImportSC |
|
|
|
|
|
pure (Token TokSemi line col) |
|
|
|
|
|
GT -> do |
|
|
|
|
|
popStartCode |
|
|
|
|
|
alexMonadScan |
|
|
|
|
|
LT -> do |
|
|
|
|
|
mapUserState $ \s -> s { layoutColumns = ctx } |
|
|
|
|
|
pure (Token TokLEnd line col) |
|
|
|
|
|
|
|
|
offsideRule (Posn line col, _, _, _) _ = do |
|
|
|
|
|
columns <- layoutColumns <$> getUserState |
|
|
|
|
|
|
|
|
|
|
|
let continue = popStartCode *> alexMonadScan |
|
|
|
|
|
|
|
|
|
|
|
-- The "offside rule" governs how to insert virtual semicolon and |
|
|
|
|
|
-- closing '}' tokens. It applies in the "newline" state, and, if we |
|
|
|
|
|
-- stay in that state, the rule keeps applying. There are a couple of |
|
|
|
|
|
-- cases: |
|
|
|
|
|
case columns of |
|
|
|
|
|
-- If we have no layout context (or we're in a layout context that |
|
|
|
|
|
-- started with a physical '{'), then the offside rule plain doesn't |
|
|
|
|
|
-- apply. |
|
|
|
|
|
[] -> continue |
|
|
|
|
|
ExplicitLayout:_ -> continue |
|
|
|
|
|
-- Otherwise, we're dealing with something like |
|
|
|
|
|
-- |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- ^ this is the layout column. |
|
|
|
|
|
col':ctx -> do |
|
|
|
|
|
case col `compare` layoutCol col' of |
|
|
|
|
|
-- If we have something like |
|
|
|
|
|
-- |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- token |
|
|
|
|
|
-- ^ this is where we are |
|
|
|
|
|
-- then we emit a semicolon (and possibly do some bookeeping, |
|
|
|
|
|
-- like leaving the newline state) |
|
|
|
|
|
EQ -> do |
|
|
|
|
|
popStartCode |
|
|
|
|
|
maybePopImportSC |
|
|
|
|
|
pure (Token TokSemi line col) |
|
|
|
|
|
|
|
|
|
|
|
-- If we have something like |
|
|
|
|
|
-- |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- token |
|
|
|
|
|
-- ^ this is where we are |
|
|
|
|
|
-- then we don't emit anything, just leave the newline state, |
|
|
|
|
|
-- since this token continues the previous logical line. |
|
|
|
|
|
GT -> continue |
|
|
|
|
|
|
|
|
|
|
|
-- If we have something like |
|
|
|
|
|
-- |
|
|
|
|
|
-- C D E |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- do token |
|
|
|
|
|
-- token |
|
|
|
|
|
-- ^ we are here |
|
|
|
|
|
-- then we're behind the layout context, but not just one, three! |
|
|
|
|
|
-- we emit a closing '}' to close context 'E', and STAY in the |
|
|
|
|
|
-- newline context. when we eventually end up here again |
|
|
|
|
|
-- (recurring interleaved with the lexer state machine), we |
|
|
|
|
|
-- close the D and C contexts in the same way. |
|
|
|
|
|
LT -> do |
|
|
|
|
|
mapUserState $ \s -> s { layoutColumns = ctx } |
|
|
|
|
|
pure (Token TokLEnd line col) |
|
|
|
|
|
|
|
|
|
|
|
-- eventually we either exhaust all the layout contexts or get |
|
|
|
|
|
-- to a layout context we're EQ or GT compared to. in that case |
|
|
|
|
|
-- one of the other rules apply. |
|
|
|
|
|
|
|
|
maybePopImportSC :: Alex () |
|
|
maybePopImportSC :: Alex () |
|
|
maybePopImportSC = do |
|
|
maybePopImportSC = do |
|
@ -244,13 +279,13 @@ maybePopImportSC = do |
|
|
when (startcode == import_) popStartCode |
|
|
when (startcode == import_) popStartCode |
|
|
|
|
|
|
|
|
emptyLayout :: AlexInput -> Int64 -> Alex Token |
|
|
emptyLayout :: AlexInput -> Int64 -> Alex Token |
|
|
emptyLayout (AlexPn _ line col, _, _, _) _ = do |
|
|
|
|
|
|
|
|
emptyLayout (Posn line col, _, _, _) _ = do |
|
|
popStartCode |
|
|
popStartCode |
|
|
pushStartCode newline |
|
|
pushStartCode newline |
|
|
pure (Token TokLEnd line col) |
|
|
pure (Token TokLEnd line col) |
|
|
|
|
|
|
|
|
startLayout :: AlexInput -> Int64 -> Alex Token |
|
|
startLayout :: AlexInput -> Int64 -> Alex Token |
|
|
startLayout (AlexPn _ line col, _, _, _) _ = do |
|
|
|
|
|
|
|
|
startLayout (Posn line col, _, _, _) _ = do |
|
|
state <- getUserState |
|
|
state <- getUserState |
|
|
popStartCode |
|
|
popStartCode |
|
|
let |
|
|
let |
|
@ -263,42 +298,70 @@ startLayout (AlexPn _ line col, _, _, _) _ = do |
|
|
Just s -> s |
|
|
Just s -> s |
|
|
Nothing -> Layout |
|
|
Nothing -> Layout |
|
|
|
|
|
|
|
|
if col < col' |
|
|
|
|
|
|
|
|
-- here's another rule. suppose we have: |
|
|
|
|
|
-- |
|
|
|
|
|
-- foo = bar where |
|
|
|
|
|
-- spam = ham |
|
|
|
|
|
-- |
|
|
|
|
|
-- if we just apply the rule that the next token after a layout |
|
|
|
|
|
-- keyword determines the column for the layout context, then we're |
|
|
|
|
|
-- starting another layout context at column 1! that's definitely not |
|
|
|
|
|
-- what we want. |
|
|
|
|
|
-- |
|
|
|
|
|
-- so a new layout context only starts if the first token is to the right |
|
|
|
|
|
-- of the previous layout context. that is: a block only starts if it's |
|
|
|
|
|
-- on the same line as the layout context, or indented further. |
|
|
|
|
|
if col <= col' |
|
|
then pushStartCode empty_layout |
|
|
then pushStartCode empty_layout |
|
|
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s } |
|
|
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s } |
|
|
pure (Token TokLStart line col) |
|
|
pure (Token TokLStart line col) |
|
|
|
|
|
|
|
|
getLayout :: Alex LayoutState |
|
|
|
|
|
getLayout = do |
|
|
|
|
|
t <- getUserState |
|
|
|
|
|
case layoutColumns t of |
|
|
|
|
|
(x:_) -> pure x |
|
|
|
|
|
_ -> error "No layout?" |
|
|
|
|
|
|
|
|
popLayoutContext :: Alex () |
|
|
|
|
|
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) } |
|
|
|
|
|
|
|
|
openBrace :: AlexInput -> Int64 -> Alex Token |
|
|
openBrace :: AlexInput -> Int64 -> Alex Token |
|
|
openBrace (AlexPn _ line col, _, _, _) _ = do |
|
|
|
|
|
|
|
|
openBrace (Posn line col, _, _, _) _ = do |
|
|
|
|
|
-- if we see a '{' token, we're probably in the layout state. in that |
|
|
|
|
|
-- case, we pop it! otherwise, we just pop the state anyway: if we |
|
|
|
|
|
-- were in <0>, then popping gets us back in <0>. |
|
|
popStartCode |
|
|
popStartCode |
|
|
mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s } |
|
|
|
|
|
pure (Token TokOBrace line col) |
|
|
|
|
|
|
|
|
|
|
|
popLayoutContext :: Alex () |
|
|
|
|
|
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) } |
|
|
|
|
|
|
|
|
-- we push an ExplicitLayout state so that the offside rule stops |
|
|
|
|
|
-- applying (logical lines are delimited by physical semicolons) and a |
|
|
|
|
|
-- '}' can close it. |
|
|
|
|
|
mapUserState $ \s -> s { layoutColumns = ExplicitLayout:layoutColumns s } |
|
|
|
|
|
pure (Token TokOBrace line col) |
|
|
|
|
|
|
|
|
closeBrace :: AlexInput -> Int64 -> Alex Token |
|
|
closeBrace :: AlexInput -> Int64 -> Alex Token |
|
|
closeBrace (AlexPn _ line col, _, _, _) _ = do |
|
|
|
|
|
~(col':_) <- layoutColumns <$> getUserState |
|
|
|
|
|
if layoutCol col' < 0 |
|
|
|
|
|
then popLayoutContext |
|
|
|
|
|
else pure () |
|
|
|
|
|
|
|
|
closeBrace (Posn line col, _, _, _) _ = do |
|
|
|
|
|
-- if we're lexing a '}' token (physical) and the rightmost layout |
|
|
|
|
|
-- context was started by a physical '{', then we can close it. |
|
|
|
|
|
-- otherwise we do nothing and probably get a parse error! |
|
|
|
|
|
columns <- layoutColumns <$> getUserState |
|
|
|
|
|
case columns of |
|
|
|
|
|
ExplicitLayout:_ -> popLayoutContext |
|
|
|
|
|
_ -> pure () |
|
|
pure (Token TokCBrace line col) |
|
|
pure (Token TokCBrace line col) |
|
|
|
|
|
|
|
|
variableOrKeyword :: AlexAction Token |
|
|
variableOrKeyword :: AlexAction Token |
|
|
variableOrKeyword (AlexPn _ l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) |
|
|
|
|
|
|
|
|
variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) |
|
|
|
|
|
|
|
|
finishVarKw :: Int -> Int -> T.Text -> Alex Token |
|
|
finishVarKw :: Int -> Int -> T.Text -> Alex Token |
|
|
finishVarKw l c text |
|
|
finishVarKw l c text |
|
|
| T.null text = undefined |
|
|
| T.null text = undefined |
|
|
| Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $ |
|
|
| Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $ |
|
|
|
|
|
-- if we have a token like A.B.C, we reverse it and span at the |
|
|
|
|
|
-- first (last) dot, so that we have, e.g.: |
|
|
|
|
|
-- |
|
|
|
|
|
-- "Aa.Bb.Cc" -> "cC.bB.aA" |
|
|
|
|
|
-- "Cc.Bb.Aa" -> ("Cc", ".bB.aA") |
|
|
|
|
|
-- |
|
|
|
|
|
-- what we have then is the suffix and the prefix, but they've both |
|
|
|
|
|
-- been reversed. so we unreverse them, and also drop the first |
|
|
|
|
|
-- (last) dot from the prefix. |
|
|
|
|
|
-- |
|
|
|
|
|
-- if the suffix starts with an uppercase letter, it's a constructor |
|
|
|
|
|
-- symbol (ConId). |
|
|
let |
|
|
let |
|
|
txet = T.reverse text |
|
|
txet = T.reverse text |
|
|
(suffix', prefix') = T.span (/= '.') txet |
|
|
(suffix', prefix') = T.span (/= '.') txet |
|
@ -319,6 +382,18 @@ finishVarKw l c text = do |
|
|
let col = layoutCol (head (layoutColumns state)) |
|
|
let col = layoutCol (head (layoutColumns state)) |
|
|
|
|
|
|
|
|
case T.unpack text of |
|
|
case T.unpack text of |
|
|
|
|
|
-- we handle the contextual 'as'/'qualified' tokens using a |
|
|
|
|
|
-- startcode. |
|
|
|
|
|
-- |
|
|
|
|
|
-- in the import_ state, as and qualified are keywords, unless the |
|
|
|
|
|
-- offside rule would apply to emit a ';' or '}' token. in that |
|
|
|
|
|
-- case, we emit a semicolon (what the offside rule would do!), and |
|
|
|
|
|
-- set the "keyword" (now changed to an identifier) as pending, so |
|
|
|
|
|
-- that it will be emitted by the next alexMonadScan. |
|
|
|
|
|
"import" -> do |
|
|
|
|
|
pushStartCode import_ |
|
|
|
|
|
pure (Token TokImport l c) |
|
|
|
|
|
|
|
|
"as" |
|
|
"as" |
|
|
| sc == import_, c > col -> pure (Token TokAs l c) |
|
|
| sc == import_, c > col -> pure (Token TokAs l c) |
|
|
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c |
|
|
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c |
|
@ -329,58 +404,79 @@ finishVarKw l c text = do |
|
|
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c |
|
|
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c |
|
|
| otherwise -> pure (Token (TokUnqual VarId text) l c) |
|
|
| otherwise -> pure (Token (TokUnqual VarId text) l c) |
|
|
|
|
|
|
|
|
|
|
|
-- when starting a layout context for let expressions we make sure |
|
|
|
|
|
-- that it is distinguishable from layout contexts started by |
|
|
|
|
|
-- anything else, because let layout contexts can be terminated |
|
|
|
|
|
-- ahead of time by the 'in' token. for instance in: |
|
|
|
|
|
-- |
|
|
|
|
|
-- let x = 1 in x |
|
|
|
|
|
-- |
|
|
|
|
|
-- there is no reason for the layout context that started after |
|
|
|
|
|
-- 'let' to be terminated by the 'in' token, since the offside rule |
|
|
|
|
|
-- hasn't had a chance to apply. the token stream in that case would look like |
|
|
|
|
|
-- |
|
|
|
|
|
-- 'let' '{' x '=' 1 'in' x |
|
|
|
|
|
-- |
|
|
|
|
|
-- which is a parse error. we do not implement the rule which says parse errors |
|
|
|
|
|
-- terminate layout contexts, instead doing this approximation. |
|
|
"let" -> laidOut' (Just LetLayout) TokLet l c |
|
|
"let" -> laidOut' (Just LetLayout) TokLet l c |
|
|
"in" -> do |
|
|
"in" -> do |
|
|
laidout <- getLayout |
|
|
|
|
|
|
|
|
laidout <- layoutColumns <$> getUserState |
|
|
case laidout of |
|
|
case laidout of |
|
|
-- let .. in critical pair: |
|
|
|
|
|
-- 'in' is allowed to close a layout context before the offside rule would apply. |
|
|
|
|
|
LetLayout _ -> earlyEnd TokIn l c |
|
|
|
|
|
|
|
|
LetLayout _:_ -> earlyEnd TokIn l c |
|
|
_ -> pure (Token TokIn l c) |
|
|
_ -> pure (Token TokIn l c) |
|
|
|
|
|
|
|
|
"data" -> pure (Token TokData l c) |
|
|
"data" -> pure (Token TokData l c) |
|
|
|
|
|
|
|
|
"where" -> do |
|
|
|
|
|
-- if this is the where in the module_header, then |
|
|
|
|
|
-- pop the start code so that the offside rule applies again |
|
|
|
|
|
when (sc == module_header) popStartCode |
|
|
|
|
|
laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c |
|
|
|
|
|
|
|
|
"where" -> laidOut TokWhere l c |
|
|
|
|
|
"module" -> pure (Token TokModule l c) |
|
|
|
|
|
|
|
|
|
|
|
-- when we lex a \ token, a flag is set in the lexer state to |
|
|
|
|
|
-- indicate that, if there is a 'case' token directly following, |
|
|
|
|
|
-- that token is to be interpreted as part of a lambda-case |
|
|
|
|
|
-- construct, and so must start a layout context for its branches. |
|
|
"case" |
|
|
"case" |
|
|
-- "case" is a layout token if it's immediately following a \\ |
|
|
|
|
|
| pendingLambdaCase state -> laidOut TokCase l c |
|
|
| pendingLambdaCase state -> laidOut TokCase l c |
|
|
| otherwise -> pure (Token TokCase l c) |
|
|
| otherwise -> pure (Token TokCase l c) |
|
|
|
|
|
|
|
|
"import" -> do |
|
|
|
|
|
pushStartCode import_ |
|
|
|
|
|
pure (Token TokImport l c) |
|
|
|
|
|
|
|
|
|
|
|
"of" -> laidOut TokOf l c |
|
|
"of" -> laidOut TokOf l c |
|
|
|
|
|
|
|
|
"module" -> do |
|
|
|
|
|
unless (haveModuleHeader state) $ do |
|
|
|
|
|
mapUserState $ \s -> s { haveModuleHeader = True } |
|
|
|
|
|
pushStartCode module_header |
|
|
|
|
|
pure (Token TokModule l c) |
|
|
|
|
|
|
|
|
|
|
|
(x:_) -> pure (Token (TokUnqual VarId text) l c) |
|
|
|
|
|
|
|
|
(_:_) -> pure (Token (TokUnqual VarId text) l c) |
|
|
|
|
|
|
|
|
[] -> error "empty keyword/identifier" |
|
|
[] -> error "empty keyword/identifier" |
|
|
|
|
|
|
|
|
|
|
|
earlyEnd :: TokenClass -> Int -> Int -> Alex Token |
|
|
earlyEnd tok l c = do |
|
|
earlyEnd tok l c = do |
|
|
popLayoutContext |
|
|
popLayoutContext |
|
|
delayToken (Token tok l c) |
|
|
delayToken (Token tok l c) |
|
|
pure (Token TokLEnd l c) |
|
|
pure (Token TokLEnd l c) |
|
|
|
|
|
|
|
|
|
|
|
offsideKeyword :: TokenClass -> Int -> Int -> Alex Token |
|
|
offsideKeyword tok l c = do |
|
|
offsideKeyword tok l c = do |
|
|
popLayoutContext |
|
|
|
|
|
|
|
|
popStartCode |
|
|
delayToken (Token tok l c) |
|
|
delayToken (Token tok l c) |
|
|
pure (Token TokSemi l c) |
|
|
pure (Token TokSemi l c) |
|
|
|
|
|
|
|
|
|
|
|
laidOut' :: Maybe (Int -> LayoutState) -> TokenClass -> Int -> Int -> Alex Token |
|
|
laidOut' n x l c = do |
|
|
laidOut' n x l c = do |
|
|
pushStartCode layout |
|
|
pushStartCode layout |
|
|
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n } |
|
|
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n } |
|
|
pure (Token x l c) |
|
|
pure (Token x l c) |
|
|
|
|
|
|
|
|
laidOut = laidOut' Nothing |
|
|
laidOut = laidOut' Nothing |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
alexMonadScan = do |
|
|
|
|
|
inp@(_,_,_,n) <- alexGetInput |
|
|
|
|
|
|
|
|
|
|
|
sc <- alexGetStartCode |
|
|
|
|
|
case alexScan inp sc of |
|
|
|
|
|
AlexEOF -> alexEOF |
|
|
|
|
|
AlexError error@(_,_,inp,_) -> |
|
|
|
|
|
alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp))) |
|
|
|
|
|
AlexSkip inp _len -> do |
|
|
|
|
|
alexSetInput inp |
|
|
|
|
|
alexMonadScan |
|
|
|
|
|
AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do |
|
|
|
|
|
alexSetInput inp' |
|
|
|
|
|
action (ignorePendingBytes inp) len |
|
|
|
|
|
} |