amelia
/
ahc

{module Frontend.Autogen.Lexer where
import Control.Monad
import qualified Data.ByteString.Lazy as Lbsimport qualified Data.Text.Encoding as Timport qualified Data.Text as Timport qualified Data.Charimport Data.Int (Int64)
import Frontend.Lexer.Wrapperimport Frontend.Lexer.Tokensimport Frontend.Parser.Posn}
-- %wrapper "monadUserState-bytestring"
$lower = [a-z]$upper = [A-Z]$alpha = [ $lower $upper ]
$digit = [0-9]$alnum = [ $alpha $digit ]
$white_nol = $white # [\n\t]
$optail = [\! \# \$ \% \& \* \+ \. \/ \< \= \> \? \@ \\ \^ \| \- \~ \:]$ophead = $optail # \:
@conid     = $upper [$alnum \_ \']*@namespace = (@conid \.)*
tokens :-  $white_nol+                   ;  \t                            { \_ _ -> alexError "tab character in source code" }
<0,import_,foreign_> "--" .* \n  { just $ pushStartCode newline }
<0> \=                            { always TokEqual }<0> \`                            { always TokTick }<0> \: \:                         { always TokDoubleColon }<foreign_> \: \:                  { \i l -> popStartCode *> always TokDoubleColon i l }
<0> \\                            { \i l -> setPendingLC *> yield' (const TokLambda) i l }<0> "->"                          { always TokArrow }<0> "_"                           { always TokUnder }
<0> \{                            { always TokOBrace }<0> \[                            { always TokOSquare }
<0,import_,foreign_> {  \,              { always TokComma }  \(              { always TokOParen }  \)              { always TokCParen }}
<0> \}                            { closeBrace }<0> \]                            { always TokCSquare }
<0> \;+                           { always TokSemi }
<0,import_,foreign_> \n           { just $ pushStartCode newline }
<0,foreign_> \"                   { startString }
<string> {  \\ \"                           { stringAppend (T.singleton '"') }  \\ \\                           { stringAppend (T.singleton '\\') }
  \\ a                            { stringAppend (T.singleton '\a') }  \\ b                            { stringAppend (T.singleton '\b') }  \\ f                            { stringAppend (T.singleton '\f') }  \\ n                            { stringAppend (T.singleton '\n') }  \\ \n                           { stringAppend (T.singleton '\n') }  \\ r                            { stringAppend (T.singleton '\r') }  \\ v                            { stringAppend (T.singleton '\v') }  \\ t                            { stringAppend (T.singleton '\t') }
  \"                              { endString }    [^\\\"]+                        { stringSegment }}
<0,newline,comment,import_>  "{-"          { just $ pushStartCode comment }
<comment> {  "-}"   { \_ _ -> popStartCode *> alexMonadScan }  .      ;}
-- newline: emit a semicolon when de-denting<newline> {  \n         ;  "--" .* \n ;    () { offsideRule }}
-- layout: indentation of the next token is context for offside rule<layout> {  \n         ;  "--" .* \n ;     \{ { openBrace }  () { startLayout }}
<empty_layout> () { emptyLayout }
<pending> () { emitPendingToken }
-- identifiers and keywords<0,import_,foreign_> {  $lower [$alpha $digit \_ \']*   { variableOrKeyword }  $upper [$alpha $digit \_ \']*   { yield (TokUnqual ConId) }
  $ophead $optail*                { yield (TokUnqualOp VarId) }  : $optail*                      { yield (TokUnqualOp ConId) }
  @namespace $lower [$alpha $digit \_ \']* { qualifiedVariable }  @namespace $upper [$alpha $digit \_ \']* { qualifiedVariable }
  @namespace $ophead $optail*              { qualifiedOperator }  @namespace : $optail*                    { qualifiedOperator }}
{alexEOF :: Alex TokenalexEOF = do  (Posn l c, _, _, _) <- alexGetInput    maybePopImportSC
  state <- getUserState
  unless (T.null (stringBuffer state)) $ do    alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
  case layoutColumns state of    -- EOF is allowed to close as many layout contexts as there are    -- pending (number of pending layout contexts is the length of the    -- list minus one, since there's the one initial layout context.)    _:tail  -> do      mapUserState $ \s ->        s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]          , layoutColumns = []          }      pushStartCode pending      pure (Token TokLEnd l c)    _    -> pure $ Token TokEof l c
yield k inp i = clearPendingLC *> yield' k inp i
yield' k (Posn l c, _, s, _) i = do  pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
setPendingLC    = mapUserState $ \s -> s { pendingLambdaCase = True }clearPendingLC  = mapUserState $ \s -> s { pendingLambdaCase = False }
always :: TokenClass -> AlexInput -> Int64 -> Alex Tokenalways k x i = yield (const k) x i
-- reset the string buffer and push the string start codestartString (p, _, _, _) _ = do  mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p }  pushStartCode string  alexMonadScan
-- pop the string start code, and emit the string buffer as a token.endString (Posn l c, _, _, _) _i = do  state <- getUserState
  mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing }  popStartCode
  let (Just (Posn l c)) = stringStartPosn state  pure (Token (TokString (stringBuffer state)) l c)
-- append a /lexed/ region to the string bufferstringSegment (Posn _ _, _, buf, _) i = do  mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }  alexMonadScan
-- append a constant fragment to the string buffer.stringAppend text _ _ = do  mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }  alexMonadScan
emitPendingToken :: AlexAction TokenemitPendingToken _ _ = do  t <- getUserState  case pendingTokens t of    [] -> do      popStartCode      alexMonadScan    (x:xs) -> do      mapUserState $ \s -> s { pendingTokens = xs }      pure x
delayToken :: Token -> Alex ()delayToken t = do  mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }  pushStartCode pending
just :: Alex a -> AlexAction Tokenjust k _ _ = k *> alexMonadScan
getUserState :: Alex AlexUserStategetUserState = Alex $ \s -> Right (s, alex_ust s)
mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
pushStartCode :: Int -> Alex ()pushStartCode c = do  sc <- alexGetStartCode  mapUserState $ \s -> s { startCodes = sc:startCodes s }  alexSetStartCode c  popStartCode :: Alex ()popStartCode = do  sc <- startCodes <$> getUserState  case sc of    [] -> alexSetStartCode 0    (x:xs) -> do      mapUserState $ \s -> s { startCodes = xs }      alexSetStartCode x
offsideRule :: AlexInput -> Int64 -> Alex TokenoffsideRule (Posn line col, _, _, _) _ = do  columns <- layoutColumns <$> getUserState
  let continue = popStartCode *> alexMonadScan
  -- The "offside rule" governs how to insert virtual semicolon and  -- closing '}' tokens. It applies in the "newline" state, and, if we  -- stay in that state, the rule keeps applying. There are a couple of  -- cases:  case columns of    -- If we have no layout context (or we're in a layout context that    -- started with a physical '{'), then the offside rule plain doesn't    -- apply.    []               -> continue    ExplicitLayout:_ -> continue    -- Otherwise, we're dealing with something like    --     --   do token    --      ^ this is the layout column.    col':ctx -> do      case col `compare` layoutCol col' of        -- If we have something like        --        --   do token        --      token        --      ^ this is where we are        -- then we emit a semicolon (and possibly do some bookeeping,        -- like leaving the newline state)        EQ -> do          popStartCode          maybePopImportSC          pure (Token TokSemi line col)
        -- If we have something like        --        --  do token        --      token        --      ^ this is where we are        -- then we don't emit anything, just leave the newline state,        -- since this token continues the previous logical line.        GT -> continue
        -- If we have something like        --        --       C  D  E        --    do token        --       do token        --          do token        --   token        --   ^ we are here        -- then we're behind the layout context, but not just one, three!        -- we emit a closing '}' to close context 'E', and STAY in the        -- newline context. when we eventually end up here again        -- (recurring interleaved with the lexer state machine), we        -- close the D and C contexts in the same way.        LT -> do          mapUserState $ \s -> s { layoutColumns = ctx }          pure (Token TokLEnd line col)
        -- eventually we either exhaust all the layout contexts or get        -- to a layout context we're EQ or GT compared to. in that case        -- one of the other rules apply.
maybePopImportSC :: Alex ()maybePopImportSC = do  startcode <- alexGetStartCode  when (startcode == import_ || startcode == foreign_) popStartCode
emptyLayout :: AlexInput -> Int64 -> Alex TokenemptyLayout (Posn line col, _, _, _) _ = do  popStartCode  pushStartCode newline  pure (Token TokLEnd line col)
startLayout :: AlexInput -> Int64 -> Alex TokenstartLayout (Posn line col, _, _, _) _ = do  state <- getUserState  popStartCode  let    col' =      case layoutColumns state of        [] -> 0        (x:_) -> layoutCol x
    layoutKind = case pendingLayoutKw state of      Just s -> s      Nothing -> Layout
  -- here's another rule. suppose we have:  --  --    foo = bar where  --    spam = ham  --  -- if we just apply the rule that the next token after a layout  -- keyword determines the column for the layout context, then we're  -- starting another layout context at column 1! that's definitely not  -- what we want.  --  -- so a new layout context only starts if the first token is to the right  -- of the previous layout context. that is: a block only starts if it's  -- on the same line as the layout context, or indented further.  if col <= col'    then pushStartCode empty_layout    else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }  pure (Token TokLStart line col)
popLayoutContext :: Alex ()popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
openBrace :: AlexInput -> Int64 -> Alex TokenopenBrace (Posn line col, _, _, _) _ = do  -- if we see a '{' token, we're probably in the layout state. in that  -- case, we pop it!  otherwise, we just pop the state anyway: if we  -- were in <0>, then popping gets us back in <0>.  popStartCode
  -- we push an ExplicitLayout state so that the offside rule stops  -- applying (logical lines are delimited by physical semicolons) and a  -- '}' can close it.  mapUserState $ \s -> s { layoutColumns = ExplicitLayout:layoutColumns s }  pure (Token TokOBrace line col)
closeBrace :: AlexInput -> Int64 -> Alex TokencloseBrace (Posn line col, _, _, _) _ = do  -- if we're lexing a '}' token (physical) and the rightmost layout  -- context was started by a physical '{', then we can close it.  -- otherwise we do nothing and probably get a parse error!  columns <- layoutColumns <$> getUserState  case columns of    ExplicitLayout:_ -> popLayoutContext    _ -> pure ()  pure (Token TokCBrace line col)
variableOrKeyword :: AlexAction TokenvariableOrKeyword (Posn l c, _, s, _) size =  finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
qualifiedVariable :: AlexAction TokenqualifiedVariable (Posn l c, _, s, _) size =  finishVar TokUnqual TokQual l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
qualifiedOperator :: AlexAction TokenqualifiedOperator (Posn l c, _, s, _) size =  finishVar TokUnqualOp TokQualOp l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
finishVarKw :: Int -> Int -> T.Text -> Alex TokenfinishVarKw l c text  | T.null text = undefined  | Data.Char.isUpper (T.head text) = finishVar TokUnqual TokQual l c text
finishVarKw l c text = do  sc    <- alexGetStartCode  state <- getUserState    clearPendingLC
  let col  = layoutCol (head (layoutColumns state))
  case T.unpack text of    -- we handle the contextual 'as'/'qualified' tokens using a    -- startcode.    --    -- in the import_ state, as and qualified are keywords, unless the    -- offside rule would apply to emit a ';' or '}' token. in that    -- case, we emit a semicolon (what the offside rule would do!), and    -- set the "keyword" (now changed to an identifier) as pending, so    -- that it will be emitted by the next alexMonadScan.    "import" -> do      when ((sc /= import_) && (sc /= foreign_)) $ pushStartCode import_      pure (Token TokImport l c)
    "as" -> conditionalKeyword l c import_ (c > col) TokAs    "qualified" -> conditionalKeyword l c import_ (c > col) TokQualified
    "foreign" -> do      when (sc /= foreign_) $ pushStartCode foreign_      pure (Token TokForeign l c)          "export" -> conditionalKeyword l c foreign_ (c > col) TokExport    "safe" -> conditionalKeyword l c foreign_ (c > col) TokSafe    "unsafe" -> conditionalKeyword l c foreign_ (c > col) TokUnsafe    "ccall" -> conditionalKeyword l c foreign_ (c > col) TokCCall
    -- when starting a layout context for let expressions we make sure    -- that it is distinguishable from layout contexts started by    -- anything else, because let layout contexts can be terminated    -- ahead of time by the 'in' token. for instance in:    --    --    let x = 1 in x    --    -- there is no reason for the layout context that started after    -- 'let' to be terminated by the 'in' token, since the offside rule    -- hasn't had a chance to apply. the token stream in that case would look like    --    --    'let' '{' x '=' 1 'in' x    --    -- which is a parse error. we do not implement the rule which says parse errors    -- terminate layout contexts, instead doing this approximation.    "let"  -> laidOut' (Just LetLayout) TokLet l c    "in"   -> do      laidout <- layoutColumns <$> getUserState      case laidout of        LetLayout _:_ -> earlyEnd TokIn l c        _ -> pure (Token TokIn l c)
    "data" -> pure (Token TokData l c)
    "where" -> laidOut TokWhere l c    "module" -> pure (Token TokModule l c)
    -- when we lex a \ token, a flag is set in the lexer state to    -- indicate that, if there is a 'case' token directly following,    -- that token is to be interpreted as part of a lambda-case    -- construct, and so must start a layout context for its branches.    "case"      | pendingLambdaCase state -> laidOut TokCase l c      | otherwise               -> pure (Token TokCase l c)
    "of"    -> laidOut TokOf l c
    (_:_) -> pure (Token (TokUnqual VarId text) l c)
    [] -> error "empty keyword/identifier"

conditionalKeyword l c import_ cond kw = do  sc <- alexGetStartCode  case () of    () | sc == import_, cond -> pure (Token kw l c)       | sc == import_       -> offsideKeyword (TokUnqual VarId text) l c       | otherwise           -> pure (Token (TokUnqual VarId text) l c)  where text = T.pack (show kw)
finishVar :: (IdClass -> T.Text -> TokenClass) -> (IdClass -> T.Text -> T.Text -> TokenClass) -> Int -> Int -> T.Text -> Alex TokenfinishVar tokunqual tokqual l c text  | T.null text = undefined  | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $    let      txet = T.reverse text      (suffix', prefix') = T.span (/= '.') txet
      prefix = T.reverse (T.tail prefix')      suffix = T.reverse suffix'    in if Data.Char.isUpper (T.head suffix)        then Token (tokqual ConId prefix suffix) l c        else Token (tokqual VarId prefix suffix) l c  | Data.Char.isUpper (T.head text) = pure $ Token (tokunqual ConId text) l c  | otherwise = pure $ Token (tokunqual VarId text) l c
earlyEnd :: TokenClass -> Int -> Int -> Alex TokenearlyEnd tok l c = do  popLayoutContext  delayToken (Token tok l c)  pure (Token TokLEnd l c)
offsideKeyword :: TokenClass -> Int -> Int -> Alex TokenoffsideKeyword tok l c = do  popStartCode  delayToken (Token tok l c)  pure (Token TokSemi l c)
laidOut' :: Maybe (Int -> LayoutState) -> TokenClass -> Int -> Int -> Alex TokenlaidOut' n x l c = do  pushStartCode layout  mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }  pure (Token x l c)
laidOut = laidOut' Nothing
alexMonadScan = do  inp@(_,_,_,n) <- alexGetInput
  sc <- alexGetStartCode  case alexScan inp sc of    AlexEOF -> alexEOF    AlexError error@(_,_,inp,_) ->      alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp)))    AlexSkip inp _len -> do      alexSetInput inp      alexMonadScan    AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do      alexSetInput inp'      action (ignorePendingBytes inp) len}