Refactor parser/lexer error message infra

2 years ago · 27186d92b3
--- a/.gitignore
+++ b/.gitignore
@ -14,5 +14,5 @@
 !/Setup.hs

 # alex/happy artefacts
 src/*.o
 src/*.hi
 src/**/*.o
 src/**/*.hi
--- a/ahc.cabal
+++ b/ahc.cabal
@ -28,6 +28,7 @@ executable ahc
 Frontend.Autogen.Lexer,
 Frontend.Autogen.Parser,
 Frontend.Lexer.Tokens,
 Frontend.Lexer.Wrapper,
 Frontend.Parser.Posn,
 Frontend.Syntax

--- a/hie.yaml
+++ b/hie.yaml
@ -1,10 +1,25 @@
 cradle:
 multi:
 - path: "./src/Frontend/Autogen"
 # disable HLS for the parser/lexer
 - path: "./src/Frontend/Autogen/"
 config:
 cradle:
 none:

 # enable it for the ahc source
 - path: "./src/"
 config:
 cradle:
 stack:

 # disable it for my random testing files
 # (and also Setup.hs!)
 - path: "./"
 config:
 cradle:
 stack:
 none:

 - path: "./.stack-work/"
 config:
 cradle:
 none:
--- a/src/Frontend/Autogen/Lexer.x
+++ b/src/Frontend/Autogen/Lexer.x
@ -1,30 +1,33 @@
 {
 module Frontend.Autogen.Lexer where

 import Control.Monad

 import qualified Data.ByteString.Lazy as Lbs
 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import qualified Data.Char
 import Data.Int (Int64)

 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens

 import Control.Monad

 import Debug.Trace
 import Frontend.Parser.Posn
 }

 %wrapper "monadUserState-bytestring"
 -- %wrapper "monadUserState-bytestring"

 $alpha = [a-zA-Z]
 $digit = [0-9]
 $white_nol = $white # \n
 $white_nol = $white # [\n\t]

 tokens :-
 $white_nol+ ;
 \t { \_ _ -> alexError "tab character in source code" }

 <0,module_header> "--" .* \n
 <0,import_> "--" .* \n
 { just $ pushStartCode newline }
 
 <0,module_header,import_>
 <0,import_>
 $alpha [$alpha $digit \_ \' \.]* { variableOrKeyword }

 <0> \= { always TokEqual }
@ -37,7 +40,7 @@ tokens :-
 <0> \{ { always TokOBrace }
 <0> \[ { always TokOSquare }

 <0,module_header,import_> {
 <0,import_> {
 \, { always TokComma }
 \( { always TokOParen }
 \) { always TokCParen }
@ -48,33 +51,33 @@ tokens :-

 <0> \;+ { always TokSemi }

 <0> \n  { just $ pushStartCode newline }
 <0,import_> \n { just $ pushStartCode newline }

 <0> \" { just startString }
 <0> \" { startString }

 <string> {
 \\ \" { stringSeg (T.singleton '"') }
 \\ \\ { stringSeg (T.singleton '\\') }

 \\ a { stringSeg (T.singleton '\a') }
 \\ b { stringSeg (T.singleton '\b') }
 \\ f { stringSeg (T.singleton '\f') }
 \\ n { stringSeg (T.singleton '\n') }
 \\ \n { stringSeg (T.singleton '\n') }
 \\ r { stringSeg (T.singleton '\r') }
 \\ v { stringSeg (T.singleton '\v') }
 \\ t { stringSeg (T.singleton '\t') }
 \\ \" { stringAppend (T.singleton '"') }
 \\ \\ { stringAppend (T.singleton '\\') }

 \\ a { stringAppend (T.singleton '\a') }
 \\ b { stringAppend (T.singleton '\b') }
 \\ f { stringAppend (T.singleton '\f') }
 \\ n { stringAppend (T.singleton '\n') }
 \\ \n { stringAppend (T.singleton '\n') }
 \\ r { stringAppend (T.singleton '\r') }
 \\ v { stringAppend (T.singleton '\v') }
 \\ t { stringAppend (T.singleton '\t') }

 \" { endString }
 
 [^\\\"]+ { stringChar }
 [^\\\"]+ { stringSegment }
 }

 <0,newline,comment,import_,module_header>
 <0,newline,comment,import_>
 "{-" { just $ pushStartCode comment }

 <comment> {
 "-}" { \i l -> popStartCode *> skip i l }
 "-}" { \_ _ -> popStartCode *> alexMonadScan }
 . ;
 }

@ -104,14 +107,10 @@ tokens :-

 <pending> () { emitPendingToken }

 <module_header> {
 \n ;
 }

 {
 alexEOF :: Alex Token
 alexEOF = do
 (AlexPn _ l c, _, _, _) <- alexGetInput
 (Posn l c, _, _, _) <- alexGetInput
 
 maybePopImportSC

@ -135,7 +134,7 @@ alexEOF = do

 yield k inp i = clearPendingLC *> yield' k inp i

 yield' k (AlexPn _ l c, _, s, _) i = do
 yield' k (Posn l c, _, s, _) i = do
 pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)

 setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
@ -144,45 +143,32 @@ clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
 always :: TokenClass -> AlexInput -> Int64 -> Alex Token
 always k x i = yield (const k) x i

 startString = do
 mapUserState $ \s -> s { stringBuffer = T.empty }
 -- reset the string buffer and push the string start code
 startString (p, _, _, _) _ = do
 mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p }
 pushStartCode string
 alexMonadScan

 endString (AlexPn _ l c, _, _, _) _i = do
 text <- stringBuffer <$> getUserState
 mapUserState $ \s -> s { stringBuffer = T.empty }
 -- pop the string start code, and emit the string buffer as a token.
 endString (Posn l c, _, _, _) _i = do
 state <- getUserState

 mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing }
 popStartCode
 pure (Token (TokString text) l c)

 stringChar input@(AlexPn _ _ _, _, buf, _) i = do
 let (Just (Posn l c)) = stringStartPosn state
 pure (Token (TokString (stringBuffer state)) l c)

 -- append a /lexed/ region to the string buffer
 stringSegment (Posn _ _, _, buf, _) i = do
 mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
 alexMonadScan

 stringSeg text _ _ = do
 -- append a constant fragment to the string buffer.
 stringAppend text _ _ = do
 mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
 alexMonadScan

 data LayoutState
 = LetLayout { layoutCol :: Int }
 | Layout { layoutCol :: Int }
 | ModLayout { layoutCol :: Int }
 deriving (Show)

 data AlexUserState =
 AlexUserState { layoutColumns :: ![LayoutState]
 , startCodes :: ![Int]
 , leastColumn :: !Int

 , pendingLayoutKw :: Maybe (Int -> LayoutState)
 , pendingTokens :: ![Token]
 , pendingLambdaCase :: !Bool
 , haveModuleHeader :: !Bool

 , stringBuffer :: !T.Text
 }

 alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty

 emitPendingToken :: AlexAction Token
 emitPendingToken _ _ = do
 t <- getUserState
@ -224,19 +210,68 @@ popStartCode = do
 alexSetStartCode x

 offsideRule :: AlexInput -> Int64 -> Alex Token
 offsideRule (AlexPn _ line col, _, s, _) _ = do
 ~(col':ctx) <- layoutColumns <$> getUserState
 case col `compare` layoutCol col' of
 EQ -> do
 popStartCode
 maybePopImportSC
 pure (Token TokSemi line col)
 GT -> do
 popStartCode
 alexMonadScan
 LT -> do
 mapUserState $ \s -> s { layoutColumns = ctx }
 pure (Token TokLEnd line col)
 offsideRule (Posn line col, _, _, _) _ = do
 columns <- layoutColumns <$> getUserState

 let continue = popStartCode *> alexMonadScan

 -- The "offside rule" governs how to insert virtual semicolon and
 -- closing '}' tokens. It applies in the "newline" state, and, if we
 -- stay in that state, the rule keeps applying. There are a couple of
 -- cases:
 case columns of
 -- If we have no layout context (or we're in a layout context that
 -- started with a physical '{'), then the offside rule plain doesn't
 -- apply.
 [] -> continue
 ExplicitLayout:_ -> continue
 -- Otherwise, we're dealing with something like
 -- 
 -- do token
 -- ^ this is the layout column.
 col':ctx -> do
 case col `compare` layoutCol col' of
 -- If we have something like
 --
 -- do token
 -- token
 -- ^ this is where we are
 -- then we emit a semicolon (and possibly do some bookeeping,
 -- like leaving the newline state)
 EQ -> do
 popStartCode
 maybePopImportSC
 pure (Token TokSemi line col)

 -- If we have something like
 --
 -- do token
 -- token
 -- ^ this is where we are
 -- then we don't emit anything, just leave the newline state,
 -- since this token continues the previous logical line.
 GT -> continue

 -- If we have something like
 --
 -- C D E
 -- do token
 -- do token
 -- do token
 -- token
 -- ^ we are here
 -- then we're behind the layout context, but not just one, three!
 -- we emit a closing '}' to close context 'E', and STAY in the
 -- newline context. when we eventually end up here again
 -- (recurring interleaved with the lexer state machine), we
 -- close the D and C contexts in the same way.
 LT -> do
 mapUserState $ \s -> s { layoutColumns = ctx }
 pure (Token TokLEnd line col)

 -- eventually we either exhaust all the layout contexts or get
 -- to a layout context we're EQ or GT compared to. in that case
 -- one of the other rules apply.

 maybePopImportSC :: Alex ()
 maybePopImportSC = do
@ -244,13 +279,13 @@ maybePopImportSC = do
 when (startcode == import_) popStartCode

 emptyLayout :: AlexInput -> Int64 -> Alex Token
 emptyLayout (AlexPn _ line col, _, _, _) _ = do
 emptyLayout (Posn line col, _, _, _) _ = do
 popStartCode
 pushStartCode newline
 pure (Token TokLEnd line col)

 startLayout :: AlexInput -> Int64 -> Alex Token
 startLayout (AlexPn _ line col, _, _, _) _ = do
 startLayout (Posn line col, _, _, _) _ = do
 state <- getUserState
 popStartCode
 let
@ -263,42 +298,70 @@ startLayout (AlexPn _ line col, _, _, _) _ = do
 Just s -> s
 Nothing -> Layout

 if col < col'
 -- here's another rule. suppose we have:
 --
 -- foo = bar where
 -- spam = ham
 --
 -- if we just apply the rule that the next token after a layout
 -- keyword determines the column for the layout context, then we're
 -- starting another layout context at column 1! that's definitely not
 -- what we want.
 --
 -- so a new layout context only starts if the first token is to the right
 -- of the previous layout context. that is: a block only starts if it's
 -- on the same line as the layout context, or indented further.
 if col <= col'
 then pushStartCode empty_layout
 else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
 pure (Token TokLStart line col)

 getLayout :: Alex LayoutState
 getLayout = do
 t <- getUserState
 case layoutColumns t of
 (x:_) -> pure x
 _ -> error "No layout?"
 popLayoutContext :: Alex ()
 popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }

 openBrace :: AlexInput -> Int64 -> Alex Token
 openBrace (AlexPn _ line col, _, _, _) _ = do
 openBrace (Posn line col, _, _, _) _ = do
 -- if we see a '{' token, we're probably in the layout state. in that
 -- case, we pop it! otherwise, we just pop the state anyway: if we
 -- were in <0>, then popping gets us back in <0>.
 popStartCode
 mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
 pure (Token TokOBrace line col)

 popLayoutContext :: Alex ()
 popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
 -- we push an ExplicitLayout state so that the offside rule stops
 -- applying (logical lines are delimited by physical semicolons) and a
 -- '}' can close it.
 mapUserState $ \s -> s { layoutColumns = ExplicitLayout:layoutColumns s }
 pure (Token TokOBrace line col)

 closeBrace :: AlexInput -> Int64 -> Alex Token
 closeBrace (AlexPn _ line col, _, _, _) _ = do
 ~(col':_) <- layoutColumns <$> getUserState
 if layoutCol col' < 0
 then popLayoutContext 
 else pure ()
 closeBrace (Posn line col, _, _, _) _ = do
 -- if we're lexing a '}' token (physical) and the rightmost layout
 -- context was started by a physical '{', then we can close it.
 -- otherwise we do nothing and probably get a parse error!
 columns <- layoutColumns <$> getUserState
 case columns of
 ExplicitLayout:_ -> popLayoutContext
 _ -> pure ()
 pure (Token TokCBrace line col)

 variableOrKeyword :: AlexAction Token
 variableOrKeyword (AlexPn _ l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
 variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))

 finishVarKw :: Int -> Int -> T.Text -> Alex Token
 finishVarKw l c text
 | T.null text = undefined
 | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $
 -- if we have a token like A.B.C, we reverse it and span at the
 -- first (last) dot, so that we have, e.g.:
 --
 -- "Aa.Bb.Cc" -> "cC.bB.aA"
 -- "Cc.Bb.Aa" -> ("Cc", ".bB.aA")
 --
 -- what we have then is the suffix and the prefix, but they've both
 -- been reversed. so we unreverse them, and also drop the first
 -- (last) dot from the prefix.
 --
 -- if the suffix starts with an uppercase letter, it's a constructor
 -- symbol (ConId).
 let
 txet = T.reverse text
 (suffix', prefix') = T.span (/= '.') txet
@ -319,6 +382,18 @@ finishVarKw l c text = do
 let col = layoutCol (head (layoutColumns state))

 case T.unpack text of
 -- we handle the contextual 'as'/'qualified' tokens using a
 -- startcode.
 --
 -- in the import_ state, as and qualified are keywords, unless the
 -- offside rule would apply to emit a ';' or '}' token. in that
 -- case, we emit a semicolon (what the offside rule would do!), and
 -- set the "keyword" (now changed to an identifier) as pending, so
 -- that it will be emitted by the next alexMonadScan.
 "import" -> do
 pushStartCode import_
 pure (Token TokImport l c)

 "as"
 | sc == import_, c > col -> pure (Token TokAs l c)
 | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c
@ -329,58 +404,79 @@ finishVarKw l c text = do
 | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c
 | otherwise -> pure (Token (TokUnqual VarId text) l c)

 -- when starting a layout context for let expressions we make sure
 -- that it is distinguishable from layout contexts started by
 -- anything else, because let layout contexts can be terminated
 -- ahead of time by the 'in' token. for instance in:
 --
 -- let x = 1 in x
 --
 -- there is no reason for the layout context that started after
 -- 'let' to be terminated by the 'in' token, since the offside rule
 -- hasn't had a chance to apply. the token stream in that case would look like
 --
 -- 'let' '{' x '=' 1 'in' x
 --
 -- which is a parse error. we do not implement the rule which says parse errors
 -- terminate layout contexts, instead doing this approximation.
 "let" -> laidOut' (Just LetLayout) TokLet l c
 "in" -> do
 laidout <- getLayout
 laidout <- layoutColumns <$> getUserState
 case laidout of
 -- let .. in critical pair:
 -- 'in' is allowed to close a layout context before the offside rule would apply.
 LetLayout _ -> earlyEnd TokIn l c
 LetLayout _:_ -> earlyEnd TokIn l c
 _ -> pure (Token TokIn l c)

 "data" -> pure (Token TokData l c)

 "where" -> do
 -- if this is the where in the module_header, then
 -- pop the start code so that the offside rule applies again
 when (sc == module_header) popStartCode
 laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
 "where" -> laidOut TokWhere l c
 "module" -> pure (Token TokModule l c)

 -- when we lex a \ token, a flag is set in the lexer state to
 -- indicate that, if there is a 'case' token directly following,
 -- that token is to be interpreted as part of a lambda-case
 -- construct, and so must start a layout context for its branches.
 "case"
 -- "case" is a layout token if it's immediately following a \\
 | pendingLambdaCase state -> laidOut TokCase l c
 | otherwise -> pure (Token TokCase l c)

 "import" -> do
 pushStartCode import_
 pure (Token TokImport l c)

 "of" -> laidOut TokOf l c

 "module" -> do
 unless (haveModuleHeader state) $ do
 mapUserState $ \s -> s { haveModuleHeader = True }
 pushStartCode module_header
 pure (Token TokModule l c)

 (x:_) -> pure (Token (TokUnqual VarId text) l c)
 (_:_) -> pure (Token (TokUnqual VarId text) l c)

 [] -> error "empty keyword/identifier"

 earlyEnd :: TokenClass -> Int -> Int -> Alex Token
 earlyEnd tok l c = do
 popLayoutContext
 delayToken (Token tok l c)
 pure (Token TokLEnd l c)

 offsideKeyword :: TokenClass -> Int -> Int -> Alex Token
 offsideKeyword tok l c = do
 popLayoutContext
 popStartCode
 delayToken (Token tok l c)
 pure (Token TokSemi l c)

 laidOut' :: Maybe (Int -> LayoutState) -> TokenClass -> Int -> Int -> Alex Token
 laidOut' n x l c = do
 pushStartCode layout
 mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
 pure (Token x l c)

 laidOut = laidOut' Nothing
 }

 alexMonadScan = do
 inp@(_,_,_,n) <- alexGetInput

 sc <- alexGetStartCode
 case alexScan inp sc of
 AlexEOF -> alexEOF
 AlexError error@(_,_,inp,_) ->
 alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp)))
 AlexSkip inp _len -> do
 alexSetInput inp
 alexMonadScan
 AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do
 alexSetInput inp'
 action (ignorePendingBytes inp) len
 }
--- a/src/Frontend/Autogen/Parser.y
+++ b/src/Frontend/Autogen/Parser.y
@ -3,16 +3,20 @@
 module Frontend.Autogen.Parser where

 import qualified Data.Text as T
 import Data.Maybe

 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens
 import Frontend.Parser.Posn
 import Frontend.Syntax
 import Frontend.Autogen.Lexer

 import qualified Prelude
 import Prelude hiding (span)

 import Debug.Trace

 import Control.Monad
 }

 %name parseExp Exp
@ -121,13 +125,13 @@ Apat :: { Pat }
 | '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }

 Decl :: { Decl }
 : CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 }
 | VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 }
 | Pat Rhs { PatDecl $1 $2 }
 : CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 (startPosn (head $1)) (endPosn $3) }
 | VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 (startPosn $1) (endPosn $4) }
 | Pat Rhs { PatDecl $1 $2 (startPosn $1) (endPosn $2) }

 Rhs :: { Rhs }
 : '=' Exp { BareRhs $2 [] }
 | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) }
 : '=' Exp { BareRhs $2 [] (startPosn $1) (endPosn $2) }
 | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) (startPosn $1) (endPosn $4) }

 LaidOutList(p)
 : START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) }
@ -140,14 +144,17 @@ LOLContents(p, End)

 Module :: { Module }
 : 'module' CON ImportExportList 'where' LaidOutList(ModItem)
 { Module { moduleName = toModId (getVar $2)
 , moduleExports = $3
 , moduleItems = thd $5 }
 {% do { (imports,items) <- spanModuleItems (thd $5)
 ; pure $ Module { moduleName = toModId (getVar $2)
 , moduleExports = fst $3
 , moduleImports = imports
 , moduleItems = items }
 }
 }

 ImportExportList :: { Maybe [NamespacedItem ParsedVar] }
 : {-empty-} { Nothing }
 | '(' CommaList(NSItem) ')' { Just $2 }
 ImportExportList :: { (Maybe [NamespacedItem ParsedVar], Maybe Posn) }
 : {-empty-} { (Nothing, Nothing) }
 | '(' CommaList(NSItem) ')' { (Just $2, Just (endPosn $3)) }

 NSItem :: { NamespacedItem ParsedVar }
 : VAR { IEVar (getVar $1) }
@ -155,21 +162,21 @@ NSItem :: { NamespacedItem ParsedVar }
 | 'module' CON { IEModule (getVar $2) }

 ModItem :: { Item }
 : Decl { ModDecl $1 }
 | Import { ModImport $1 } 
 : Decl { ModDecl $1 (startPosn $1) (endPosn $1) }
 | Import { ModImport $1 (startPosn $1) (endPosn $1) } 

 Import :: { ModuleImport ParsedVar }
 : 'import' modid ImportExportList
 { Import $2 $3 False Nothing }
 { Import $2 (fst $3) False Nothing (startPosn $1) (fromMaybe (endPosn $2) (snd $3)) }

 | 'import' modid ImportExportList 'as' CON
 { Import $2 $3 False (Just (getVar $5)) }
 { Import $2 (fst $3) False (Just (getVar $5)) (startPosn $1) (endPosn $5) }
 
 | 'import' 'qualified' modid ImportExportList
 { Import $3 $4 True Nothing }
 { Import $3 (fst $4) True Nothing (startPosn $1) (fromMaybe (endPosn $3) (snd $4)) }
 
 | 'import' 'qualified' modid ImportExportList 'as' CON
 { Import $3 $4 True (Just (getVar $6)) }
 { Import $3 (fst $4) True (Just (getVar $6)) (startPosn $1) (endPosn $6) }

 Opt(p)
 : { () }
@ -246,4 +253,24 @@ makeTuplePattern xs = TupPat xs

 makeTuple [x] = ParenExp x
 makeTuple xs = Tuple xs

 spanModuleItems xs = do
 let
 isImport (ModImport _ _ _) = True
 isImport _ = False

 (imports, items) = Prelude.span isImport xs
 
 forM_ items $ \x -> case x of
 ModImport _ start end ->
 alexThrow $ \fname ->
 ParseError { parseErrorMessage = "all import statements should be at the top of the file."
 , parseErrorInlineDesc = Just "unexpected import statement"
 , parseErrorFilename = fname
 , parseErrorBegin = start
 , parseErrorEnd = end
 }
 _ -> pure ()

 pure (map itemImport imports, items)
 }
--- a/src/Frontend/Lexer/Wrapper.hs
+++ b/src/Frontend/Lexer/Wrapper.hs
@ -0,0 +1,181 @@
 module Frontend.Lexer.Wrapper where

 import Control.Applicative as App (Applicative (..))

 import Data.Word (Word8)

 import Data.Int (Int64)
 import qualified Data.Char
 import qualified Data.ByteString.Lazy as ByteString
 import qualified Data.ByteString.Internal as ByteString (w2c)
 import Frontend.Lexer.Tokens (Token)
 import qualified Data.Text as T
 import Frontend.Parser.Posn

 type Byte = Word8
 type AlexInput = ( Posn, -- current position,
 Char, -- previous char
 ByteString.ByteString, -- current input string
 Int64) -- bytes consumed so far

 ignorePendingBytes :: AlexInput -> AlexInput
 ignorePendingBytes i = i -- no pending bytes when lexing bytestrings

 alexInputPrevChar :: AlexInput -> Char
 alexInputPrevChar (_,c,_,_) = c

 alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
 alexGetByte (p,_,cs,n) =
 case ByteString.uncons cs of
 Nothing -> Nothing
 Just (b, cs') ->
 let c = ByteString.w2c b
 p' = alexMove p c
 n' = n+1
 in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n'))

 -- -----------------------------------------------------------------------------
 -- Token positions

 -- `Posn' records the location of a token in the input text. It has three
 -- fields: the address (number of chacaters preceding the token), line number
 -- and column of a token within the file. `start_pos' gives the position of the
 -- start of the file and `eof_pos' a standard encoding for the end of file.
 -- `move_pos' calculates the new position after traversing a given character,
 -- assuming the usual eight character tab stops.


 alexStartPos :: Posn
 alexStartPos = Posn 1 1

 alexMove :: Posn -> Char -> Posn
 alexMove (Posn l c) '\t' = Posn l (c+8-((c-1) `mod` 8))
 alexMove (Posn l _) '\n' = Posn (l+1) 1
 alexMove (Posn l c) _ = Posn l (c+1)

 data AlexState = AlexState {
 alex_pos :: !Posn, -- position at current input location

 alex_bpos:: !Int64, -- bytes consumed so far
 alex_inp :: ByteString.ByteString, -- the current input
 alex_chr :: !Char, -- the character before the input

 alex_scd :: !Int -- the current startcode

 , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
 , alex_fname :: String
 }

 runAlex :: String -> ByteString.ByteString -> Alex a -> Either ParseError a
 runAlex fname input__ (Alex f) =
 case f initState of
 Left msg -> Left msg
 Right ( _, a ) -> Right a
 where
 initState = AlexState
 { alex_bpos = 0
 , alex_pos = alexStartPos
 , alex_inp = input__
 , alex_chr = '\n'

 , alex_ust = alexInitUserState

 , alex_scd = 0
 , alex_fname = fname
 }

 newtype Alex a = Alex { unAlex :: AlexState -> Either ParseError (AlexState, a) }

 instance Functor Alex where
 fmap f a = Alex $ \s -> case unAlex a s of
 Left msg -> Left msg
 Right (s', a') -> Right (s', f a')

 instance Applicative Alex where
 pure a = Alex $ \s -> Right (s, a)
 fa <*> a = Alex $ \s -> case unAlex fa s of
 Left msg -> Left msg
 Right (s', f) -> case unAlex a s' of
 Left msg -> Left msg
 Right (s'', b) -> Right (s'', f b)

 instance Monad Alex where
 m >>= k = Alex $ \s -> case unAlex m s of
 Left msg -> Left msg
 Right (s',a) -> unAlex (k a) s'
 return = App.pure

 alexGetInput :: Alex AlexInput
 alexGetInput =
 Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} ->
 Right (s, (pos,c,inp__,bpos))


 alexSetInput :: AlexInput -> Alex ()
 alexSetInput (pos,c,inp__,bpos)
 = Alex $ \s -> Right ( s { alex_pos = pos
 , alex_bpos = bpos
 , alex_chr = c
 , alex_inp = inp__
 }
 , ())

 alexError :: String -> Alex a
 alexError message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing (alex_pos s) (alex_pos s))

 alexErrorPosn :: Posn -> Posn -> String -> Alex a
 alexErrorPosn start end message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing start end)

 alexThrow :: (String -> ParseError) -> Alex a
 alexThrow err = Alex $ \s -> Left (err (alex_fname s))

 alexGetStartCode :: Alex Int
 alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)

 alexSetStartCode :: Int -> Alex ()
 alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())

 -- -----------------------------------------------------------------------------
 -- Useful token actions

 type AlexAction result = AlexInput -> Int64 -> Alex result

 -- perform an action for this token, and set the start code to a new value
 andBegin :: AlexAction result -> Int -> AlexAction result
 (action `andBegin` code) input__ len = do
 alexSetStartCode code
 action input__ len

 token :: (AlexInput -> Int64 -> token) -> AlexAction token
 token t input__ len = return (t input__ len)

 data LayoutState
 = LetLayout { layoutCol :: Int }
 | Layout { layoutCol :: Int }
 | ExplicitLayout
 deriving (Show)

 data AlexUserState =
 AlexUserState { layoutColumns :: ![LayoutState]
 , startCodes :: ![Int]
 , leastColumn :: !Int

 , pendingLayoutKw :: Maybe (Int -> LayoutState)
 , pendingTokens :: ![Token]
 , pendingLambdaCase :: !Bool

 , stringBuffer :: !T.Text
 , stringStartPosn :: Maybe Posn
 }

 alexInitUserState :: AlexUserState
 alexInitUserState = AlexUserState [] [] 0 Nothing [] False T.empty Nothing

 data ParseError
 = ParseError { parseErrorMessage :: String
 , parseErrorFilename :: String
 , parseErrorInlineDesc :: Maybe String
 , parseErrorBegin :: Posn
 , parseErrorEnd :: Posn
 }
 deriving (Eq, Show)
--- a/src/Frontend/Syntax.hs
+++ b/src/Frontend/Syntax.hs
@ -72,15 +72,27 @@ instance HasPosn (FeType var) where
 span sp ep x = SPType x (startPosn sp) (endPosn ep)

 data FeDecl var
 = PatDecl (FePat var) (FeRhs var)
 | FunDecl var [FePat var] (FeRhs var)
 | TySig [var] (FeType var)
 = PatDecl { pdPat :: FePat var, declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
 | FunDecl { fdVar :: var, fdArgs :: [FePat var], declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
 | TySig { tsVars :: [var], tsType :: FeType var, declBegin :: Posn, declEnd :: Posn }
 deriving (Eq, Show, Ord)

 instance HasPosn (FeDecl var) where
 startPosn = declBegin
 endPosn = declEnd

 span sp ep s = s { declBegin = startPosn sp, declEnd = endPosn ep }

 data FeRhs var
 = BareRhs (FeExpr var) [FeDecl var]
 = BareRhs { bareRhs :: FeExpr var, rhsWhere :: [FeDecl var], rhsBegin :: Posn, rhsEnd :: Posn }
 deriving (Eq, Show, Ord)

 instance HasPosn (FeRhs var) where
 startPosn = rhsBegin
 endPosn = rhsEnd

 span sp ep s = s { rhsBegin = startPosn sp, rhsEnd = endPosn ep }

 data Literal
 = LitString T.Text
 | LitNumber Integer
@ -89,6 +101,7 @@ data Literal
 data FeModule var
 = Module { moduleName :: var
 , moduleExports :: Maybe [NamespacedItem var]
 , moduleImports :: [ModuleImport var]
 , moduleItems :: [ModuleItem var]
 }
 deriving (Eq, Show, Ord)
@ -98,9 +111,17 @@ data ModuleImport var
 , importList :: Maybe [NamespacedItem var]
 , importQualified :: Bool
 , importAlias :: Maybe var
 , importBegin :: Posn
 , importEnd :: Posn
 }
 deriving (Eq, Show, Ord)

 instance HasPosn (ModuleImport var) where
 startPosn = importBegin
 endPosn = importEnd

 span sp ep s = s { importBegin = startPosn sp, importEnd = endPosn ep }

 data NamespacedItem var
 = IEVar var
 | IECon var
@ -108,8 +129,8 @@ data NamespacedItem var
 deriving (Eq, Show, Ord)

 data ModuleItem var
 = ModDecl (FeDecl var)
 | ModImport (ModuleImport var)
 = ModDecl  { itemDecl :: FeDecl var, itemBegin :: Posn, itemEnd :: Posn }
 | ModImport { itemImport :: ModuleImport var, itemBegin :: Posn, itemEnd :: Posn }
 deriving (Eq, Show, Ord)

 data ParsedVar
@ -140,4 +161,10 @@ instance HasPosn ParsedVar where
 startPosn = varBegin
 endPosn = varEnd

 span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }
 span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }

 instance HasPosn (ModuleItem var) where
 startPosn = itemBegin
 endPosn = itemEnd

 span sp ep s = s { itemBegin = startPosn sp, itemEnd = endPosn ep }
--- a/src/Main.hs
+++ b/src/Main.hs
@ -1,40 +1,91 @@
 -- {-# LANGUAGE BlockArguments #-}
 module Main where

 import Control.Monad ( unless )

 import qualified Data.ByteString.Lazy as Lbs
 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import Data.Foldable

 import Frontend.Lexer.Tokens
 import Frontend.Autogen.Lexer
 import Debug.Trace

 import Frontend.Autogen.Parser
 import Frontend.Autogen.Lexer
 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens

 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import Control.Monad ( unless )
 import System.Environment (getArgs)
 import Text.Show.Pretty (pPrint)

 import Debug.Trace
 import Text.Show.Pretty (pPrint)
 import Frontend.Parser.Posn
 import System.Posix.Internals
 import GHC.IO.Handle.FD (stdout, handleToFd)
 import GHC.IO.FD
 import Data.Maybe (fromMaybe)

 main :: IO ()
 main = do
 args <- getArgs
 for_ args $ \str -> do
 Main.lex pPrint parseMod =<< Lbs.readFile str
 Main.lex str pPrint parseMod =<< Lbs.readFile str

 testParse :: String -> IO ()
 testParse s = Main.lex print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 testParse s = Main.lex "<interactive>" print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))

 testLex :: String -> IO ()
 testLex s = Main.lex (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 
 lex :: (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
 lex show cont arg = do
 let x = runAlex arg cont
 testLex s = Main.lex "<interactive>" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))

 lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
 lex fname show cont arg = do
 let x = runAlex fname arg cont
 case x of
 Left e -> print e
 Left e -> showParseError e
 Right x -> show x

 showParseError :: ParseError -> IO ()
 showParseError pe = do
 code <- lines <$> readFile (parseErrorFilename pe)
 color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout

 let
 linum = posnLine (parseErrorBegin pe)
 startcol = posnColm (parseErrorBegin pe)

 multiline = linum /= posnLine (parseErrorEnd pe)

 width
 | multiline = 0
 | otherwise = max 0 (posnColm (parseErrorEnd pe) - startcol - 1)

 linum' = show linum

 line = [ show linum ++ sep ++ code !! (linum - 1) | linum <- [ linum .. posnLine (parseErrorEnd pe) ] ]

 padding = replicate (length linum') ' ' ++ sep
 padding' = replicate (length linum') ' ' ++ " "

 caret = replicate (startcol - 1) ' ' ++ red ++ "^"
 squiggle = replicate width '~'

 (red, bold, reset, sep)
 | color = ("\x1b[31m", "\x1b[1m", "\x1b[0m", "\x1b[1;34m \9474 \x1b[0m")
 | otherwise = ("", "", "", "")

 putStr . unlines $
 [ bold
 ++ parseErrorFilename pe
 ++ ":" ++ show linum ++ ":" ++ show startcol ++ ": "
 ++ red ++ "parse error:" ++ reset

 , padding'
 , init (unlines line)
 , padding' ++ caret ++ squiggle ++ reset ++ fromMaybe "" ((" " ++) <$> parseErrorInlineDesc pe)

 , ""
 , padding' ++ parseErrorMessage pe
 ]

 scan :: [Token] -> Alex [Token]
 scan acc = do
 tok <- alexMonadScan
@ -45,6 +96,7 @@ scan acc = do
 , "just lexed: " ++ show tok
 , "sc: " ++ show sc
 , "sc stack: " ++ show (startCodes state)
 , "layout stack: " ++ show (layoutColumns state)
 ]
 case tokenClass tok of
 TokEof -> pure (reverse acc)