Refactor parser/lexer error message infra

3 years ago · 27186d92b3
--- a/.gitignore
+++ b/.gitignore
@ -14,5 +14,5 @@
 !/Setup.hs
 # alex/happy artefacts
 src/*.o
 src/*.hi
 src/**/*.o
 src/**/*.hi
--- a/ahc.cabal
+++ b/ahc.cabal
@ -28,6 +28,7 @@ executable ahc
    Frontend.Autogen.Lexer,
    Frontend.Autogen.Parser,
    Frontend.Lexer.Tokens,
    Frontend.Lexer.Wrapper,
    Frontend.Parser.Posn,
    Frontend.Syntax
--- a/hie.yaml
+++ b/hie.yaml
@ -1,10 +1,25 @@
 cradle:
  multi:
    - path: "./src/Frontend/Autogen"
    # disable HLS for the parser/lexer
    - path: "./src/Frontend/Autogen/"
      config:
        cradle:
          none:
    # enable it for the ahc source
    - path: "./src/"
      config:
        cradle:
          stack:
    # disable it for my random testing files
    # (and also Setup.hs!)
    - path: "./"
      config:
        cradle:
          stack:
          none:
    - path: "./.stack-work/"
      config:
        cradle:
          none:
--- a/src/Frontend/Autogen/Lexer.x
+++ b/src/Frontend/Autogen/Lexer.x
@ -1,30 +1,33 @@
 {
 module Frontend.Autogen.Lexer where
 import Control.Monad
 import qualified Data.ByteString.Lazy as Lbs
 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import qualified Data.Char
 import Data.Int (Int64)
 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens
 import Control.Monad
 import Debug.Trace
 import Frontend.Parser.Posn
 }
 %wrapper "monadUserState-bytestring"
 -- %wrapper "monadUserState-bytestring"
 $alpha = [a-zA-Z]
 $digit = [0-9]
 $white_nol = $white # \n
 $white_nol = $white # [\n\t]
 tokens :-
  $white_nol+                   ;
  \t                            { \_ _ -> alexError "tab character in source code" }
 <0,module_header> "--" .* \n
 <0,import_> "--" .* \n
  { just $ pushStartCode newline }
 <0,module_header,import_>
 <0,import_>
  $alpha [$alpha $digit \_ \' \.]* { variableOrKeyword }
 <0> \=                            { always TokEqual }
@ -37,7 +40,7 @@ tokens :-
 <0> \{                            { always TokOBrace }
 <0> \[                            { always TokOSquare }
 <0,module_header,import_> {
 <0,import_> {
  \,              { always TokComma }
  \(              { always TokOParen }
  \)              { always TokCParen }
@ -48,33 +51,33 @@ tokens :-
 <0> \;+                           { always TokSemi }
 <0> \n                            { just $ pushStartCode newline }
 <0,import_> \n                    { just $ pushStartCode newline }
 <0> \"                            { just startString }
 <0> \"                            { startString }
 <string> {
  \\ \"                           { stringSeg (T.singleton '"') }
  \\ \\                           { stringSeg (T.singleton '\\') }
  \\ a                            { stringSeg (T.singleton '\a') }
  \\ b                            { stringSeg (T.singleton '\b') }
  \\ f                            { stringSeg (T.singleton '\f') }
  \\ n                            { stringSeg (T.singleton '\n') }
  \\ \n                           { stringSeg (T.singleton '\n') }
  \\ r                            { stringSeg (T.singleton '\r') }
  \\ v                            { stringSeg (T.singleton '\v') }
  \\ t                            { stringSeg (T.singleton '\t') }
  \\ \"                           { stringAppend (T.singleton '"') }
  \\ \\                           { stringAppend (T.singleton '\\') }
  \\ a                            { stringAppend (T.singleton '\a') }
  \\ b                            { stringAppend (T.singleton '\b') }
  \\ f                            { stringAppend (T.singleton '\f') }
  \\ n                            { stringAppend (T.singleton '\n') }
  \\ \n                           { stringAppend (T.singleton '\n') }
  \\ r                            { stringAppend (T.singleton '\r') }
  \\ v                            { stringAppend (T.singleton '\v') }
  \\ t                            { stringAppend (T.singleton '\t') }
  \"                              { endString }
  [^\\\"]+                        { stringChar }
  [^\\\"]+                        { stringSegment }
 }
 <0,newline,comment,import_,module_header>
 <0,newline,comment,import_>
  "{-"          { just $ pushStartCode comment }
 <comment> {
  "-}"   { \i l -> popStartCode *> skip i l }
  "-}"   { \_ _ -> popStartCode *> alexMonadScan }
  .      ;
 }
@ -104,14 +107,10 @@ tokens :-
 <pending> () { emitPendingToken }
 <module_header> {
  \n ;
 }
 {
 alexEOF :: Alex Token
 alexEOF = do
  (AlexPn _ l c, _, _, _) <- alexGetInput
  (Posn l c, _, _, _) <- alexGetInput
  maybePopImportSC
@ -135,7 +134,7 @@ alexEOF = do
 yield k inp i = clearPendingLC *> yield' k inp i
 yield' k (AlexPn _ l c, _, s, _) i = do
 yield' k (Posn l c, _, s, _) i = do
  pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
 setPendingLC    = mapUserState $ \s -> s { pendingLambdaCase = True }
@ -144,45 +143,32 @@ clearPendingLC  = mapUserState $ \s -> s { pendingLambdaCase = False }
 always :: TokenClass -> AlexInput -> Int64 -> Alex Token
 always k x i = yield (const k) x i
 startString = do
  mapUserState $ \s -> s { stringBuffer = T.empty }
 -- reset the string buffer and push the string start code
 startString (p, _, _, _) _ = do
  mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p }
  pushStartCode string
  alexMonadScan
 endString (AlexPn _ l c, _, _, _) _i = do
  text <- stringBuffer <$> getUserState
  mapUserState $ \s -> s { stringBuffer = T.empty }
 -- pop the string start code, and emit the string buffer as a token.
 endString (Posn l c, _, _, _) _i = do
  state <- getUserState
  mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing }
  popStartCode
  pure (Token (TokString text) l c)
 stringChar input@(AlexPn _ _ _, _, buf, _) i = do
  let (Just (Posn l c)) = stringStartPosn state
  pure (Token (TokString (stringBuffer state)) l c)
 -- append a /lexed/ region to the string buffer
 stringSegment (Posn _ _, _, buf, _) i = do
  mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
  alexMonadScan
 stringSeg text _ _ = do
 -- append a constant fragment to the string buffer.
 stringAppend text _ _ = do
  mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
  alexMonadScan
 data LayoutState
  = LetLayout     { layoutCol :: Int }
  | Layout        { layoutCol :: Int }
  | ModLayout     { layoutCol :: Int }
  deriving (Show)
 data AlexUserState =
  AlexUserState { layoutColumns     :: ![LayoutState]
                , startCodes        :: ![Int]
                , leastColumn       :: !Int
                , pendingLayoutKw   :: Maybe (Int -> LayoutState)
                , pendingTokens     :: ![Token]
                , pendingLambdaCase :: !Bool
                , haveModuleHeader  :: !Bool
                , stringBuffer      :: !T.Text
                }
 alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
 emitPendingToken :: AlexAction Token
 emitPendingToken _ _ = do
  t <- getUserState
@ -224,19 +210,68 @@ popStartCode = do
      alexSetStartCode x
 offsideRule :: AlexInput -> Int64 -> Alex Token
 offsideRule (AlexPn _ line col, _, s, _) _ = do
  ~(col':ctx) <- layoutColumns <$> getUserState
  case col `compare` layoutCol col' of
    EQ -> do
      popStartCode
      maybePopImportSC
      pure (Token TokSemi line col)
    GT -> do
      popStartCode
      alexMonadScan
    LT -> do
      mapUserState $ \s -> s { layoutColumns = ctx }
      pure (Token TokLEnd line col)
 offsideRule (Posn line col, _, _, _) _ = do
  columns <- layoutColumns <$> getUserState
  let continue = popStartCode *> alexMonadScan
  -- The "offside rule" governs how to insert virtual semicolon and
  -- closing '}' tokens. It applies in the "newline" state, and, if we
  -- stay in that state, the rule keeps applying. There are a couple of
  -- cases:
  case columns of
    -- If we have no layout context (or we're in a layout context that
    -- started with a physical '{'), then the offside rule plain doesn't
    -- apply.
    []               -> continue
    ExplicitLayout:_ -> continue
    -- Otherwise, we're dealing with something like
    -- 
    --   do token
    --      ^ this is the layout column.
    col':ctx -> do
      case col `compare` layoutCol col' of
        -- If we have something like
        --
        --   do token
        --      token
        --      ^ this is where we are
        -- then we emit a semicolon (and possibly do some bookeeping,
        -- like leaving the newline state)
        EQ -> do
          popStartCode
          maybePopImportSC
          pure (Token TokSemi line col)
        -- If we have something like
        --
        --  do token
        --      token
        --      ^ this is where we are
        -- then we don't emit anything, just leave the newline state,
        -- since this token continues the previous logical line.
        GT -> continue
        -- If we have something like
        --
        --       C  D  E
        --    do token
        --       do token
        --          do token
        --   token
        --   ^ we are here
        -- then we're behind the layout context, but not just one, three!
        -- we emit a closing '}' to close context 'E', and STAY in the
        -- newline context. when we eventually end up here again
        -- (recurring interleaved with the lexer state machine), we
        -- close the D and C contexts in the same way.
        LT -> do
          mapUserState $ \s -> s { layoutColumns = ctx }
          pure (Token TokLEnd line col)
        -- eventually we either exhaust all the layout contexts or get
        -- to a layout context we're EQ or GT compared to. in that case
        -- one of the other rules apply.
 maybePopImportSC :: Alex ()
 maybePopImportSC = do
@ -244,13 +279,13 @@ maybePopImportSC = do
  when (startcode == import_) popStartCode
 emptyLayout :: AlexInput -> Int64 -> Alex Token
 emptyLayout (AlexPn _ line col, _, _, _) _ = do
 emptyLayout (Posn line col, _, _, _) _ = do
  popStartCode
  pushStartCode newline
  pure (Token TokLEnd line col)
 startLayout :: AlexInput -> Int64 -> Alex Token
 startLayout (AlexPn _ line col, _, _, _) _ = do
 startLayout (Posn line col, _, _, _) _ = do
  state <- getUserState
  popStartCode
  let
@ -263,42 +298,70 @@ startLayout (AlexPn _ line col, _, _, _) _ = do
      Just s -> s
      Nothing -> Layout
  if col < col'
  -- here's another rule. suppose we have:
  --
  --    foo = bar where
  --    spam = ham
  --
  -- if we just apply the rule that the next token after a layout
  -- keyword determines the column for the layout context, then we're
  -- starting another layout context at column 1! that's definitely not
  -- what we want.
  --
  -- so a new layout context only starts if the first token is to the right
  -- of the previous layout context. that is: a block only starts if it's
  -- on the same line as the layout context, or indented further.
  if col <= col'
    then pushStartCode empty_layout
    else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
  pure (Token TokLStart line col)
 getLayout :: Alex LayoutState
 getLayout = do
  t <- getUserState
  case layoutColumns t of
    (x:_) -> pure x
    _ -> error "No layout?"
 popLayoutContext :: Alex ()
 popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
 openBrace :: AlexInput -> Int64 -> Alex Token
 openBrace (AlexPn _ line col, _, _, _) _ = do
 openBrace (Posn line col, _, _, _) _ = do
  -- if we see a '{' token, we're probably in the layout state. in that
  -- case, we pop it!  otherwise, we just pop the state anyway: if we
  -- were in <0>, then popping gets us back in <0>.
  popStartCode
  mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
  pure (Token TokOBrace line col)
 popLayoutContext :: Alex ()
 popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
  -- we push an ExplicitLayout state so that the offside rule stops
  -- applying (logical lines are delimited by physical semicolons) and a
  -- '}' can close it.
  mapUserState $ \s -> s { layoutColumns = ExplicitLayout:layoutColumns s }
  pure (Token TokOBrace line col)
 closeBrace :: AlexInput -> Int64 -> Alex Token
 closeBrace (AlexPn _ line col, _, _, _) _ = do
  ~(col':_) <- layoutColumns <$> getUserState
  if layoutCol col' < 0
    then popLayoutContext 
    else pure ()
 closeBrace (Posn line col, _, _, _) _ = do
  -- if we're lexing a '}' token (physical) and the rightmost layout
  -- context was started by a physical '{', then we can close it.
  -- otherwise we do nothing and probably get a parse error!
  columns <- layoutColumns <$> getUserState
  case columns of
    ExplicitLayout:_ -> popLayoutContext
    _ -> pure ()
  pure (Token TokCBrace line col)
 variableOrKeyword :: AlexAction Token
 variableOrKeyword (AlexPn _ l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
 variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
 finishVarKw :: Int -> Int -> T.Text -> Alex Token
 finishVarKw l c text
  | T.null text = undefined
  | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $
    -- if we have a token like A.B.C, we reverse it and span at the
    -- first (last) dot, so that we have, e.g.:
    --
    -- "Aa.Bb.Cc" -> "cC.bB.aA"
    -- "Cc.Bb.Aa" -> ("Cc", ".bB.aA")
    --
    -- what we have then is the suffix and the prefix, but they've both
    -- been reversed. so we unreverse them, and also drop the first
    -- (last) dot from the prefix.
    --
    -- if the suffix starts with an uppercase letter, it's a constructor
    -- symbol (ConId).
    let
      txet = T.reverse text
      (suffix', prefix') = T.span (/= '.') txet
@ -319,6 +382,18 @@ finishVarKw l c text = do
  let col  = layoutCol (head (layoutColumns state))
  case T.unpack text of
    -- we handle the contextual 'as'/'qualified' tokens using a
    -- startcode.
    --
    -- in the import_ state, as and qualified are keywords, unless the
    -- offside rule would apply to emit a ';' or '}' token. in that
    -- case, we emit a semicolon (what the offside rule would do!), and
    -- set the "keyword" (now changed to an identifier) as pending, so
    -- that it will be emitted by the next alexMonadScan.
    "import" -> do
      pushStartCode import_
      pure (Token TokImport l c)
    "as"
      | sc == import_, c > col -> pure (Token TokAs l c)
      | sc == import_          -> offsideKeyword (TokUnqual VarId text) l c
@ -329,58 +404,79 @@ finishVarKw l c text = do
      | sc == import_          -> offsideKeyword (TokUnqual VarId text) l c
      | otherwise              -> pure (Token (TokUnqual VarId text) l c)
    -- when starting a layout context for let expressions we make sure
    -- that it is distinguishable from layout contexts started by
    -- anything else, because let layout contexts can be terminated
    -- ahead of time by the 'in' token. for instance in:
    --
    --    let x = 1 in x
    --
    -- there is no reason for the layout context that started after
    -- 'let' to be terminated by the 'in' token, since the offside rule
    -- hasn't had a chance to apply. the token stream in that case would look like
    --
    --    'let' '{' x '=' 1 'in' x
    --
    -- which is a parse error. we do not implement the rule which says parse errors
    -- terminate layout contexts, instead doing this approximation.
    "let"  -> laidOut' (Just LetLayout) TokLet l c
    "in"   -> do
      laidout <- getLayout
      laidout <- layoutColumns <$> getUserState
      case laidout of
        -- let .. in critical pair:
        -- 'in' is allowed to close a layout context before the offside rule would apply.
        LetLayout _ -> earlyEnd TokIn l c
        LetLayout _:_ -> earlyEnd TokIn l c
        _ -> pure (Token TokIn l c)
    "data" -> pure (Token TokData l c)
    "where" -> do
      -- if this is the where in the module_header, then
      -- pop the start code so that the offside rule applies again
      when (sc == module_header) popStartCode
      laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
    "where" -> laidOut TokWhere l c
    "module" -> pure (Token TokModule l c)
    -- when we lex a \ token, a flag is set in the lexer state to
    -- indicate that, if there is a 'case' token directly following,
    -- that token is to be interpreted as part of a lambda-case
    -- construct, and so must start a layout context for its branches.
    "case"
      -- "case" is a layout token if it's immediately following a \\
      | pendingLambdaCase state -> laidOut TokCase l c
      | otherwise               -> pure (Token TokCase l c)
    "import" -> do
      pushStartCode import_
      pure (Token TokImport l c)
    "of"    -> laidOut TokOf l c
    "module" -> do
      unless (haveModuleHeader state) $ do
        mapUserState $ \s -> s { haveModuleHeader = True }
        pushStartCode module_header
      pure (Token TokModule l c)
    (x:_) -> pure (Token (TokUnqual VarId text) l c)
    (_:_) -> pure (Token (TokUnqual VarId text) l c)
    [] -> error "empty keyword/identifier"
 earlyEnd :: TokenClass -> Int -> Int -> Alex Token
 earlyEnd tok l c = do
  popLayoutContext
  delayToken (Token tok l c)
  pure (Token TokLEnd l c)
 offsideKeyword :: TokenClass -> Int -> Int -> Alex Token
 offsideKeyword tok l c = do
  popLayoutContext
  popStartCode
  delayToken (Token tok l c)
  pure (Token TokSemi l c)
 laidOut' :: Maybe (Int -> LayoutState) -> TokenClass -> Int -> Int -> Alex Token
 laidOut' n x l c = do
  pushStartCode layout
  mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
  pure (Token x l c)
 laidOut = laidOut' Nothing
 }
 alexMonadScan = do
  inp@(_,_,_,n) <- alexGetInput
  sc <- alexGetStartCode
  case alexScan inp sc of
    AlexEOF -> alexEOF
    AlexError error@(_,_,inp,_) ->
      alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp)))
    AlexSkip inp _len -> do
      alexSetInput inp
      alexMonadScan
    AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do
      alexSetInput inp'
      action (ignorePendingBytes inp) len
 }
--- a/src/Frontend/Autogen/Parser.y
+++ b/src/Frontend/Autogen/Parser.y
@ -3,16 +3,20 @@
 module Frontend.Autogen.Parser where
 import qualified Data.Text as T
 import Data.Maybe
 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens
 import Frontend.Parser.Posn
 import Frontend.Syntax
 import Frontend.Autogen.Lexer
 import qualified Prelude
 import Prelude hiding (span)
 import Debug.Trace
 import Control.Monad
 }
 %name parseExp Exp
@ -121,13 +125,13 @@ Apat :: { Pat }
  | '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }
 Decl :: { Decl }
  : CommaList1(VAR) '::' Type  { TySig   (getVar `fmap` $1) $3 }
  | VAR Apat List(Apat) Rhs    { FunDecl (getVar $1) ($2:$3) $4 }
  | Pat Rhs                    { PatDecl $1 $2 }
  : CommaList1(VAR) '::' Type  { TySig   (getVar `fmap` $1) $3 (startPosn (head $1)) (endPosn $3) }
  | VAR Apat List(Apat) Rhs    { FunDecl (getVar $1) ($2:$3) $4 (startPosn $1) (endPosn $4) }
  | Pat Rhs                    { PatDecl $1 $2 (startPosn $1) (endPosn $2) }
 Rhs :: { Rhs }
  : '=' Exp                           { BareRhs $2 [] }
  | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) }
  : '=' Exp                           { BareRhs $2 [] (startPosn $1) (endPosn $2) }
  | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) (startPosn $1) (endPosn $4) }
 LaidOutList(p)
  : START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) }
@ -140,14 +144,17 @@ LOLContents(p, End)
 Module :: { Module }
  : 'module' CON ImportExportList 'where' LaidOutList(ModItem)
    { Module { moduleName    = toModId (getVar $2)
             , moduleExports = $3
             , moduleItems   = thd $5 }
    {% do { (imports,items) <- spanModuleItems (thd $5)
          ; pure $ Module { moduleName    = toModId (getVar $2)
                          , moduleExports = fst $3
                          , moduleImports = imports
                          , moduleItems   = items }
          }
    }
 ImportExportList :: { Maybe [NamespacedItem ParsedVar] }
  : {-empty-}                 { Nothing }
  | '(' CommaList(NSItem) ')' { Just $2 }
 ImportExportList :: { (Maybe [NamespacedItem ParsedVar], Maybe Posn) }
  : {-empty-}                 { (Nothing, Nothing) }
  | '(' CommaList(NSItem) ')' { (Just $2, Just (endPosn $3)) }
 NSItem :: { NamespacedItem ParsedVar }
  : VAR          { IEVar (getVar $1) }
@ -155,21 +162,21 @@ NSItem :: { NamespacedItem ParsedVar }
  | 'module' CON { IEModule (getVar $2) }
 ModItem :: { Item }
  : Decl            { ModDecl $1 }
  | Import          { ModImport $1 } 
  : Decl            { ModDecl $1 (startPosn $1) (endPosn $1) }
  | Import          { ModImport $1 (startPosn $1) (endPosn $1) } 
 Import :: { ModuleImport ParsedVar }
  : 'import' modid ImportExportList
    { Import $2 $3 False Nothing }
    { Import $2 (fst $3) False Nothing (startPosn $1) (fromMaybe (endPosn $2) (snd $3)) }
  | 'import' modid ImportExportList 'as' CON
    { Import $2 $3 False (Just (getVar $5)) }
    { Import $2 (fst $3) False (Just (getVar $5)) (startPosn $1) (endPosn $5) }
  | 'import' 'qualified' modid ImportExportList
    { Import $3 $4 True Nothing }
    { Import $3 (fst $4) True Nothing (startPosn $1) (fromMaybe (endPosn $3) (snd $4)) }
  | 'import' 'qualified' modid ImportExportList 'as' CON
    { Import $3 $4 True (Just (getVar $6)) }
    { Import $3 (fst $4) True (Just (getVar $6)) (startPosn $1) (endPosn $6) }
 Opt(p)
  :           { () }
@ -246,4 +253,24 @@ makeTuplePattern xs = TupPat xs
 makeTuple [x] = ParenExp x
 makeTuple xs  = Tuple xs
 spanModuleItems xs = do
  let
    isImport (ModImport _ _ _) = True
    isImport _ = False
    (imports, items) = Prelude.span isImport xs
  forM_ items $ \x -> case x of
    ModImport _ start end ->
      alexThrow $ \fname ->
        ParseError { parseErrorMessage    = "all import statements should be at the top of the file."
                   , parseErrorInlineDesc = Just "unexpected import statement"
                   , parseErrorFilename   = fname
                   , parseErrorBegin      = start
                   , parseErrorEnd        = end
                   }
    _ -> pure ()
  pure (map itemImport imports, items)
 }
--- a/src/Frontend/Lexer/Wrapper.hs
+++ b/src/Frontend/Lexer/Wrapper.hs
@ -0,0 +1,181 @@
 module Frontend.Lexer.Wrapper where
 import Control.Applicative as App (Applicative (..))
 import Data.Word (Word8)
 import Data.Int (Int64)
 import qualified Data.Char
 import qualified Data.ByteString.Lazy     as ByteString
 import qualified Data.ByteString.Internal as ByteString (w2c)
 import Frontend.Lexer.Tokens (Token)
 import qualified Data.Text as T
 import Frontend.Parser.Posn
 type Byte = Word8
 type AlexInput = ( Posn,     -- current position,
                   Char,         -- previous char
                   ByteString.ByteString,        -- current input string
                   Int64)           -- bytes consumed so far
 ignorePendingBytes :: AlexInput -> AlexInput
 ignorePendingBytes i = i   -- no pending bytes when lexing bytestrings
 alexInputPrevChar :: AlexInput -> Char
 alexInputPrevChar (_,c,_,_) = c
 alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
 alexGetByte (p,_,cs,n) =
    case ByteString.uncons cs of
        Nothing -> Nothing
        Just (b, cs') ->
            let c   = ByteString.w2c b
                p'  = alexMove p c
                n'  = n+1
            in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n'))
 -- -----------------------------------------------------------------------------
 -- Token positions
 -- `Posn' records the location of a token in the input text.  It has three
 -- fields: the address (number of chacaters preceding the token), line number
 -- and column of a token within the file. `start_pos' gives the position of the
 -- start of the file and `eof_pos' a standard encoding for the end of file.
 -- `move_pos' calculates the new position after traversing a given character,
 -- assuming the usual eight character tab stops.
 alexStartPos :: Posn
 alexStartPos = Posn 1 1
 alexMove :: Posn -> Char -> Posn
 alexMove (Posn l c) '\t' = Posn l     (c+8-((c-1) `mod` 8))
 alexMove (Posn l _) '\n' = Posn (l+1) 1
 alexMove (Posn l c) _    = Posn l     (c+1)
 data AlexState = AlexState {
        alex_pos :: !Posn,  -- position at current input location
        alex_bpos:: !Int64,     -- bytes consumed so far
        alex_inp :: ByteString.ByteString,      -- the current input
        alex_chr :: !Char,      -- the character before the input
        alex_scd :: !Int        -- the current startcode
      , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
      , alex_fname :: String
    }
 runAlex :: String -> ByteString.ByteString -> Alex a -> Either ParseError a
 runAlex fname input__ (Alex f) =
  case f initState of
    Left msg -> Left msg
    Right ( _, a ) -> Right a
  where
    initState = AlexState
      { alex_bpos = 0
      , alex_pos = alexStartPos
      , alex_inp = input__
      , alex_chr = '\n'
      , alex_ust = alexInitUserState
      , alex_scd = 0
      , alex_fname = fname
      }
 newtype Alex a = Alex { unAlex :: AlexState -> Either ParseError (AlexState, a) }
 instance Functor Alex where
  fmap f a = Alex $ \s -> case unAlex a s of
                            Left msg -> Left msg
                            Right (s', a') -> Right (s', f a')
 instance Applicative Alex where
  pure a   = Alex $ \s -> Right (s, a)
  fa <*> a = Alex $ \s -> case unAlex fa s of
                            Left msg -> Left msg
                            Right (s', f) -> case unAlex a s' of
                                               Left msg -> Left msg
                                               Right (s'', b) -> Right (s'', f b)
 instance Monad Alex where
  m >>= k  = Alex $ \s -> case unAlex m s of
                                Left msg -> Left msg
                                Right (s',a) -> unAlex (k a) s'
  return = App.pure
 alexGetInput :: Alex AlexInput
 alexGetInput =
  Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} ->
    Right (s, (pos,c,inp__,bpos))
 alexSetInput :: AlexInput -> Alex ()
 alexSetInput (pos,c,inp__,bpos)
 = Alex $ \s -> Right ( s { alex_pos  = pos
                          , alex_bpos = bpos
                          , alex_chr  = c
                          , alex_inp  = inp__
                          }
                      , ())
 alexError :: String -> Alex a
 alexError message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing (alex_pos s) (alex_pos s))
 alexErrorPosn :: Posn -> Posn -> String -> Alex a
 alexErrorPosn start end message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing start end)
 alexThrow :: (String -> ParseError) -> Alex a
 alexThrow err = Alex $ \s -> Left (err (alex_fname s))
 alexGetStartCode :: Alex Int
 alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
 alexSetStartCode :: Int -> Alex ()
 alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
 -- -----------------------------------------------------------------------------
 -- Useful token actions
 type AlexAction result = AlexInput -> Int64 -> Alex result
 -- perform an action for this token, and set the start code to a new value
 andBegin :: AlexAction result -> Int -> AlexAction result
 (action `andBegin` code) input__ len = do
  alexSetStartCode code
  action input__ len
 token :: (AlexInput -> Int64 -> token) -> AlexAction token
 token t input__ len = return (t input__ len)
 data LayoutState
  = LetLayout      { layoutCol :: Int }
  | Layout         { layoutCol :: Int }
  | ExplicitLayout
  deriving (Show)
 data AlexUserState =
  AlexUserState { layoutColumns     :: ![LayoutState]
                , startCodes        :: ![Int]
                , leastColumn       :: !Int
                , pendingLayoutKw   :: Maybe (Int -> LayoutState)
                , pendingTokens     :: ![Token]
                , pendingLambdaCase :: !Bool
                , stringBuffer      :: !T.Text
                , stringStartPosn   :: Maybe Posn
                }
 alexInitUserState :: AlexUserState
 alexInitUserState = AlexUserState [] [] 0 Nothing [] False T.empty Nothing
 data ParseError
  = ParseError { parseErrorMessage    :: String
               , parseErrorFilename   :: String
               , parseErrorInlineDesc :: Maybe String
               , parseErrorBegin      :: Posn
               , parseErrorEnd        :: Posn
               }
  deriving (Eq, Show)
--- a/src/Frontend/Syntax.hs
+++ b/src/Frontend/Syntax.hs
@ -72,15 +72,27 @@ instance HasPosn (FeType var) where
  span sp ep x              = SPType x (startPosn sp) (endPosn ep)
 data FeDecl var
  = PatDecl (FePat var) (FeRhs var)
  | FunDecl var [FePat var] (FeRhs var)
  | TySig   [var] (FeType var)
  = PatDecl { pdPat :: FePat var, declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
  | FunDecl { fdVar :: var, fdArgs :: [FePat var], declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
  | TySig   { tsVars :: [var], tsType :: FeType var, declBegin :: Posn, declEnd :: Posn }
  deriving (Eq, Show, Ord)
 instance HasPosn (FeDecl var) where
  startPosn = declBegin
  endPosn   = declEnd
  span sp ep s = s { declBegin = startPosn sp, declEnd = endPosn ep }
 data FeRhs var
  = BareRhs (FeExpr var) [FeDecl var]
  = BareRhs { bareRhs :: FeExpr var, rhsWhere :: [FeDecl var], rhsBegin :: Posn, rhsEnd :: Posn }
  deriving (Eq, Show, Ord)
 instance HasPosn (FeRhs var) where
  startPosn = rhsBegin
  endPosn   = rhsEnd
  span sp ep s = s { rhsBegin = startPosn sp, rhsEnd = endPosn ep }
 data Literal
  = LitString T.Text
  | LitNumber Integer
@ -89,6 +101,7 @@ data Literal
 data FeModule var
  = Module { moduleName    :: var
           , moduleExports :: Maybe [NamespacedItem var]
           , moduleImports :: [ModuleImport var]
           , moduleItems   :: [ModuleItem var]
           }
  deriving (Eq, Show, Ord)
@ -98,9 +111,17 @@ data ModuleImport var
           , importList      :: Maybe [NamespacedItem var]
           , importQualified :: Bool
           , importAlias     :: Maybe var
           , importBegin     :: Posn
           , importEnd       :: Posn
           }
  deriving (Eq, Show, Ord)
 instance HasPosn (ModuleImport var) where
  startPosn = importBegin
  endPosn   = importEnd
  span sp ep s = s { importBegin = startPosn sp, importEnd = endPosn ep }
 data NamespacedItem var
  = IEVar    var
  | IECon    var
@ -108,8 +129,8 @@ data NamespacedItem var
  deriving (Eq, Show, Ord)
 data ModuleItem var
  = ModDecl (FeDecl var)
  | ModImport (ModuleImport var)
  = ModDecl   { itemDecl :: FeDecl var, itemBegin :: Posn, itemEnd :: Posn }
  | ModImport { itemImport :: ModuleImport var, itemBegin :: Posn, itemEnd :: Posn }
  deriving (Eq, Show, Ord)
 data ParsedVar
@ -140,4 +161,10 @@ instance HasPosn ParsedVar where
  startPosn = varBegin
  endPosn = varEnd
  span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }
  span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }
 instance HasPosn (ModuleItem var) where
  startPosn = itemBegin
  endPosn   = itemEnd
  span sp ep s = s { itemBegin = startPosn sp, itemEnd = endPosn ep }
--- a/src/Main.hs
+++ b/src/Main.hs
@ -1,40 +1,91 @@
 -- {-# LANGUAGE BlockArguments #-}
 module Main where
 import Control.Monad ( unless )
 import qualified Data.ByteString.Lazy as Lbs
 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import Data.Foldable
 import Frontend.Lexer.Tokens
 import Frontend.Autogen.Lexer
 import Debug.Trace
 import Frontend.Autogen.Parser
 import Frontend.Autogen.Lexer
 import Frontend.Lexer.Wrapper
 import Frontend.Lexer.Tokens
 import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import Control.Monad ( unless )
 import System.Environment (getArgs)
 import Text.Show.Pretty (pPrint)
 import Debug.Trace
 import Text.Show.Pretty (pPrint)
 import Frontend.Parser.Posn
 import System.Posix.Internals
 import GHC.IO.Handle.FD (stdout, handleToFd)
 import GHC.IO.FD
 import Data.Maybe (fromMaybe)
 main :: IO ()
 main = do
  args <- getArgs
  for_ args $ \str -> do
    Main.lex pPrint parseMod =<< Lbs.readFile str
    Main.lex str pPrint parseMod =<< Lbs.readFile str
 testParse :: String -> IO ()
 testParse s = Main.lex print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 testParse s = Main.lex "<interactive>" print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 testLex :: String -> IO ()
 testLex s = Main.lex (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 lex :: (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
 lex show cont arg = do
  let x = runAlex arg cont
 testLex s = Main.lex "<interactive>" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
 lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
 lex fname show cont arg = do
  let x = runAlex fname arg cont
  case x of
    Left e -> print e
    Left e -> showParseError e
    Right x -> show x
 showParseError :: ParseError -> IO ()
 showParseError pe = do
  code <- lines <$> readFile (parseErrorFilename pe)
  color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout
  let
    linum    = posnLine (parseErrorBegin pe)
    startcol = posnColm (parseErrorBegin pe)
    multiline = linum /= posnLine (parseErrorEnd pe)
    width
      | multiline = 0
      | otherwise = max 0 (posnColm (parseErrorEnd pe) - startcol - 1)
    linum' = show linum
    line = [ show linum ++ sep ++ code !! (linum - 1) | linum <- [ linum .. posnLine (parseErrorEnd pe) ] ]
    padding = replicate (length linum') ' ' ++ sep
    padding' = replicate (length linum') ' ' ++ "   "
    caret = replicate (startcol - 1) ' ' ++ red ++ "^"
    squiggle = replicate width '~'
    (red, bold, reset, sep)
      | color     = ("\x1b[31m", "\x1b[1m", "\x1b[0m", "\x1b[1;34m \9474 \x1b[0m")
      | otherwise = ("", "", "", "")
  putStr . unlines $
    [ bold
      ++ parseErrorFilename pe
      ++ ":" ++ show linum ++ ":" ++ show startcol ++ ": "
      ++ red ++ "parse error:" ++ reset
    , padding'
    , init (unlines line)
    , padding' ++ caret ++ squiggle ++ reset ++ fromMaybe "" ((" " ++) <$> parseErrorInlineDesc pe)
    , ""
    , padding' ++ parseErrorMessage pe
    ]
 scan :: [Token] -> Alex [Token]
 scan acc = do
  tok <- alexMonadScan
@ -45,6 +96,7 @@ scan acc = do
    , "just lexed: " ++ show tok
    , "sc: "         ++ show sc
    , "sc stack: "   ++ show (startCodes state)
    , "layout stack: " ++ show (layoutColumns state)
    ]
  case tokenClass tok of
    TokEof -> pure (reverse acc)