Browse Source

Refactor parser/lexer error message infra

master
Amélia Liao 2 years ago
parent
commit
27186d92b3
8 changed files with 558 additions and 159 deletions
  1. +2
    -2
      .gitignore
  2. +1
    -0
      ahc.cabal
  3. +17
    -2
      hie.yaml
  4. +212
    -116
      src/Frontend/Autogen/Lexer.x
  5. +44
    -17
      src/Frontend/Autogen/Parser.y
  6. +181
    -0
      src/Frontend/Lexer/Wrapper.hs
  7. +34
    -7
      src/Frontend/Syntax.hs
  8. +67
    -15
      src/Main.hs

+ 2
- 2
.gitignore View File

@ -14,5 +14,5 @@
!/Setup.hs !/Setup.hs
# alex/happy artefacts # alex/happy artefacts
src/*.o
src/*.hi
src/**/*.o
src/**/*.hi

+ 1
- 0
ahc.cabal View File

@ -28,6 +28,7 @@ executable ahc
Frontend.Autogen.Lexer, Frontend.Autogen.Lexer,
Frontend.Autogen.Parser, Frontend.Autogen.Parser,
Frontend.Lexer.Tokens, Frontend.Lexer.Tokens,
Frontend.Lexer.Wrapper,
Frontend.Parser.Posn, Frontend.Parser.Posn,
Frontend.Syntax Frontend.Syntax


+ 17
- 2
hie.yaml View File

@ -1,10 +1,25 @@
cradle: cradle:
multi: multi:
- path: "./src/Frontend/Autogen"
# disable HLS for the parser/lexer
- path: "./src/Frontend/Autogen/"
config: config:
cradle: cradle:
none: none:
# enable it for the ahc source
- path: "./src/"
config:
cradle:
stack:
# disable it for my random testing files
# (and also Setup.hs!)
- path: "./" - path: "./"
config: config:
cradle: cradle:
stack:
none:
- path: "./.stack-work/"
config:
cradle:
none:

+ 212
- 116
src/Frontend/Autogen/Lexer.x View File

@ -1,30 +1,33 @@
{ {
module Frontend.Autogen.Lexer where module Frontend.Autogen.Lexer where
import Control.Monad
import qualified Data.ByteString.Lazy as Lbs import qualified Data.ByteString.Lazy as Lbs
import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding as T
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Char
import Data.Int (Int64)
import Frontend.Lexer.Wrapper
import Frontend.Lexer.Tokens import Frontend.Lexer.Tokens
import Control.Monad
import Debug.Trace
import Frontend.Parser.Posn
} }
%wrapper "monadUserState-bytestring"
-- %wrapper "monadUserState-bytestring"
$alpha = [a-zA-Z] $alpha = [a-zA-Z]
$digit = [0-9] $digit = [0-9]
$white_nol = $white # \n
$white_nol = $white # [\n\t]
tokens :- tokens :-
$white_nol+ ; $white_nol+ ;
\t { \_ _ -> alexError "tab character in source code" }
<0,module_header> "--" .* \n
<0,import_> "--" .* \n
{ just $ pushStartCode newline } { just $ pushStartCode newline }
<0,module_header,import_>
<0,import_>
$alpha [$alpha $digit \_ \' \.]* { variableOrKeyword } $alpha [$alpha $digit \_ \' \.]* { variableOrKeyword }
<0> \= { always TokEqual } <0> \= { always TokEqual }
@ -37,7 +40,7 @@ tokens :-
<0> \{ { always TokOBrace } <0> \{ { always TokOBrace }
<0> \[ { always TokOSquare } <0> \[ { always TokOSquare }
<0,module_header,import_> {
<0,import_> {
\, { always TokComma } \, { always TokComma }
\( { always TokOParen } \( { always TokOParen }
\) { always TokCParen } \) { always TokCParen }
@ -48,33 +51,33 @@ tokens :-
<0> \;+ { always TokSemi } <0> \;+ { always TokSemi }
<0> \n { just $ pushStartCode newline }
<0,import_> \n { just $ pushStartCode newline }
<0> \" { just startString }
<0> \" { startString }
<string> { <string> {
\\ \" { stringSeg (T.singleton '"') }
\\ \\ { stringSeg (T.singleton '\\') }
\\ a { stringSeg (T.singleton '\a') }
\\ b { stringSeg (T.singleton '\b') }
\\ f { stringSeg (T.singleton '\f') }
\\ n { stringSeg (T.singleton '\n') }
\\ \n { stringSeg (T.singleton '\n') }
\\ r { stringSeg (T.singleton '\r') }
\\ v { stringSeg (T.singleton '\v') }
\\ t { stringSeg (T.singleton '\t') }
\\ \" { stringAppend (T.singleton '"') }
\\ \\ { stringAppend (T.singleton '\\') }
\\ a { stringAppend (T.singleton '\a') }
\\ b { stringAppend (T.singleton '\b') }
\\ f { stringAppend (T.singleton '\f') }
\\ n { stringAppend (T.singleton '\n') }
\\ \n { stringAppend (T.singleton '\n') }
\\ r { stringAppend (T.singleton '\r') }
\\ v { stringAppend (T.singleton '\v') }
\\ t { stringAppend (T.singleton '\t') }
\" { endString } \" { endString }
[^\\\"]+ { stringChar }
[^\\\"]+ { stringSegment }
} }
<0,newline,comment,import_,module_header>
<0,newline,comment,import_>
"{-" { just $ pushStartCode comment } "{-" { just $ pushStartCode comment }
<comment> { <comment> {
"-}" { \i l -> popStartCode *> skip i l }
"-}" { \_ _ -> popStartCode *> alexMonadScan }
. ; . ;
} }
@ -104,14 +107,10 @@ tokens :-
<pending> () { emitPendingToken } <pending> () { emitPendingToken }
<module_header> {
\n ;
}
{ {
alexEOF :: Alex Token alexEOF :: Alex Token
alexEOF = do alexEOF = do
(AlexPn _ l c, _, _, _) <- alexGetInput
(Posn l c, _, _, _) <- alexGetInput
maybePopImportSC maybePopImportSC
@ -135,7 +134,7 @@ alexEOF = do
yield k inp i = clearPendingLC *> yield' k inp i yield k inp i = clearPendingLC *> yield' k inp i
yield' k (AlexPn _ l c, _, s, _) i = do
yield' k (Posn l c, _, s, _) i = do
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
@ -144,45 +143,32 @@ clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
always :: TokenClass -> AlexInput -> Int64 -> Alex Token always :: TokenClass -> AlexInput -> Int64 -> Alex Token
always k x i = yield (const k) x i always k x i = yield (const k) x i
startString = do
mapUserState $ \s -> s { stringBuffer = T.empty }
-- reset the string buffer and push the string start code
startString (p, _, _, _) _ = do
mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p }
pushStartCode string pushStartCode string
alexMonadScan
endString (AlexPn _ l c, _, _, _) _i = do
text <- stringBuffer <$> getUserState
mapUserState $ \s -> s { stringBuffer = T.empty }
-- pop the string start code, and emit the string buffer as a token.
endString (Posn l c, _, _, _) _i = do
state <- getUserState
mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing }
popStartCode popStartCode
pure (Token (TokString text) l c)
stringChar input@(AlexPn _ _ _, _, buf, _) i = do
let (Just (Posn l c)) = stringStartPosn state
pure (Token (TokString (stringBuffer state)) l c)
-- append a /lexed/ region to the string buffer
stringSegment (Posn _ _, _, buf, _) i = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
alexMonadScan alexMonadScan
stringSeg text _ _ = do
-- append a constant fragment to the string buffer.
stringAppend text _ _ = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text } mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
alexMonadScan alexMonadScan
data LayoutState
= LetLayout { layoutCol :: Int }
| Layout { layoutCol :: Int }
| ModLayout { layoutCol :: Int }
deriving (Show)
data AlexUserState =
AlexUserState { layoutColumns :: ![LayoutState]
, startCodes :: ![Int]
, leastColumn :: !Int
, pendingLayoutKw :: Maybe (Int -> LayoutState)
, pendingTokens :: ![Token]
, pendingLambdaCase :: !Bool
, haveModuleHeader :: !Bool
, stringBuffer :: !T.Text
}
alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
emitPendingToken :: AlexAction Token emitPendingToken :: AlexAction Token
emitPendingToken _ _ = do emitPendingToken _ _ = do
t <- getUserState t <- getUserState
@ -224,19 +210,68 @@ popStartCode = do
alexSetStartCode x alexSetStartCode x
offsideRule :: AlexInput -> Int64 -> Alex Token offsideRule :: AlexInput -> Int64 -> Alex Token
offsideRule (AlexPn _ line col, _, s, _) _ = do
~(col':ctx) <- layoutColumns <$> getUserState
case col `compare` layoutCol col' of
EQ -> do
popStartCode
maybePopImportSC
pure (Token TokSemi line col)
GT -> do
popStartCode
alexMonadScan
LT -> do
mapUserState $ \s -> s { layoutColumns = ctx }
pure (Token TokLEnd line col)
offsideRule (Posn line col, _, _, _) _ = do
columns <- layoutColumns <$> getUserState
let continue = popStartCode *> alexMonadScan
-- The "offside rule" governs how to insert virtual semicolon and
-- closing '}' tokens. It applies in the "newline" state, and, if we
-- stay in that state, the rule keeps applying. There are a couple of
-- cases:
case columns of
-- If we have no layout context (or we're in a layout context that
-- started with a physical '{'), then the offside rule plain doesn't
-- apply.
[] -> continue
ExplicitLayout:_ -> continue
-- Otherwise, we're dealing with something like
--
-- do token
-- ^ this is the layout column.
col':ctx -> do
case col `compare` layoutCol col' of
-- If we have something like
--
-- do token
-- token
-- ^ this is where we are
-- then we emit a semicolon (and possibly do some bookeeping,
-- like leaving the newline state)
EQ -> do
popStartCode
maybePopImportSC
pure (Token TokSemi line col)
-- If we have something like
--
-- do token
-- token
-- ^ this is where we are
-- then we don't emit anything, just leave the newline state,
-- since this token continues the previous logical line.
GT -> continue
-- If we have something like
--
-- C D E
-- do token
-- do token
-- do token
-- token
-- ^ we are here
-- then we're behind the layout context, but not just one, three!
-- we emit a closing '}' to close context 'E', and STAY in the
-- newline context. when we eventually end up here again
-- (recurring interleaved with the lexer state machine), we
-- close the D and C contexts in the same way.
LT -> do
mapUserState $ \s -> s { layoutColumns = ctx }
pure (Token TokLEnd line col)
-- eventually we either exhaust all the layout contexts or get
-- to a layout context we're EQ or GT compared to. in that case
-- one of the other rules apply.
maybePopImportSC :: Alex () maybePopImportSC :: Alex ()
maybePopImportSC = do maybePopImportSC = do
@ -244,13 +279,13 @@ maybePopImportSC = do
when (startcode == import_) popStartCode when (startcode == import_) popStartCode
emptyLayout :: AlexInput -> Int64 -> Alex Token emptyLayout :: AlexInput -> Int64 -> Alex Token
emptyLayout (AlexPn _ line col, _, _, _) _ = do
emptyLayout (Posn line col, _, _, _) _ = do
popStartCode popStartCode
pushStartCode newline pushStartCode newline
pure (Token TokLEnd line col) pure (Token TokLEnd line col)
startLayout :: AlexInput -> Int64 -> Alex Token startLayout :: AlexInput -> Int64 -> Alex Token
startLayout (AlexPn _ line col, _, _, _) _ = do
startLayout (Posn line col, _, _, _) _ = do
state <- getUserState state <- getUserState
popStartCode popStartCode
let let
@ -263,42 +298,70 @@ startLayout (AlexPn _ line col, _, _, _) _ = do
Just s -> s Just s -> s
Nothing -> Layout Nothing -> Layout
if col < col'
-- here's another rule. suppose we have:
--
-- foo = bar where
-- spam = ham
--
-- if we just apply the rule that the next token after a layout
-- keyword determines the column for the layout context, then we're
-- starting another layout context at column 1! that's definitely not
-- what we want.
--
-- so a new layout context only starts if the first token is to the right
-- of the previous layout context. that is: a block only starts if it's
-- on the same line as the layout context, or indented further.
if col <= col'
then pushStartCode empty_layout then pushStartCode empty_layout
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s } else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
pure (Token TokLStart line col) pure (Token TokLStart line col)
getLayout :: Alex LayoutState
getLayout = do
t <- getUserState
case layoutColumns t of
(x:_) -> pure x
_ -> error "No layout?"
popLayoutContext :: Alex ()
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
openBrace :: AlexInput -> Int64 -> Alex Token openBrace :: AlexInput -> Int64 -> Alex Token
openBrace (AlexPn _ line col, _, _, _) _ = do
openBrace (Posn line col, _, _, _) _ = do
-- if we see a '{' token, we're probably in the layout state. in that
-- case, we pop it! otherwise, we just pop the state anyway: if we
-- were in <0>, then popping gets us back in <0>.
popStartCode popStartCode
mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
pure (Token TokOBrace line col)
popLayoutContext :: Alex ()
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
-- we push an ExplicitLayout state so that the offside rule stops
-- applying (logical lines are delimited by physical semicolons) and a
-- '}' can close it.
mapUserState $ \s -> s { layoutColumns = ExplicitLayout:layoutColumns s }
pure (Token TokOBrace line col)
closeBrace :: AlexInput -> Int64 -> Alex Token closeBrace :: AlexInput -> Int64 -> Alex Token
closeBrace (AlexPn _ line col, _, _, _) _ = do
~(col':_) <- layoutColumns <$> getUserState
if layoutCol col' < 0
then popLayoutContext
else pure ()
closeBrace (Posn line col, _, _, _) _ = do
-- if we're lexing a '}' token (physical) and the rightmost layout
-- context was started by a physical '{', then we can close it.
-- otherwise we do nothing and probably get a parse error!
columns <- layoutColumns <$> getUserState
case columns of
ExplicitLayout:_ -> popLayoutContext
_ -> pure ()
pure (Token TokCBrace line col) pure (Token TokCBrace line col)
variableOrKeyword :: AlexAction Token variableOrKeyword :: AlexAction Token
variableOrKeyword (AlexPn _ l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
finishVarKw :: Int -> Int -> T.Text -> Alex Token finishVarKw :: Int -> Int -> T.Text -> Alex Token
finishVarKw l c text finishVarKw l c text
| T.null text = undefined | T.null text = undefined
| Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $ | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $
-- if we have a token like A.B.C, we reverse it and span at the
-- first (last) dot, so that we have, e.g.:
--
-- "Aa.Bb.Cc" -> "cC.bB.aA"
-- "Cc.Bb.Aa" -> ("Cc", ".bB.aA")
--
-- what we have then is the suffix and the prefix, but they've both
-- been reversed. so we unreverse them, and also drop the first
-- (last) dot from the prefix.
--
-- if the suffix starts with an uppercase letter, it's a constructor
-- symbol (ConId).
let let
txet = T.reverse text txet = T.reverse text
(suffix', prefix') = T.span (/= '.') txet (suffix', prefix') = T.span (/= '.') txet
@ -319,6 +382,18 @@ finishVarKw l c text = do
let col = layoutCol (head (layoutColumns state)) let col = layoutCol (head (layoutColumns state))
case T.unpack text of case T.unpack text of
-- we handle the contextual 'as'/'qualified' tokens using a
-- startcode.
--
-- in the import_ state, as and qualified are keywords, unless the
-- offside rule would apply to emit a ';' or '}' token. in that
-- case, we emit a semicolon (what the offside rule would do!), and
-- set the "keyword" (now changed to an identifier) as pending, so
-- that it will be emitted by the next alexMonadScan.
"import" -> do
pushStartCode import_
pure (Token TokImport l c)
"as" "as"
| sc == import_, c > col -> pure (Token TokAs l c) | sc == import_, c > col -> pure (Token TokAs l c)
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c
@ -329,58 +404,79 @@ finishVarKw l c text = do
| sc == import_ -> offsideKeyword (TokUnqual VarId text) l c | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c
| otherwise -> pure (Token (TokUnqual VarId text) l c) | otherwise -> pure (Token (TokUnqual VarId text) l c)
-- when starting a layout context for let expressions we make sure
-- that it is distinguishable from layout contexts started by
-- anything else, because let layout contexts can be terminated
-- ahead of time by the 'in' token. for instance in:
--
-- let x = 1 in x
--
-- there is no reason for the layout context that started after
-- 'let' to be terminated by the 'in' token, since the offside rule
-- hasn't had a chance to apply. the token stream in that case would look like
--
-- 'let' '{' x '=' 1 'in' x
--
-- which is a parse error. we do not implement the rule which says parse errors
-- terminate layout contexts, instead doing this approximation.
"let" -> laidOut' (Just LetLayout) TokLet l c "let" -> laidOut' (Just LetLayout) TokLet l c
"in" -> do "in" -> do
laidout <- getLayout
laidout <- layoutColumns <$> getUserState
case laidout of case laidout of
-- let .. in critical pair:
-- 'in' is allowed to close a layout context before the offside rule would apply.
LetLayout _ -> earlyEnd TokIn l c
LetLayout _:_ -> earlyEnd TokIn l c
_ -> pure (Token TokIn l c) _ -> pure (Token TokIn l c)
"data" -> pure (Token TokData l c) "data" -> pure (Token TokData l c)
"where" -> do
-- if this is the where in the module_header, then
-- pop the start code so that the offside rule applies again
when (sc == module_header) popStartCode
laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
"where" -> laidOut TokWhere l c
"module" -> pure (Token TokModule l c)
-- when we lex a \ token, a flag is set in the lexer state to
-- indicate that, if there is a 'case' token directly following,
-- that token is to be interpreted as part of a lambda-case
-- construct, and so must start a layout context for its branches.
"case" "case"
-- "case" is a layout token if it's immediately following a \\
| pendingLambdaCase state -> laidOut TokCase l c | pendingLambdaCase state -> laidOut TokCase l c
| otherwise -> pure (Token TokCase l c) | otherwise -> pure (Token TokCase l c)
"import" -> do
pushStartCode import_
pure (Token TokImport l c)
"of" -> laidOut TokOf l c "of" -> laidOut TokOf l c
"module" -> do
unless (haveModuleHeader state) $ do
mapUserState $ \s -> s { haveModuleHeader = True }
pushStartCode module_header
pure (Token TokModule l c)
(x:_) -> pure (Token (TokUnqual VarId text) l c)
(_:_) -> pure (Token (TokUnqual VarId text) l c)
[] -> error "empty keyword/identifier" [] -> error "empty keyword/identifier"
earlyEnd :: TokenClass -> Int -> Int -> Alex Token
earlyEnd tok l c = do earlyEnd tok l c = do
popLayoutContext popLayoutContext
delayToken (Token tok l c) delayToken (Token tok l c)
pure (Token TokLEnd l c) pure (Token TokLEnd l c)
offsideKeyword :: TokenClass -> Int -> Int -> Alex Token
offsideKeyword tok l c = do offsideKeyword tok l c = do
popLayoutContext
popStartCode
delayToken (Token tok l c) delayToken (Token tok l c)
pure (Token TokSemi l c) pure (Token TokSemi l c)
laidOut' :: Maybe (Int -> LayoutState) -> TokenClass -> Int -> Int -> Alex Token
laidOut' n x l c = do laidOut' n x l c = do
pushStartCode layout pushStartCode layout
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n } mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
pure (Token x l c) pure (Token x l c)
laidOut = laidOut' Nothing laidOut = laidOut' Nothing
}
alexMonadScan = do
inp@(_,_,_,n) <- alexGetInput
sc <- alexGetStartCode
case alexScan inp sc of
AlexEOF -> alexEOF
AlexError error@(_,_,inp,_) ->
alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp)))
AlexSkip inp _len -> do
alexSetInput inp
alexMonadScan
AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do
alexSetInput inp'
action (ignorePendingBytes inp) len
}

+ 44
- 17
src/Frontend/Autogen/Parser.y View File

@ -3,16 +3,20 @@
module Frontend.Autogen.Parser where module Frontend.Autogen.Parser where
import qualified Data.Text as T import qualified Data.Text as T
import Data.Maybe
import Frontend.Lexer.Wrapper
import Frontend.Lexer.Tokens import Frontend.Lexer.Tokens
import Frontend.Parser.Posn import Frontend.Parser.Posn
import Frontend.Syntax import Frontend.Syntax
import Frontend.Autogen.Lexer import Frontend.Autogen.Lexer
import qualified Prelude
import Prelude hiding (span) import Prelude hiding (span)
import Debug.Trace import Debug.Trace
import Control.Monad
} }
%name parseExp Exp %name parseExp Exp
@ -121,13 +125,13 @@ Apat :: { Pat }
| '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 } | '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }
Decl :: { Decl } Decl :: { Decl }
: CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 }
| VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 }
| Pat Rhs { PatDecl $1 $2 }
: CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 (startPosn (head $1)) (endPosn $3) }
| VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 (startPosn $1) (endPosn $4) }
| Pat Rhs { PatDecl $1 $2 (startPosn $1) (endPosn $2) }
Rhs :: { Rhs } Rhs :: { Rhs }
: '=' Exp { BareRhs $2 [] }
| '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) }
: '=' Exp { BareRhs $2 [] (startPosn $1) (endPosn $2) }
| '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) (startPosn $1) (endPosn $4) }
LaidOutList(p) LaidOutList(p)
: START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) } : START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) }
@ -140,14 +144,17 @@ LOLContents(p, End)
Module :: { Module } Module :: { Module }
: 'module' CON ImportExportList 'where' LaidOutList(ModItem) : 'module' CON ImportExportList 'where' LaidOutList(ModItem)
{ Module { moduleName = toModId (getVar $2)
, moduleExports = $3
, moduleItems = thd $5 }
{% do { (imports,items) <- spanModuleItems (thd $5)
; pure $ Module { moduleName = toModId (getVar $2)
, moduleExports = fst $3
, moduleImports = imports
, moduleItems = items }
}
} }
ImportExportList :: { Maybe [NamespacedItem ParsedVar] }
: {-empty-} { Nothing }
| '(' CommaList(NSItem) ')' { Just $2 }
ImportExportList :: { (Maybe [NamespacedItem ParsedVar], Maybe Posn) }
: {-empty-} { (Nothing, Nothing) }
| '(' CommaList(NSItem) ')' { (Just $2, Just (endPosn $3)) }
NSItem :: { NamespacedItem ParsedVar } NSItem :: { NamespacedItem ParsedVar }
: VAR { IEVar (getVar $1) } : VAR { IEVar (getVar $1) }
@ -155,21 +162,21 @@ NSItem :: { NamespacedItem ParsedVar }
| 'module' CON { IEModule (getVar $2) } | 'module' CON { IEModule (getVar $2) }
ModItem :: { Item } ModItem :: { Item }
: Decl { ModDecl $1 }
| Import { ModImport $1 }
: Decl { ModDecl $1 (startPosn $1) (endPosn $1) }
| Import { ModImport $1 (startPosn $1) (endPosn $1) }
Import :: { ModuleImport ParsedVar } Import :: { ModuleImport ParsedVar }
: 'import' modid ImportExportList : 'import' modid ImportExportList
{ Import $2 $3 False Nothing }
{ Import $2 (fst $3) False Nothing (startPosn $1) (fromMaybe (endPosn $2) (snd $3)) }
| 'import' modid ImportExportList 'as' CON | 'import' modid ImportExportList 'as' CON
{ Import $2 $3 False (Just (getVar $5)) }
{ Import $2 (fst $3) False (Just (getVar $5)) (startPosn $1) (endPosn $5) }
| 'import' 'qualified' modid ImportExportList | 'import' 'qualified' modid ImportExportList
{ Import $3 $4 True Nothing }
{ Import $3 (fst $4) True Nothing (startPosn $1) (fromMaybe (endPosn $3) (snd $4)) }
| 'import' 'qualified' modid ImportExportList 'as' CON | 'import' 'qualified' modid ImportExportList 'as' CON
{ Import $3 $4 True (Just (getVar $6)) }
{ Import $3 (fst $4) True (Just (getVar $6)) (startPosn $1) (endPosn $6) }
Opt(p) Opt(p)
: { () } : { () }
@ -246,4 +253,24 @@ makeTuplePattern xs = TupPat xs
makeTuple [x] = ParenExp x makeTuple [x] = ParenExp x
makeTuple xs = Tuple xs makeTuple xs = Tuple xs
spanModuleItems xs = do
let
isImport (ModImport _ _ _) = True
isImport _ = False
(imports, items) = Prelude.span isImport xs
forM_ items $ \x -> case x of
ModImport _ start end ->
alexThrow $ \fname ->
ParseError { parseErrorMessage = "all import statements should be at the top of the file."
, parseErrorInlineDesc = Just "unexpected import statement"
, parseErrorFilename = fname
, parseErrorBegin = start
, parseErrorEnd = end
}
_ -> pure ()
pure (map itemImport imports, items)
} }

+ 181
- 0
src/Frontend/Lexer/Wrapper.hs View File

@ -0,0 +1,181 @@
module Frontend.Lexer.Wrapper where
import Control.Applicative as App (Applicative (..))
import Data.Word (Word8)
import Data.Int (Int64)
import qualified Data.Char
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString.Internal as ByteString (w2c)
import Frontend.Lexer.Tokens (Token)
import qualified Data.Text as T
import Frontend.Parser.Posn
type Byte = Word8
type AlexInput = ( Posn, -- current position,
Char, -- previous char
ByteString.ByteString, -- current input string
Int64) -- bytes consumed so far
ignorePendingBytes :: AlexInput -> AlexInput
ignorePendingBytes i = i -- no pending bytes when lexing bytestrings
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,_,cs,n) =
case ByteString.uncons cs of
Nothing -> Nothing
Just (b, cs') ->
let c = ByteString.w2c b
p' = alexMove p c
n' = n+1
in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n'))
-- -----------------------------------------------------------------------------
-- Token positions
-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of chacaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.
alexStartPos :: Posn
alexStartPos = Posn 1 1
alexMove :: Posn -> Char -> Posn
alexMove (Posn l c) '\t' = Posn l (c+8-((c-1) `mod` 8))
alexMove (Posn l _) '\n' = Posn (l+1) 1
alexMove (Posn l c) _ = Posn l (c+1)
data AlexState = AlexState {
alex_pos :: !Posn, -- position at current input location
alex_bpos:: !Int64, -- bytes consumed so far
alex_inp :: ByteString.ByteString, -- the current input
alex_chr :: !Char, -- the character before the input
alex_scd :: !Int -- the current startcode
, alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
, alex_fname :: String
}
runAlex :: String -> ByteString.ByteString -> Alex a -> Either ParseError a
runAlex fname input__ (Alex f) =
case f initState of
Left msg -> Left msg
Right ( _, a ) -> Right a
where
initState = AlexState
{ alex_bpos = 0
, alex_pos = alexStartPos
, alex_inp = input__
, alex_chr = '\n'
, alex_ust = alexInitUserState
, alex_scd = 0
, alex_fname = fname
}
newtype Alex a = Alex { unAlex :: AlexState -> Either ParseError (AlexState, a) }
instance Functor Alex where
fmap f a = Alex $ \s -> case unAlex a s of
Left msg -> Left msg
Right (s', a') -> Right (s', f a')
instance Applicative Alex where
pure a = Alex $ \s -> Right (s, a)
fa <*> a = Alex $ \s -> case unAlex fa s of
Left msg -> Left msg
Right (s', f) -> case unAlex a s' of
Left msg -> Left msg
Right (s'', b) -> Right (s'', f b)
instance Monad Alex where
m >>= k = Alex $ \s -> case unAlex m s of
Left msg -> Left msg
Right (s',a) -> unAlex (k a) s'
return = App.pure
alexGetInput :: Alex AlexInput
alexGetInput =
Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} ->
Right (s, (pos,c,inp__,bpos))
alexSetInput :: AlexInput -> Alex ()
alexSetInput (pos,c,inp__,bpos)
= Alex $ \s -> Right ( s { alex_pos = pos
, alex_bpos = bpos
, alex_chr = c
, alex_inp = inp__
}
, ())
alexError :: String -> Alex a
alexError message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing (alex_pos s) (alex_pos s))
alexErrorPosn :: Posn -> Posn -> String -> Alex a
alexErrorPosn start end message = Alex $ \s -> Left (ParseError message (alex_fname s) Nothing start end)
alexThrow :: (String -> ParseError) -> Alex a
alexThrow err = Alex $ \s -> Left (err (alex_fname s))
alexGetStartCode :: Alex Int
alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
alexSetStartCode :: Int -> Alex ()
alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
-- -----------------------------------------------------------------------------
-- Useful token actions
type AlexAction result = AlexInput -> Int64 -> Alex result
-- perform an action for this token, and set the start code to a new value
andBegin :: AlexAction result -> Int -> AlexAction result
(action `andBegin` code) input__ len = do
alexSetStartCode code
action input__ len
token :: (AlexInput -> Int64 -> token) -> AlexAction token
token t input__ len = return (t input__ len)
data LayoutState
= LetLayout { layoutCol :: Int }
| Layout { layoutCol :: Int }
| ExplicitLayout
deriving (Show)
data AlexUserState =
AlexUserState { layoutColumns :: ![LayoutState]
, startCodes :: ![Int]
, leastColumn :: !Int
, pendingLayoutKw :: Maybe (Int -> LayoutState)
, pendingTokens :: ![Token]
, pendingLambdaCase :: !Bool
, stringBuffer :: !T.Text
, stringStartPosn :: Maybe Posn
}
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState [] [] 0 Nothing [] False T.empty Nothing
data ParseError
= ParseError { parseErrorMessage :: String
, parseErrorFilename :: String
, parseErrorInlineDesc :: Maybe String
, parseErrorBegin :: Posn
, parseErrorEnd :: Posn
}
deriving (Eq, Show)

+ 34
- 7
src/Frontend/Syntax.hs View File

@ -72,15 +72,27 @@ instance HasPosn (FeType var) where
span sp ep x = SPType x (startPosn sp) (endPosn ep) span sp ep x = SPType x (startPosn sp) (endPosn ep)
data FeDecl var data FeDecl var
= PatDecl (FePat var) (FeRhs var)
| FunDecl var [FePat var] (FeRhs var)
| TySig [var] (FeType var)
= PatDecl { pdPat :: FePat var, declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
| FunDecl { fdVar :: var, fdArgs :: [FePat var], declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn }
| TySig { tsVars :: [var], tsType :: FeType var, declBegin :: Posn, declEnd :: Posn }
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
instance HasPosn (FeDecl var) where
startPosn = declBegin
endPosn = declEnd
span sp ep s = s { declBegin = startPosn sp, declEnd = endPosn ep }
data FeRhs var data FeRhs var
= BareRhs (FeExpr var) [FeDecl var]
= BareRhs { bareRhs :: FeExpr var, rhsWhere :: [FeDecl var], rhsBegin :: Posn, rhsEnd :: Posn }
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
instance HasPosn (FeRhs var) where
startPosn = rhsBegin
endPosn = rhsEnd
span sp ep s = s { rhsBegin = startPosn sp, rhsEnd = endPosn ep }
data Literal data Literal
= LitString T.Text = LitString T.Text
| LitNumber Integer | LitNumber Integer
@ -89,6 +101,7 @@ data Literal
data FeModule var data FeModule var
= Module { moduleName :: var = Module { moduleName :: var
, moduleExports :: Maybe [NamespacedItem var] , moduleExports :: Maybe [NamespacedItem var]
, moduleImports :: [ModuleImport var]
, moduleItems :: [ModuleItem var] , moduleItems :: [ModuleItem var]
} }
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
@ -98,9 +111,17 @@ data ModuleImport var
, importList :: Maybe [NamespacedItem var] , importList :: Maybe [NamespacedItem var]
, importQualified :: Bool , importQualified :: Bool
, importAlias :: Maybe var , importAlias :: Maybe var
, importBegin :: Posn
, importEnd :: Posn
} }
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
instance HasPosn (ModuleImport var) where
startPosn = importBegin
endPosn = importEnd
span sp ep s = s { importBegin = startPosn sp, importEnd = endPosn ep }
data NamespacedItem var data NamespacedItem var
= IEVar var = IEVar var
| IECon var | IECon var
@ -108,8 +129,8 @@ data NamespacedItem var
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
data ModuleItem var data ModuleItem var
= ModDecl (FeDecl var)
| ModImport (ModuleImport var)
= ModDecl { itemDecl :: FeDecl var, itemBegin :: Posn, itemEnd :: Posn }
| ModImport { itemImport :: ModuleImport var, itemBegin :: Posn, itemEnd :: Posn }
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
data ParsedVar data ParsedVar
@ -140,4 +161,10 @@ instance HasPosn ParsedVar where
startPosn = varBegin startPosn = varBegin
endPosn = varEnd endPosn = varEnd
span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }
span sp ep s = s { varBegin = startPosn sp, varEnd = endPosn ep }
instance HasPosn (ModuleItem var) where
startPosn = itemBegin
endPosn = itemEnd
span sp ep s = s { itemBegin = startPosn sp, itemEnd = endPosn ep }

+ 67
- 15
src/Main.hs View File

@ -1,40 +1,91 @@
-- {-# LANGUAGE BlockArguments #-} -- {-# LANGUAGE BlockArguments #-}
module Main where module Main where
import Control.Monad ( unless )
import qualified Data.ByteString.Lazy as Lbs import qualified Data.ByteString.Lazy as Lbs
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import Data.Foldable import Data.Foldable
import Frontend.Lexer.Tokens
import Frontend.Autogen.Lexer
import Debug.Trace
import Frontend.Autogen.Parser import Frontend.Autogen.Parser
import Frontend.Autogen.Lexer
import Frontend.Lexer.Wrapper
import Frontend.Lexer.Tokens
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import Control.Monad ( unless )
import System.Environment (getArgs) import System.Environment (getArgs)
import Text.Show.Pretty (pPrint)
import Debug.Trace
import Text.Show.Pretty (pPrint)
import Frontend.Parser.Posn
import System.Posix.Internals
import GHC.IO.Handle.FD (stdout, handleToFd)
import GHC.IO.FD
import Data.Maybe (fromMaybe)
main :: IO () main :: IO ()
main = do main = do
args <- getArgs args <- getArgs
for_ args $ \str -> do for_ args $ \str -> do
Main.lex pPrint parseMod =<< Lbs.readFile str
Main.lex str pPrint parseMod =<< Lbs.readFile str
testParse :: String -> IO () testParse :: String -> IO ()
testParse s = Main.lex print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
testParse s = Main.lex "<interactive>" print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
testLex :: String -> IO () testLex :: String -> IO ()
testLex s = Main.lex (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
lex :: (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
lex show cont arg = do
let x = runAlex arg cont
testLex s = Main.lex "<interactive>" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
lex fname show cont arg = do
let x = runAlex fname arg cont
case x of case x of
Left e -> print e
Left e -> showParseError e
Right x -> show x Right x -> show x
showParseError :: ParseError -> IO ()
showParseError pe = do
code <- lines <$> readFile (parseErrorFilename pe)
color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout
let
linum = posnLine (parseErrorBegin pe)
startcol = posnColm (parseErrorBegin pe)
multiline = linum /= posnLine (parseErrorEnd pe)
width
| multiline = 0
| otherwise = max 0 (posnColm (parseErrorEnd pe) - startcol - 1)
linum' = show linum
line = [ show linum ++ sep ++ code !! (linum - 1) | linum <- [ linum .. posnLine (parseErrorEnd pe) ] ]
padding = replicate (length linum') ' ' ++ sep
padding' = replicate (length linum') ' ' ++ " "
caret = replicate (startcol - 1) ' ' ++ red ++ "^"
squiggle = replicate width '~'
(red, bold, reset, sep)
| color = ("\x1b[31m", "\x1b[1m", "\x1b[0m", "\x1b[1;34m \9474 \x1b[0m")
| otherwise = ("", "", "", "")
putStr . unlines $
[ bold
++ parseErrorFilename pe
++ ":" ++ show linum ++ ":" ++ show startcol ++ ": "
++ red ++ "parse error:" ++ reset
, padding'
, init (unlines line)
, padding' ++ caret ++ squiggle ++ reset ++ fromMaybe "" ((" " ++) <$> parseErrorInlineDesc pe)
, ""
, padding' ++ parseErrorMessage pe
]
scan :: [Token] -> Alex [Token] scan :: [Token] -> Alex [Token]
scan acc = do scan acc = do
tok <- alexMonadScan tok <- alexMonadScan
@ -45,6 +96,7 @@ scan acc = do
, "just lexed: " ++ show tok , "just lexed: " ++ show tok
, "sc: " ++ show sc , "sc: " ++ show sc
, "sc stack: " ++ show (startCodes state) , "sc stack: " ++ show (startCodes state)
, "layout stack: " ++ show (layoutColumns state)
] ]
case tokenClass tok of case tokenClass tok of
TokEof -> pure (reverse acc) TokEof -> pure (reverse acc)


Loading…
Cancel
Save