Browse Source

Implement infix operators

and prettify parse error reporting :)
master
Amélia Liao 2 years ago
parent
commit
2bd96f2137
6 changed files with 261 additions and 94 deletions
  1. +55
    -28
      src/Frontend/Autogen/Lexer.x
  2. +118
    -23
      src/Frontend/Autogen/Parser.y
  3. +49
    -35
      src/Frontend/Lexer/Tokens.hs
  4. +5
    -0
      src/Frontend/Parser/Posn.hs
  5. +20
    -2
      src/Frontend/Syntax.hs
  6. +14
    -6
      src/Main.hs

+ 55
- 28
src/Frontend/Autogen/Lexer.x View File

@ -16,21 +16,30 @@ import Frontend.Parser.Posn
-- %wrapper "monadUserState-bytestring"
$alpha = [a-zA-Z]
$lower = [a-z]
$upper = [A-Z]
$alpha = [ $lower $upper ]
$digit = [0-9]
$alnum = [ $alpha $digit ]
$white_nol = $white # [\n\t]
$optail = [\! \# \$ \% \& \* \+ \. \/ \< \= \> \? \@ \\ \^ \| \- \~ \:]
$ophead = $optail # \:
@conid = $upper [$alnum \_ \']*
@namespace = (@conid \.)*
tokens :-
$white_nol+ ;
\t { \_ _ -> alexError "tab character in source code" }
<0,import_> "--" .* \n
{ just $ pushStartCode newline }
<0,import_>
$alpha [$alpha $digit \_ \' \.]* { variableOrKeyword }
<0> \= { always TokEqual }
<0> \` { always TokTick }
<0> \: \: { always TokDoubleColon }
<0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
@ -107,6 +116,21 @@ tokens :-
<pending> () { emitPendingToken }
-- identifiers and keywords
<0,import_> {
$lower [$alpha $digit \_ \']* { variableOrKeyword }
$upper [$alpha $digit \_ \']* { yield (TokUnqual ConId) }
$ophead $optail* { yield (TokUnqualOp VarId) }
: $optail* { yield (TokUnqualOp ConId) }
@namespace $lower [$alpha $digit \_ \']* { qualifiedVariable }
@namespace $upper [$alpha $digit \_ \']* { qualifiedVariable }
@namespace $ophead $optail* { qualifiedOperator }
@namespace : $optail* { qualifiedOperator }
}
{
alexEOF :: Alex Token
alexEOF = do
@ -344,34 +368,21 @@ closeBrace (Posn line col, _, _, _) _ = do
pure (Token TokCBrace line col)
variableOrKeyword :: AlexAction Token
variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
variableOrKeyword (Posn l c, _, s, _) size =
finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
qualifiedVariable :: AlexAction Token
qualifiedVariable (Posn l c, _, s, _) size =
finishVar TokUnqual TokQual l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
qualifiedOperator :: AlexAction Token
qualifiedOperator (Posn l c, _, s, _) size =
finishVar TokUnqualOp TokQualOp l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
finishVarKw :: Int -> Int -> T.Text -> Alex Token
finishVarKw l c text
| T.null text = undefined
| Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $
-- if we have a token like A.B.C, we reverse it and span at the
-- first (last) dot, so that we have, e.g.:
--
-- "Aa.Bb.Cc" -> "cC.bB.aA"
-- "Cc.Bb.Aa" -> ("Cc", ".bB.aA")
--
-- what we have then is the suffix and the prefix, but they've both
-- been reversed. so we unreverse them, and also drop the first
-- (last) dot from the prefix.
--
-- if the suffix starts with an uppercase letter, it's a constructor
-- symbol (ConId).
let
txet = T.reverse text
(suffix', prefix') = T.span (/= '.') txet
prefix = T.reverse (T.tail prefix')
suffix = T.reverse suffix'
in if Data.Char.isUpper (T.head suffix)
then Token (TokQual ConId prefix suffix) l c
else Token (TokQual VarId prefix suffix) l c
| Data.Char.isUpper (T.head text) = pure $ Token (TokUnqual ConId text) l c
| Data.Char.isUpper (T.head text) = finishVar TokUnqual TokQual l c text
finishVarKw l c text = do
sc <- alexGetStartCode
@ -445,6 +456,22 @@ finishVarKw l c text = do
[] -> error "empty keyword/identifier"
finishVar :: (IdClass -> T.Text -> TokenClass) -> (IdClass -> T.Text -> T.Text -> TokenClass) -> Int -> Int -> T.Text -> Alex Token
finishVar tokunqual tokqual l c text
| T.null text = undefined
| Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $
let
txet = T.reverse text
(suffix', prefix') = T.span (/= '.') txet
prefix = T.reverse (T.tail prefix')
suffix = T.reverse suffix'
in if Data.Char.isUpper (T.head suffix)
then Token (tokqual ConId prefix suffix) l c
else Token (tokqual VarId prefix suffix) l c
| Data.Char.isUpper (T.head text) = pure $ Token (tokunqual ConId text) l c
| otherwise = pure $ Token (tokunqual VarId text) l c
earlyEnd :: TokenClass -> Int -> Int -> Alex Token
earlyEnd tok l c = do
popLayoutContext


+ 118
- 23
src/Frontend/Autogen/Parser.y View File

@ -37,6 +37,13 @@ import Control.Monad
QVAR { Token (TokQual VarId _ _) _ _ }
QCON { Token (TokQual ConId _ _) _ _ }
VARSYM { Token (TokUnqualOp VarId _) _ _ }
CONSYM { Token (TokUnqualOp ConId _) _ _ }
QVARSYM { Token (TokQualOp VarId _ _) _ _ }
QCONSYM { Token (TokQualOp ConId _ _) _ _ }
STRING { Token (TokString _) _ _ }
'eof' { Token TokEof _ _ }
@ -63,11 +70,13 @@ import Control.Monad
';' { Token TokSemi _ _ }
'=' { Token TokEqual _ _ }
',' { Token TokComma _ _ }
'`' { Token TokTick _ _ }
'let' { Token TokLet _ _ }
'in' { Token TokIn _ _ }
'data' { Token TokData _ _ }
'case' { Token TokCase _ _ }
'of' { Token TokOf _ _ }
'module' { Token TokModule _ _ }
'where' { Token TokWhere _ _ }
@ -82,8 +91,8 @@ Exp :: { Exp }
| InfixExp { $1 }
InfixExp :: { Exp }
: LeftExp {- ... -} { $1 }
-- | LeftExp qop InfixExp { Infix $1 (getVar $2) $3 }
: LeftExp {- ... -} { $1 }
| LeftExp qop InfixExp { span $1 $3 $ Infix $1 $2 $3 }
LeftExp :: { Exp }
: '\\' Apat List(Apat) '->' Exp { span $1 $5 (makeLams ($2:$3) $5) }
@ -95,10 +104,10 @@ FuncExp :: { Exp }
| Aexp { $1 }
Aexp :: { Exp }
: qvar { span $1 $1 $ Ref $1 }
| qcon { span $1 $1 $ Con $1 }
| '(' CommaList(Exp) ')' { span $1 $3 $ makeTuple $2 }
| STRING { span $1 $1 $ Literal (LitString (getString $1)) }
: qvar { span $1 $1 $ Ref $1 }
| gcon { span $1 $1 $ Con $1 }
| '(' CommaList1(Exp) ')' { span $1 $3 $ makeTuple $2 }
| STRING { span $1 $1 $ Literal (LitString (getString $1)) }
Type :: { Type }
: Btype '->' Type { span $1 $3 $ Tyarr $1 $3 }
@ -109,20 +118,24 @@ Btype :: { Type }
| Atype { $1 }
Atype :: { Type }
: qvar { span $1 $1 $ Tyvar $1 }
| qcon { span $1 $1 $ Tycon $1 }
| '(' CommaList(Type) ')' { span $1 $3 $ makeTupleType $2 }
: qvar { span $1 $1 $ Tyvar $1 }
| gtycon { span $1 $1 $ Tycon $1 }
| '[' Type ']' { span $1 $3 $ Tylist $2 }
| '(' CommaList1(Type) ')' { span $1 $3 $ makeTupleType $2 }
Pat :: { Pat }
: Lpat { $1 }
Lpat :: { Pat }
: Apat { $1 }
: Apat { $1 }
| gcon Apat List(Apat) { span $1 (endOfListPos $2 $3) (ConPat $1 ($2 : $3)) }
| Lpat qconop Pat { span $1 $3 $ InfixPat $1 $2 $3 }
Apat :: { Pat }
: VAR { span $1 $1 $ Var (getVar $1) }
| gcon { span $1 $1 $ ConPat $1 [] }
| '_' { span $1 $1 $ Wildcard }
| '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }
| '(' CommaList1(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }
Decl :: { Decl }
: CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 (startPosn (head $1)) (endPosn $3) }
@ -186,15 +199,72 @@ Semis
: ';' Semis { () }
| ';' { () }
-- TODO: qualified names
qvarid :: { ParsedVar }
qvarid
: VAR { getVar $1 }
| QVAR { getVar $1 }
qvar :: { ParsedVar }
qvar : VAR { getVar $1 }
| QVAR { getVar $1 }
: qvarid { $1 }
| '(' qvarsym ')' { span $1 $3 $2 }
qconid :: { ParsedVar }
qconid
: CON { getVar $1 }
| QCON { getVar $1 }
qcon :: { ParsedVar }
qcon
: qconid { $1 }
| '(' qconsym ')' { span $1 $3 $2 }
qvarsym :: { ParsedVar }
qvarsym : VARSYM { getVar $1 }
| QVARSYM { getVar $1 }
qconsym :: { ParsedVar }
qconsym : CONSYM { getVar $1 }
| QCONSYM { getVar $1 }
qvarop :: { ParsedVar }
: qvarsym { $1 }
| '`' qvar '`' { span $1 $3 $2 }
qconop :: { ParsedVar }
: qconsym { $1 }
| '`' qcon '`' { span $1 $3 $2 }
qop :: { ParsedVar }
: qvarop { $1 }
| qconop { $1 }
gcon :: { ParsedVar }
: qcon { $1 }
| '(' Commas ')' { BuiltinId { varId = tupleConSymName $2
, varBuiltin = BuiltinTuple $2
, varBegin = startPosn $1
, varEnd = endPosn $3 }
}
| '[' ']' { BuiltinId { varId = T.pack "[]"
, varBuiltin = BuiltinNil
, varBegin = startPosn $1
, varEnd = endPosn $2 }
}
gtycon :: { ParsedVar }
: gcon { $1 }
| '(' '->' ')' { BuiltinId { varId = T.pack "[]"
, varBuiltin = BuiltinArrow
, varBegin = startPosn $1
, varEnd = endPosn $2 }
}
qcon : CON { getVar $1 }
| QCON { getVar $1 }
Commas :: { Int }
: {- empty -} { 0 }
| ',' Commas { (let x = $2 in x `seq` 1 + x) }
modid : qcon { toModId $1 }
modid : qconid { toModId $1 }
List(p)
: {-empty-} { [] }
@ -225,13 +295,21 @@ type Item = ModuleItem ParsedVar
lexer cont = alexMonadScan >>= cont
parseError x = alexError (show x)
parseError (token, expected) = do
(here, _, _, _) <- alexGetInput
alexThrow $ \fn -> ParseError { parseErrorMessage = "expecting one of: " ++ unwords expected
, parseErrorInlineDesc = Just ("unexpected " ++ show (tokenClass token))
, parseErrorBegin = startPosn token
, parseErrorEnd = here
, parseErrorFilename = fn }
makeLams xs b = foldr Lam b xs
getVar :: Token -> ParsedVar
getVar tok@(Token (TokQual _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok }
getVar tok@(Token (TokUnqual _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok }
getVar tok@(Token (TokQual _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok }
getVar tok@(Token (TokUnqual _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok }
getVar tok@(Token (TokQualOp _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok }
getVar tok@(Token (TokUnqualOp _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok }
getVar _ = error "getVar non-var"
getString (Token (TokString s) _ _) = s
@ -245,13 +323,22 @@ emptyLol t = LOL (endPosn t) []
lolCons :: a -> LOL a -> LOL a
lolCons x (LOL p xs) = LOL p (x:xs)
makeTupleType [x] = ParenType x
makeTupleType [x] =
case x of
SPType ParenType{} _ _ -> x
_ -> ParenType x
makeTupleType xs = Tytup xs
makeTuplePattern [x] = ParenPat x
makeTuplePattern [x] =
case x of
SPPat ParenPat{} _ _ -> x
_ -> ParenPat x
makeTuplePattern xs = TupPat xs
makeTuple [x] = ParenExp x
makeTuple [x] =
case x of
SPExpr ParenExp{} _ _ -> x
_ -> ParenExp x
makeTuple xs = Tuple xs
spanModuleItems xs = do
@ -273,4 +360,12 @@ spanModuleItems xs = do
_ -> pure ()
pure (map itemImport imports, items)
tupleConSymName :: Int -> T.Text
tupleConSymName n = T.singleton '(' <> T.replicate n (T.singleton ',') <> T.singleton ')'
endOfListPos :: HasPosn x => x -> [x] -> Posn
endOfListPos x [] = endPosn x
endOfListPos _ xs = endPosn (last xs)
}

+ 49
- 35
src/Frontend/Lexer/Tokens.hs View File

@ -9,6 +9,8 @@ data IdClass = ConId | VarId
data TokenClass
= TokUnqual IdClass Text
| TokQual IdClass Text Text
| TokUnqualOp IdClass Text
| TokQualOp IdClass Text Text
| TokString Text
| TokEof
@ -27,6 +29,7 @@ data TokenClass
| TokCPragma
| TokDoubleColon
| TokTick
| TokEqual
| TokComma
| TokPipe
@ -41,49 +44,60 @@ data TokenClass
| TokQualified
| TokAs
| TokWhere
| TokLambdaCase
| TokCase
| TokOf
| TokData
| TokSemi
deriving (Eq, Show, Ord)
deriving (Eq, Ord)
instance Show TokenClass where
show (TokUnqual _ id) = T.unpack id
show (TokQual _ ns id) = T.unpack ns ++ '.':T.unpack id
show (TokUnqualOp _ id) = T.unpack id
show (TokQualOp _ ns id) = T.unpack ns ++ '.':T.unpack id
show (TokString text) = show text
show TokEof = "<EOF>"
show TokLambda = "\\"
show TokArrow = "->"
show TokUnder = "_"
show TokOParen = "("
show TokOBrace = "{"
show TokOSquare = "["
show TokOPragma = "{-"
show TokCParen = ")"
show TokCBrace = "}"
show TokCSquare = "]"
show TokCPragma = "-}"
show TokDoubleColon = "::"
show TokTick = "`"
show TokEqual = "="
show TokComma = ","
show TokPipe = "|"
show TokSemi = ";"
show TokLet = "let"
show TokIn = "in"
show TokLStart = ""
show TokLEnd = ""
show TokModule = "module"
show TokImport = "import"
show TokQualified = "qualified"
show TokAs = "as"
show TokWhere = "where"
show TokCase = "case"
show TokOf = "of"
show TokData = "data"
tokSize :: TokenClass -> Int
tokSize (TokUnqual _ x) = T.length x
tokSize (TokQual _ x y) = T.length x + T.length y + 1
tokSize TokEof = 0
tokSize TokLambda = 1
tokSize TokOParen = 1
tokSize TokOBrace = 1
tokSize TokOSquare = 1
tokSize TokOPragma = 3
tokSize TokCBrace = 1
tokSize TokCParen = 1
tokSize TokCSquare = 1
tokSize TokCPragma = 3
tokSize TokDoubleColon = 2
tokSize TokEqual = 1
tokSize TokComma = 1
tokSize TokSemi = 1
tokSize TokUnder = 1
tokSize TokArrow = 2
tokSize TokIn = 2
tokSize TokLStart = 0
tokSize TokLEnd = 0
tokSize TokPipe = 1
tokSize (TokString t) = 2 + T.length t
tokSize TokLambdaCase = length "\\case"
tokSize TokWhere = length "where"
tokSize TokData = length "data"
tokSize TokOf = length "of"
tokSize TokCase = length "case"
tokSize TokModule = length "module"
tokSize TokQualified = length "qualified"
tokSize TokImport = length "import"
tokSize TokLet = 3
tokSize TokAs = 2
tokSize = length . show
data Token
= Token { tokenClass :: TokenClass


+ 5
- 0
src/Frontend/Parser/Posn.hs View File

@ -29,5 +29,10 @@ instance HasPosn (Posn, Posn, a) where
span start end (_, _, x) = (startPosn start, endPosn end, x)
instance HasPosn Posn where
startPosn = id
endPosn = id
span _ y _ = endPosn y
thd :: (a, b, c) -> c
thd (_, _, z) = z

+ 20
- 2
src/Frontend/Syntax.hs View File

@ -12,6 +12,7 @@ data FeExpr var
| Let [FeDecl var] (FeExpr var)
| Tuple [FeExpr var]
| Annot (FeExpr var) (FeType var)
| Infix (FeExpr var) var (FeExpr var)
| Literal Literal
@ -35,6 +36,8 @@ data FePat var
| TupPat [FePat var]
| LitPat Literal
| ConPat var [FePat var]
| InfixPat (FePat var) var (FePat var)
| ParenPat (FePat var) -- parsed parentheses
| SPPat (FePat var) Posn Posn
@ -56,6 +59,7 @@ data FeType var
| Tyapp (FeType var) (FeType var)
| Tyarr (FeType var) (FeType var)
| Tytup [FeType var]
| Tylist (FeType var)
| ParenType (FeType var) -- parsed parentheses
| SPType (FeType var) Posn Posn
@ -150,12 +154,26 @@ data ParsedVar
, varBegin :: Posn
, varEnd :: Posn
}
| BuiltinId
{ varId :: Text
, varBuiltin :: BuiltinIdClass
, varBegin :: Posn
, varEnd :: Posn
}
deriving (Eq, Show, Ord)
data BuiltinIdClass
= BuiltinTuple !Int
| BuiltinNil
| BuiltinArrow
deriving (Eq, Show, Ord)
toModId :: ParsedVar -> ParsedVar
toModId x@ModId{} = x
toModId (UnqualVar x y z) = ModId x y z
toModId x@ModId{} = x
toModId (UnqualVar x y z) = ModId x y z
toModId (QualVar id pref b e) = ModId (pref <> T.singleton '.' <> id) b e
toModId BuiltinId{} =
error "Built-in variable can not be a module identifier!"
instance HasPosn ParsedVar where
startPosn = varBegin


+ 14
- 6
src/Main.hs View File

@ -31,23 +31,31 @@ main = do
Main.lex str pPrint parseMod =<< Lbs.readFile str
testParse :: String -> IO ()
testParse s = Main.lex "<interactive>" print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
testParse s = Main.lex "<interactive>" pPrint parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
testLex :: String -> IO ()
testLex s = Main.lex "<interactive>" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
lex fname show cont arg = do
let x = runAlex fname arg cont
let
x = runAlex fname arg cont
color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout
code <- if fname == "<interactive>"
then pure . lines $ T.unpack (T.decodeUtf8 (Lbs.toStrict arg))
else lines <$> readFile fname
case x of
Left e -> showParseError e
Left e -> putStr $ showParseError color code e
Right x -> show x
showParseError :: ParseError -> IO ()
showParseError pe = do
printParseError :: ParseError -> IO ()
printParseError pe = do
code <- lines <$> readFile (parseErrorFilename pe)
color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout
putStr $ showParseError color code pe
showParseError :: Bool -> [String] -> ParseError -> String
showParseError color code pe = do
let
linum = posnLine (parseErrorBegin pe)
startcol = posnColm (parseErrorBegin pe)
@ -72,7 +80,7 @@ showParseError pe = do
| color = ("\x1b[31m", "\x1b[1m", "\x1b[0m", "\x1b[1;34m \9474 \x1b[0m")
| otherwise = ("", "", "", "")
putStr . unlines $
unlines $
[ bold
++ parseErrorFilename pe
++ ":" ++ show linum ++ ":" ++ show startcol ++ ": "


Loading…
Cancel
Save