diff --git a/src/Frontend/Autogen/Lexer.x b/src/Frontend/Autogen/Lexer.x index 392b694..7f6da51 100644 --- a/src/Frontend/Autogen/Lexer.x +++ b/src/Frontend/Autogen/Lexer.x @@ -16,21 +16,30 @@ import Frontend.Parser.Posn -- %wrapper "monadUserState-bytestring" -$alpha = [a-zA-Z] +$lower = [a-z] +$upper = [A-Z] +$alpha = [ $lower $upper ] + $digit = [0-9] +$alnum = [ $alpha $digit ] + $white_nol = $white # [\n\t] +$optail = [\! \# \$ \% \& \* \+ \. \/ \< \= \> \? \@ \\ \^ \| \- \~ \:] +$ophead = $optail # \: + +@conid = $upper [$alnum \_ \']* +@namespace = (@conid \.)* + tokens :- $white_nol+ ; \t { \_ _ -> alexError "tab character in source code" } <0,import_> "--" .* \n { just $ pushStartCode newline } - -<0,import_> - $alpha [$alpha $digit \_ \' \.]* { variableOrKeyword } <0> \= { always TokEqual } +<0> \` { always TokTick } <0> \: \: { always TokDoubleColon } <0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l } @@ -107,6 +116,21 @@ tokens :- () { emitPendingToken } +-- identifiers and keywords +<0,import_> { + $lower [$alpha $digit \_ \']* { variableOrKeyword } + $upper [$alpha $digit \_ \']* { yield (TokUnqual ConId) } + + $ophead $optail* { yield (TokUnqualOp VarId) } + : $optail* { yield (TokUnqualOp ConId) } + + @namespace $lower [$alpha $digit \_ \']* { qualifiedVariable } + @namespace $upper [$alpha $digit \_ \']* { qualifiedVariable } + + @namespace $ophead $optail* { qualifiedOperator } + @namespace : $optail* { qualifiedOperator } +} + { alexEOF :: Alex Token alexEOF = do @@ -344,34 +368,21 @@ closeBrace (Posn line col, _, _, _) _ = do pure (Token TokCBrace line col) variableOrKeyword :: AlexAction Token -variableOrKeyword (Posn l c, _, s, _) size = finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) +variableOrKeyword (Posn l c, _, s, _) size = + finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) + +qualifiedVariable :: AlexAction Token +qualifiedVariable (Posn l c, _, s, _) size = + finishVar TokUnqual TokQual l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) + +qualifiedOperator :: AlexAction Token +qualifiedOperator (Posn l c, _, s, _) size = + finishVar TokUnqualOp TokQualOp l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) finishVarKw :: Int -> Int -> T.Text -> Alex Token finishVarKw l c text | T.null text = undefined - | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $ - -- if we have a token like A.B.C, we reverse it and span at the - -- first (last) dot, so that we have, e.g.: - -- - -- "Aa.Bb.Cc" -> "cC.bB.aA" - -- "Cc.Bb.Aa" -> ("Cc", ".bB.aA") - -- - -- what we have then is the suffix and the prefix, but they've both - -- been reversed. so we unreverse them, and also drop the first - -- (last) dot from the prefix. - -- - -- if the suffix starts with an uppercase letter, it's a constructor - -- symbol (ConId). - let - txet = T.reverse text - (suffix', prefix') = T.span (/= '.') txet - - prefix = T.reverse (T.tail prefix') - suffix = T.reverse suffix' - in if Data.Char.isUpper (T.head suffix) - then Token (TokQual ConId prefix suffix) l c - else Token (TokQual VarId prefix suffix) l c - | Data.Char.isUpper (T.head text) = pure $ Token (TokUnqual ConId text) l c + | Data.Char.isUpper (T.head text) = finishVar TokUnqual TokQual l c text finishVarKw l c text = do sc <- alexGetStartCode @@ -445,6 +456,22 @@ finishVarKw l c text = do [] -> error "empty keyword/identifier" +finishVar :: (IdClass -> T.Text -> TokenClass) -> (IdClass -> T.Text -> T.Text -> TokenClass) -> Int -> Int -> T.Text -> Alex Token +finishVar tokunqual tokqual l c text + | T.null text = undefined + | Data.Char.isUpper (T.head text), T.singleton '.' `T.isInfixOf` text = pure $ + let + txet = T.reverse text + (suffix', prefix') = T.span (/= '.') txet + + prefix = T.reverse (T.tail prefix') + suffix = T.reverse suffix' + in if Data.Char.isUpper (T.head suffix) + then Token (tokqual ConId prefix suffix) l c + else Token (tokqual VarId prefix suffix) l c + | Data.Char.isUpper (T.head text) = pure $ Token (tokunqual ConId text) l c + | otherwise = pure $ Token (tokunqual VarId text) l c + earlyEnd :: TokenClass -> Int -> Int -> Alex Token earlyEnd tok l c = do popLayoutContext diff --git a/src/Frontend/Autogen/Parser.y b/src/Frontend/Autogen/Parser.y index b398324..2d9f8d9 100644 --- a/src/Frontend/Autogen/Parser.y +++ b/src/Frontend/Autogen/Parser.y @@ -37,6 +37,13 @@ import Control.Monad QVAR { Token (TokQual VarId _ _) _ _ } QCON { Token (TokQual ConId _ _) _ _ } + + VARSYM { Token (TokUnqualOp VarId _) _ _ } + CONSYM { Token (TokUnqualOp ConId _) _ _ } + + QVARSYM { Token (TokQualOp VarId _ _) _ _ } + QCONSYM { Token (TokQualOp ConId _ _) _ _ } + STRING { Token (TokString _) _ _ } 'eof' { Token TokEof _ _ } @@ -63,11 +70,13 @@ import Control.Monad ';' { Token TokSemi _ _ } '=' { Token TokEqual _ _ } ',' { Token TokComma _ _ } + '`' { Token TokTick _ _ } 'let' { Token TokLet _ _ } 'in' { Token TokIn _ _ } 'data' { Token TokData _ _ } 'case' { Token TokCase _ _ } + 'of' { Token TokOf _ _ } 'module' { Token TokModule _ _ } 'where' { Token TokWhere _ _ } @@ -82,8 +91,8 @@ Exp :: { Exp } | InfixExp { $1 } InfixExp :: { Exp } - : LeftExp {- ... -} { $1 } --- | LeftExp qop InfixExp { Infix $1 (getVar $2) $3 } + : LeftExp {- ... -} { $1 } + | LeftExp qop InfixExp { span $1 $3 $ Infix $1 $2 $3 } LeftExp :: { Exp } : '\\' Apat List(Apat) '->' Exp { span $1 $5 (makeLams ($2:$3) $5) } @@ -95,10 +104,10 @@ FuncExp :: { Exp } | Aexp { $1 } Aexp :: { Exp } - : qvar { span $1 $1 $ Ref $1 } - | qcon { span $1 $1 $ Con $1 } - | '(' CommaList(Exp) ')' { span $1 $3 $ makeTuple $2 } - | STRING { span $1 $1 $ Literal (LitString (getString $1)) } + : qvar { span $1 $1 $ Ref $1 } + | gcon { span $1 $1 $ Con $1 } + | '(' CommaList1(Exp) ')' { span $1 $3 $ makeTuple $2 } + | STRING { span $1 $1 $ Literal (LitString (getString $1)) } Type :: { Type } : Btype '->' Type { span $1 $3 $ Tyarr $1 $3 } @@ -109,20 +118,24 @@ Btype :: { Type } | Atype { $1 } Atype :: { Type } - : qvar { span $1 $1 $ Tyvar $1 } - | qcon { span $1 $1 $ Tycon $1 } - | '(' CommaList(Type) ')' { span $1 $3 $ makeTupleType $2 } + : qvar { span $1 $1 $ Tyvar $1 } + | gtycon { span $1 $1 $ Tycon $1 } + | '[' Type ']' { span $1 $3 $ Tylist $2 } + | '(' CommaList1(Type) ')' { span $1 $3 $ makeTupleType $2 } Pat :: { Pat } : Lpat { $1 } Lpat :: { Pat } - : Apat { $1 } + : Apat { $1 } + | gcon Apat List(Apat) { span $1 (endOfListPos $2 $3) (ConPat $1 ($2 : $3)) } + | Lpat qconop Pat { span $1 $3 $ InfixPat $1 $2 $3 } Apat :: { Pat } : VAR { span $1 $1 $ Var (getVar $1) } + | gcon { span $1 $1 $ ConPat $1 [] } | '_' { span $1 $1 $ Wildcard } - | '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 } + | '(' CommaList1(Pat) ')' { span $1 $3 $ makeTuplePattern $2 } Decl :: { Decl } : CommaList1(VAR) '::' Type { TySig (getVar `fmap` $1) $3 (startPosn (head $1)) (endPosn $3) } @@ -186,15 +199,72 @@ Semis : ';' Semis { () } | ';' { () } --- TODO: qualified names +qvarid :: { ParsedVar } +qvarid + : VAR { getVar $1 } + | QVAR { getVar $1 } + qvar :: { ParsedVar } -qvar : VAR { getVar $1 } - | QVAR { getVar $1 } + : qvarid { $1 } + | '(' qvarsym ')' { span $1 $3 $2 } + +qconid :: { ParsedVar } +qconid + : CON { getVar $1 } + | QCON { getVar $1 } + +qcon :: { ParsedVar } +qcon + : qconid { $1 } + | '(' qconsym ')' { span $1 $3 $2 } + +qvarsym :: { ParsedVar } +qvarsym : VARSYM { getVar $1 } + | QVARSYM { getVar $1 } + +qconsym :: { ParsedVar } +qconsym : CONSYM { getVar $1 } + | QCONSYM { getVar $1 } + +qvarop :: { ParsedVar } + : qvarsym { $1 } + | '`' qvar '`' { span $1 $3 $2 } + +qconop :: { ParsedVar } + : qconsym { $1 } + | '`' qcon '`' { span $1 $3 $2 } + +qop :: { ParsedVar } + : qvarop { $1 } + | qconop { $1 } + +gcon :: { ParsedVar } + : qcon { $1 } + | '(' Commas ')' { BuiltinId { varId = tupleConSymName $2 + , varBuiltin = BuiltinTuple $2 + , varBegin = startPosn $1 + , varEnd = endPosn $3 } + } + + | '[' ']' { BuiltinId { varId = T.pack "[]" + , varBuiltin = BuiltinNil + , varBegin = startPosn $1 + , varEnd = endPosn $2 } + } + +gtycon :: { ParsedVar } + : gcon { $1 } + | '(' '->' ')' { BuiltinId { varId = T.pack "[]" + , varBuiltin = BuiltinArrow + , varBegin = startPosn $1 + , varEnd = endPosn $2 } + } -qcon : CON { getVar $1 } - | QCON { getVar $1 } +Commas :: { Int } + : {- empty -} { 0 } + | ',' Commas { (let x = $2 in x `seq` 1 + x) } -modid : qcon { toModId $1 } +modid : qconid { toModId $1 } List(p) : {-empty-} { [] } @@ -225,13 +295,21 @@ type Item = ModuleItem ParsedVar lexer cont = alexMonadScan >>= cont -parseError x = alexError (show x) +parseError (token, expected) = do + (here, _, _, _) <- alexGetInput + alexThrow $ \fn -> ParseError { parseErrorMessage = "expecting one of: " ++ unwords expected + , parseErrorInlineDesc = Just ("unexpected " ++ show (tokenClass token)) + , parseErrorBegin = startPosn token + , parseErrorEnd = here + , parseErrorFilename = fn } makeLams xs b = foldr Lam b xs getVar :: Token -> ParsedVar -getVar tok@(Token (TokQual _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok } -getVar tok@(Token (TokUnqual _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok } +getVar tok@(Token (TokQual _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok } +getVar tok@(Token (TokUnqual _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok } +getVar tok@(Token (TokQualOp _ p s) _ _) = QualVar { varId = s, varPrefix = p, varBegin = startPosn tok, varEnd = endPosn tok } +getVar tok@(Token (TokUnqualOp _ s) _ _) = UnqualVar { varId = s, varBegin = startPosn tok, varEnd = endPosn tok } getVar _ = error "getVar non-var" getString (Token (TokString s) _ _) = s @@ -245,13 +323,22 @@ emptyLol t = LOL (endPosn t) [] lolCons :: a -> LOL a -> LOL a lolCons x (LOL p xs) = LOL p (x:xs) -makeTupleType [x] = ParenType x +makeTupleType [x] = + case x of + SPType ParenType{} _ _ -> x + _ -> ParenType x makeTupleType xs = Tytup xs -makeTuplePattern [x] = ParenPat x +makeTuplePattern [x] = + case x of + SPPat ParenPat{} _ _ -> x + _ -> ParenPat x makeTuplePattern xs = TupPat xs -makeTuple [x] = ParenExp x +makeTuple [x] = + case x of + SPExpr ParenExp{} _ _ -> x + _ -> ParenExp x makeTuple xs = Tuple xs spanModuleItems xs = do @@ -273,4 +360,12 @@ spanModuleItems xs = do _ -> pure () pure (map itemImport imports, items) + +tupleConSymName :: Int -> T.Text +tupleConSymName n = T.singleton '(' <> T.replicate n (T.singleton ',') <> T.singleton ')' + +endOfListPos :: HasPosn x => x -> [x] -> Posn +endOfListPos x [] = endPosn x +endOfListPos _ xs = endPosn (last xs) + } diff --git a/src/Frontend/Lexer/Tokens.hs b/src/Frontend/Lexer/Tokens.hs index a554a9c..6d9ba2d 100644 --- a/src/Frontend/Lexer/Tokens.hs +++ b/src/Frontend/Lexer/Tokens.hs @@ -9,6 +9,8 @@ data IdClass = ConId | VarId data TokenClass = TokUnqual IdClass Text | TokQual IdClass Text Text + | TokUnqualOp IdClass Text + | TokQualOp IdClass Text Text | TokString Text | TokEof @@ -27,6 +29,7 @@ data TokenClass | TokCPragma | TokDoubleColon + | TokTick | TokEqual | TokComma | TokPipe @@ -41,49 +44,60 @@ data TokenClass | TokQualified | TokAs | TokWhere - | TokLambdaCase | TokCase | TokOf | TokData | TokSemi - deriving (Eq, Show, Ord) + deriving (Eq, Ord) + +instance Show TokenClass where + show (TokUnqual _ id) = T.unpack id + show (TokQual _ ns id) = T.unpack ns ++ '.':T.unpack id + show (TokUnqualOp _ id) = T.unpack id + show (TokQualOp _ ns id) = T.unpack ns ++ '.':T.unpack id + show (TokString text) = show text + show TokEof = "" + + show TokLambda = "\\" + show TokArrow = "->" + show TokUnder = "_" + + show TokOParen = "(" + show TokOBrace = "{" + show TokOSquare = "[" + show TokOPragma = "{-" + + show TokCParen = ")" + show TokCBrace = "}" + show TokCSquare = "]" + show TokCPragma = "-}" + + show TokDoubleColon = "::" + show TokTick = "`" + show TokEqual = "=" + show TokComma = "," + show TokPipe = "|" + show TokSemi = ";" + + show TokLet = "let" + show TokIn = "in" + show TokLStart = "" + show TokLEnd = "" + + show TokModule = "module" + show TokImport = "import" + show TokQualified = "qualified" + show TokAs = "as" + show TokWhere = "where" + show TokCase = "case" + show TokOf = "of" + show TokData = "data" + tokSize :: TokenClass -> Int -tokSize (TokUnqual _ x) = T.length x -tokSize (TokQual _ x y) = T.length x + T.length y + 1 -tokSize TokEof = 0 -tokSize TokLambda = 1 -tokSize TokOParen = 1 -tokSize TokOBrace = 1 -tokSize TokOSquare = 1 -tokSize TokOPragma = 3 -tokSize TokCBrace = 1 -tokSize TokCParen = 1 -tokSize TokCSquare = 1 -tokSize TokCPragma = 3 -tokSize TokDoubleColon = 2 -tokSize TokEqual = 1 -tokSize TokComma = 1 -tokSize TokSemi = 1 -tokSize TokUnder = 1 -tokSize TokArrow = 2 -tokSize TokIn = 2 -tokSize TokLStart = 0 -tokSize TokLEnd = 0 -tokSize TokPipe = 1 -tokSize (TokString t) = 2 + T.length t -tokSize TokLambdaCase = length "\\case" -tokSize TokWhere = length "where" -tokSize TokData = length "data" -tokSize TokOf = length "of" -tokSize TokCase = length "case" -tokSize TokModule = length "module" -tokSize TokQualified = length "qualified" -tokSize TokImport = length "import" -tokSize TokLet = 3 -tokSize TokAs = 2 +tokSize = length . show data Token = Token { tokenClass :: TokenClass diff --git a/src/Frontend/Parser/Posn.hs b/src/Frontend/Parser/Posn.hs index 19bcedc..3e96f50 100644 --- a/src/Frontend/Parser/Posn.hs +++ b/src/Frontend/Parser/Posn.hs @@ -29,5 +29,10 @@ instance HasPosn (Posn, Posn, a) where span start end (_, _, x) = (startPosn start, endPosn end, x) +instance HasPosn Posn where + startPosn = id + endPosn = id + span _ y _ = endPosn y + thd :: (a, b, c) -> c thd (_, _, z) = z \ No newline at end of file diff --git a/src/Frontend/Syntax.hs b/src/Frontend/Syntax.hs index 8cfb38a..1ba6164 100644 --- a/src/Frontend/Syntax.hs +++ b/src/Frontend/Syntax.hs @@ -12,6 +12,7 @@ data FeExpr var | Let [FeDecl var] (FeExpr var) | Tuple [FeExpr var] | Annot (FeExpr var) (FeType var) + | Infix (FeExpr var) var (FeExpr var) | Literal Literal @@ -35,6 +36,8 @@ data FePat var | TupPat [FePat var] | LitPat Literal + | ConPat var [FePat var] + | InfixPat (FePat var) var (FePat var) | ParenPat (FePat var) -- parsed parentheses | SPPat (FePat var) Posn Posn @@ -56,6 +59,7 @@ data FeType var | Tyapp (FeType var) (FeType var) | Tyarr (FeType var) (FeType var) | Tytup [FeType var] + | Tylist (FeType var) | ParenType (FeType var) -- parsed parentheses | SPType (FeType var) Posn Posn @@ -150,12 +154,26 @@ data ParsedVar , varBegin :: Posn , varEnd :: Posn } + | BuiltinId + { varId :: Text + , varBuiltin :: BuiltinIdClass + , varBegin :: Posn + , varEnd :: Posn + } + deriving (Eq, Show, Ord) + +data BuiltinIdClass + = BuiltinTuple !Int + | BuiltinNil + | BuiltinArrow deriving (Eq, Show, Ord) toModId :: ParsedVar -> ParsedVar -toModId x@ModId{} = x -toModId (UnqualVar x y z) = ModId x y z +toModId x@ModId{} = x +toModId (UnqualVar x y z) = ModId x y z toModId (QualVar id pref b e) = ModId (pref <> T.singleton '.' <> id) b e +toModId BuiltinId{} = + error "Built-in variable can not be a module identifier!" instance HasPosn ParsedVar where startPosn = varBegin diff --git a/src/Main.hs b/src/Main.hs index ebc9214..fe0882c 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -31,23 +31,31 @@ main = do Main.lex str pPrint parseMod =<< Lbs.readFile str testParse :: String -> IO () -testParse s = Main.lex "" print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) +testParse s = Main.lex "" pPrint parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) testLex :: String -> IO () testLex s = Main.lex "" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO () lex fname show cont arg = do - let x = runAlex fname arg cont + let + x = runAlex fname arg cont + color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout + code <- if fname == "" + then pure . lines $ T.unpack (T.decodeUtf8 (Lbs.toStrict arg)) + else lines <$> readFile fname case x of - Left e -> showParseError e + Left e -> putStr $ showParseError color code e Right x -> show x -showParseError :: ParseError -> IO () -showParseError pe = do +printParseError :: ParseError -> IO () +printParseError pe = do code <- lines <$> readFile (parseErrorFilename pe) color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout + putStr $ showParseError color code pe +showParseError :: Bool -> [String] -> ParseError -> String +showParseError color code pe = do let linum = posnLine (parseErrorBegin pe) startcol = posnColm (parseErrorBegin pe) @@ -72,7 +80,7 @@ showParseError pe = do | color = ("\x1b[31m", "\x1b[1m", "\x1b[0m", "\x1b[1;34m \9474 \x1b[0m") | otherwise = ("", "", "", "") - putStr . unlines $ + unlines $ [ bold ++ parseErrorFilename pe ++ ":" ++ show linum ++ ":" ++ show startcol ++ ": "