commit eaeeaec5be5a5eb054dfd4fa6b894e56a506e59e Author: Abigail Magalhães Date: Sat Jul 31 20:03:21 2021 -0300 Initial commit w/ initial fragments of parser and lexer diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1ba0c46 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.stack-work + +# tools +/repl.sh +/.ghci + +# grammar reference +/Haskell Grammar.txt + +# test files +/*.hs + +# except for this one +!/Setup.hs \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ca46626 --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +Copyright Abigail Magalhães (c) 2021 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Abigail Magalhães nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..755c852 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# ahc diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/ahc.cabal b/ahc.cabal new file mode 100644 index 0000000..67f901b --- /dev/null +++ b/ahc.cabal @@ -0,0 +1,35 @@ +name: ahc +version: 0.1.0.0 +homepage: https://github.com/plt-hokusai/ahc#readme +license: BSD3 +license-file: LICENSE +author: Abigail Magalhães +maintainer: me@abby.how +copyright: 2021 Abigail Magalhães +category: Web +build-type: Simple +cabal-version: >=2.0 +extra-source-files: README.md + +executable ahc + hs-source-dirs: src + main-is: Main.hs + default-language: Haskell2010 + build-depends: base ^>= 4.14 + , mtl ^>= 2.2 + , syb ^>= 0.7 + , text ^>= 1.2 + , array ^>= 0.5 + , containers ^>= 0.6 + , bytestring ^>= 0.10 + , pretty-show ^>= 1.10 + + other-modules: + Frontend.Autogen.Lexer, + Frontend.Autogen.Parser, + Frontend.Lexer.Tokens, + Frontend.Parser.Posn, + Frontend.Syntax + + build-tool-depends: alex:alex >= 3.2.4 && < 4.0 + , happy:happy >= 1.19.12 && < 2.0 diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 0000000..1961153 --- /dev/null +++ b/hie.yaml @@ -0,0 +1,10 @@ +cradle: + multi: + - path: "./src/Frontend/Autogen" + config: + cradle: + none: + - path: "./" + config: + cradle: + stack: \ No newline at end of file diff --git a/src/Frontend/Autogen/Lexer.x b/src/Frontend/Autogen/Lexer.x new file mode 100644 index 0000000..2f47527 --- /dev/null +++ b/src/Frontend/Autogen/Lexer.x @@ -0,0 +1,373 @@ +{ +module Frontend.Autogen.Lexer where + +import qualified Data.ByteString.Lazy as Lbs +import qualified Data.Text.Encoding as T +import qualified Data.Text as T + +import Frontend.Lexer.Tokens + +import Control.Monad + +import Debug.Trace +} + +%wrapper "monadUserState-bytestring" + +$alpha = [a-zA-Z] +$digit = [0-9] +$white_nol = $white # \n + +tokens :- + $white_nol+ ; + +<0,module_header> "--" .* \n + { just $ pushStartCode newline } + +<0,module_header,import_> + $alpha [$alpha $digit \_ \']* { variableOrKeyword } + +<0> \= { always TokEqual } +<0> \: \: { always TokDoubleColon } + +<0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l } +<0> "->" { always TokArrow } +<0> "_" { always TokUnder } + +<0> \{ { always TokOBrace } +<0> \[ { always TokOSquare } + +<0,module_header,import_> { + \, { always TokComma } + \( { always TokOParen } + \) { always TokCParen } +} + +<0> \} { closeBrace } +<0> \] { always TokCSquare } + +<0> \;+ { always TokSemi } + +<0> \n { just $ pushStartCode newline } + +<0> \" { just startString } + + { + \\ \" { stringSeg (T.singleton '"') } + \\ \\ { stringSeg (T.singleton '\\') } + + \\ a { stringSeg (T.singleton '\a') } + \\ b { stringSeg (T.singleton '\b') } + \\ f { stringSeg (T.singleton '\f') } + \\ n { stringSeg (T.singleton '\n') } + \\ \n { stringSeg (T.singleton '\n') } + \\ r { stringSeg (T.singleton '\r') } + \\ v { stringSeg (T.singleton '\v') } + \\ t { stringSeg (T.singleton '\t') } + + \" { endString } + + [^\\\"]+ { stringChar } +} + +<0,newline,comment,import_,module_header> + "{-" { just $ pushStartCode comment } + + { + "-}" { \i l -> popStartCode *> skip i l } + . ; +} + +-- newline: emit a semicolon when de-denting + { + \n ; + "--" .* \n ; + + () { offsideRule } +} + +-- layout: indentation of the next token is context for offside rule + { + \n ; + "--" .* \n ; + + \{ { openBrace } + () { startLayout } +} + + { + \n { just $ pushStartCode newline } + "--" .* \n { just $ pushStartCode newline } +} + + () { emptyLayout } + + () { emitPendingToken } + + { + \n ; +} + +{ +alexEOF :: Alex Token +alexEOF = do + (AlexPn _ l c, _, _, _) <- alexGetInput + + maybePopImportSC + + state <- getUserState + + unless (T.null (stringBuffer state)) $ do + alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c + + case layoutColumns state of + -- EOF is allowed to close as many layout contexts as there are + -- pending (number of pending layout contexts is the length of the + -- list minus one, since there's the one initial layout context.) + _:tail -> do + mapUserState $ \s -> + s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c] + , layoutColumns = [] + } + pushStartCode pending + pure (Token TokLEnd l c) + _ -> pure $ Token TokEof l c + +yield k inp i = clearPendingLC *> yield' k inp i + +yield' k (AlexPn _ l c, _, s, _) i = do + pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) + +setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } +clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False } + +always :: TokenClass -> AlexInput -> Int64 -> Alex Token +always k x i = yield (const k) x i + +startString = do + mapUserState $ \s -> s { stringBuffer = T.empty } + pushStartCode string + +endString (AlexPn _ l c, _, _, _) _i = do + text <- stringBuffer <$> getUserState + mapUserState $ \s -> s { stringBuffer = T.empty } + popStartCode + pure (Token (TokString text) l c) + +stringChar input@(AlexPn _ _ _, _, buf, _) i = do + mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } + alexMonadScan + +stringSeg text _ _ = do + mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text } + alexMonadScan + +data LayoutState + = LetLayout { layoutCol :: Int } + | Layout { layoutCol :: Int } + | ModLayout { layoutCol :: Int } + deriving (Show) + +data AlexUserState = + AlexUserState { layoutColumns :: [LayoutState] + , startCodes :: [Int] + , leastColumn :: Int + + , pendingLayoutKw :: Maybe (Int -> LayoutState) + , pendingTokens :: [Token] + , pendingLambdaCase :: Bool + , haveModuleHeader :: Bool + + , stringBuffer :: T.Text + } + +alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty + +emitPendingToken :: AlexAction Token +emitPendingToken _ _ = do + t <- getUserState + case pendingTokens t of + [] -> do + popStartCode + alexMonadScan + (x:xs) -> do + mapUserState $ \s -> s { pendingTokens = xs } + pure x + +delayToken :: Token -> Alex () +delayToken t = do + mapUserState $ \s -> s { pendingTokens = t:pendingTokens s } + pushStartCode pending + +just :: Alex a -> AlexAction Token +just k _ _ = k *> alexMonadScan + +getUserState :: Alex AlexUserState +getUserState = Alex $ \s -> Right (s, alex_ust s) + +mapUserState :: (AlexUserState -> AlexUserState) -> Alex () +mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ()) + +pushStartCode :: Int -> Alex () +pushStartCode c = do + sc <- alexGetStartCode + mapUserState $ \s -> s { startCodes = sc:startCodes s } + alexSetStartCode c + +popStartCode :: Alex () +popStartCode = do + sc <- startCodes <$> getUserState + case sc of + [] -> alexSetStartCode 0 + (x:xs) -> do + mapUserState $ \s -> s { startCodes = xs } + alexSetStartCode x + +offsideRule :: AlexInput -> Int64 -> Alex Token +offsideRule (AlexPn _ line col, _, s, _) _ = do + ~(col':ctx) <- layoutColumns <$> getUserState + case col `compare` layoutCol col' of + EQ -> do + popStartCode + maybePopImportSC + pure (Token TokSemi line col) + GT -> do + popStartCode + alexMonadScan + LT -> do + mapUserState $ \s -> s { layoutColumns = ctx } + pure (Token TokLEnd line col) + +maybePopImportSC :: Alex () +maybePopImportSC = do + startcode <- alexGetStartCode + when (startcode == import_) popStartCode + +emptyLayout :: AlexInput -> Int64 -> Alex Token +emptyLayout (AlexPn _ line col, _, _, _) _ = do + popStartCode + pushStartCode newline + pure (Token TokLEnd line col) + +startLayout :: AlexInput -> Int64 -> Alex Token +startLayout (AlexPn _ line col, _, _, _) _ = do + state <- getUserState + popStartCode + let + col' = + case layoutColumns state of + [] -> 0 + (x:_) -> layoutCol x + + layoutKind = case pendingLayoutKw state of + Just s -> s + Nothing -> Layout + + if col < col' + then pushStartCode empty_layout + else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s } + pure (Token TokLStart line col) + +getLayout :: Alex LayoutState +getLayout = do + t <- getUserState + case layoutColumns t of + (x:_) -> pure x + _ -> error "No layout?" + +openBrace :: AlexInput -> Int64 -> Alex Token +openBrace (AlexPn _ line col, _, _, _) _ = do + popStartCode + mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s } + pure (Token TokOBrace line col) + +popLayoutContext :: Alex () +popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) } + +closeBrace :: AlexInput -> Int64 -> Alex Token +closeBrace (AlexPn _ line col, _, _, _) _ = do + ~(col':_) <- layoutColumns <$> getUserState + if layoutCol col' < 0 + then popLayoutContext + else pure () + pure (Token TokCBrace line col) + +variableOrKeyword :: AlexAction Token +variableOrKeyword (AlexPn _ l c, _, s, _) size = do + sc <- alexGetStartCode + state <- getUserState + + clearPendingLC + + let + text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) + col = layoutCol (head (layoutColumns state)) + + case T.unpack text of + "as" + | sc == import_, c > col -> pure (Token TokAs l c) + | sc == import_ -> offsideKeyword (TokVar text) l c + | otherwise -> pure (Token (TokVar text) l c) + + "qualified" + | sc == import_, c > col -> pure (Token TokQualified l c) + | sc == import_ -> offsideKeyword (TokVar text) l c + | otherwise -> pure (Token (TokVar text) l c) + + "let" -> laidOut' (Just LetLayout) TokLet l c + "in" -> do + laidout <- getLayout + case laidout of + -- let .. in critical pair: + -- 'in' is allowed to close a layout context before the offside rule would apply. + LetLayout _ -> earlyEnd TokIn l c + _ -> pure (Token TokIn l c) + + "data" -> pure (Token TokData l c) + + "where" -> do + -- if this is the where in the module_header, then + -- pop the start code so that the offside rule applies again + when (sc == module_header) popStartCode + laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c + + "case" + -- "case" is a layout token if it's immediately following a \\ + | pendingLambdaCase state -> laidOut TokCase l c + | otherwise -> pure (Token TokCase l c) + + "import" -> do + pushStartCode import_ + pure (Token TokImport l c) + + "of" -> laidOut TokOf l c + + "module" -> do + unless (haveModuleHeader state) $ do + mapUserState $ \s -> s { haveModuleHeader = True } + pushStartCode module_header + pure (Token TokModule l c) + + (x:_) + | Data.Char.isUpper x -> pure (Token (TokCon text) l c) + | otherwise -> pure (Token (TokVar text) l c) + + [] -> error "empty keyword/identifier" + +earlyEnd tok l c = do + popLayoutContext + delayToken (Token tok l c) + pure (Token TokLEnd l c) + +offsideKeyword tok l c = do + popLayoutContext + delayToken (Token tok l c) + pure (Token TokSemi l c) + +laidOut' n x l c = do + pushStartCode layout + mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n } + pure (Token x l c) + +laidOut = laidOut' Nothing +} diff --git a/src/Frontend/Autogen/Parser.y b/src/Frontend/Autogen/Parser.y new file mode 100644 index 0000000..cc44323 --- /dev/null +++ b/src/Frontend/Autogen/Parser.y @@ -0,0 +1,237 @@ +{ +{-# LANGUAGE FlexibleContexts, FlexibleInstances, ViewPatterns #-} +module Frontend.Autogen.Parser where + +import qualified Data.Text as T +import Data.Text (Text) + +import Frontend.Lexer.Tokens +import Frontend.Parser.Posn +import Frontend.Syntax +import Frontend.Autogen.Lexer + +import Prelude hiding (span) + +import Debug.Trace + +} + +%name parseExp Exp +%name parseMod Module +%name parseType Type + +%tokentype { Token } + +%monad { Alex } +%lexer { lexer } { Token TokEof _ _ } + +%errorhandlertype explist +%error { parseError } + +%token + VAR { Token (TokVar _) _ _ } + CON { Token (TokCon _) _ _ } + STRING { Token (TokString _) _ _ } + 'eof' { Token TokEof _ _ } + + '(' { Token TokOParen _ _ } + ')' { Token TokCParen _ _ } + + '{' { Token TokOBrace _ _ } + '}' { Token TokCBrace _ _ } + + START { Token TokLStart _ _ } + END { Token TokLEnd _ _ } + + '[' { Token TokOSquare _ _ } + ']' { Token TokCSquare _ _ } + + '{-#' { Token TokOPragma _ _ } + '#-}' { Token TokCPragma _ _ } + + '\\' { Token TokLambda _ _ } + + '->' { Token TokArrow _ _ } + '_' { Token TokUnder _ _ } + '::' { Token TokDoubleColon _ _ } + ';' { Token TokSemi _ _ } + '=' { Token TokEqual _ _ } + ',' { Token TokComma _ _ } + + 'let' { Token TokLet _ _ } + 'in' { Token TokIn _ _ } + 'data' { Token TokData _ _ } + 'case' { Token TokCase _ _ } + 'module' { Token TokModule _ _ } + 'where' { Token TokWhere _ _ } + + 'import' { Token TokImport _ _ } + 'as' { Token TokAs _ _ } + 'qualified' { Token TokQualified _ _ } + +%% + +Exp :: { Exp } + : InfixExp '::' Type { span $1 $3 $ Annot $1 $3 } + | InfixExp { $1 } + +InfixExp :: { Exp } + : LeftExp {- ... -} { $1 } +-- | LeftExp qop InfixExp { Infix $1 (getVar $2) $3 } + +LeftExp :: { Exp } + : '\\' Apat List(Apat) '->' Exp { span $1 $5 (makeLams ($2:$3) $5) } + | 'let' LaidOutList(Decl) 'in' Exp { span $1 $4 $ Let (thd $2) $4 } + | FuncExp { $1 } + +FuncExp :: { Exp } + : FuncExp Aexp { span $1 $2 $ App $1 $2 } + | Aexp { $1 } + +Aexp :: { Exp } + : qvar { span $1 $1 $ Ref (getVar $1) } + | qcon { span $1 $1 $ Con (getVar $1) } + | '(' CommaList(Exp) ')' { span $1 $3 $ makeTuple $2 } + | STRING { span $1 $1 $ Literal (LitString (getString $1)) } + +Type :: { Type } + : Btype '->' Type { span $1 $3 $ Tyarr $1 $3 } + | Btype { $1 } + +Btype :: { Type } + : Btype Atype { span $1 $2 $ Tyapp $1 $2 } + | Atype { $1 } + +Atype :: { Type } + : qvar { span $1 $1 $ Tyvar (getVar $1) } + | qcon { span $1 $1 $ Tycon (getVar $1) } + | '(' CommaList(Type) ')' { span $1 $3 $ makeTupleType $2 } + + +Pat :: { Pat } + : Lpat { $1 } + +Lpat :: { Pat } + : Apat { $1 } + +Apat :: { Pat } + : VAR { span $1 $1 $ Var (getVar $1) } + | '_' { span $1 $1 $ Wildcard } + | '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 } + +Decl :: { Decl } + : VAR '::' Type { TySig (getVar $1) $3 } + | VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 } + | Pat Rhs { PatDecl $1 $2 } + +Rhs :: { Rhs } + : '=' Exp { BareRhs $2 [] } + | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) } + +LaidOutList(p) + : START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) } + | '{' Opt(Semis) LOLContents(p, '}') { (startPosn $1, lolEnd $3, lolList $3) } + +LOLContents(p, End) + : p Semis LOLContents(p,End) { lolCons $1 $3 } + | p Opt(Semis) End { lolCons $1 (emptyLol $3) } + | Opt(Semis) End { emptyLol $2 } + +Module :: { Module } + : 'module' CON ImportExportList 'where' LaidOutList(ModItem) + { Module { moduleName = getVar $2 + , moduleExports = $3 + , moduleItems = thd $5 } + } + +ImportExportList :: { Maybe [NamespacedItem Text] } + : {-empty-} { Nothing } + | '(' CommaList(NSItem) ')' { Just $2 } + +NSItem :: { NamespacedItem Text } + : VAR { IEVar (getVar $1) } + | CON { IECon (getVar $1) } + | 'module' CON { IEModule (getVar $2) } + +ModItem :: { Item } + : Decl { ModDecl $1 } + | Import { ModImport $1 } + +Import + : 'import' qcon ImportExportList + { Import (getVar $2) $3 False Nothing } + + | 'import' qcon ImportExportList 'as' CON + { Import (getVar $2) $3 False (Just (getVar $5)) } + + | 'import' 'qualified' qcon ImportExportList + { Import (getVar $3) $4 True Nothing } + + | 'import' 'qualified' qcon ImportExportList 'as' CON + { Import (getVar $3) $4 True (Just (getVar $6)) } + +Opt(p) + : { () } + | p { () } + +Semis + : ';' Semis { () } + | ';' { () } + +-- TODO: qualified names +qvar : VAR { $1 } +qcon : CON { $1 } + +List(p) + : {-empty-} { [] } + | p List(p) { $1:$2 } + +CommaList(p) + : {-empty-} { [] } + | p { [$1] } + | p ',' CommaList(p) { $1:$3 } + +Block(p) + : START p END { (startPosn $1, endPosn $3, $2) } + | '{' p '}' { (startPosn $1, endPosn $3, $2) } + +{ + +type Exp = FeExpr Text +type Pat = FePat Text +type Decl = FeDecl Text +type Type = FeType Text +type Rhs = FeRhs Text +type Module = FeModule Text +type Item = ModuleItem Text + +lexer cont = alexMonadScan >>= cont + +parseError x = alexError (show x) + +makeLams xs b = foldr Lam b xs + +getVar (Token (TokVar s) _ _) = s +getVar (Token (TokCon s) _ _) = s +getVar _ = error "getVar non-var" + +getString (Token (TokString s) _ _) = s +getString _ = error "getString non-string" + +data LOL a = LOL { lolEnd :: Posn, lolList :: [a] } + +emptyLol :: HasPosn x => x -> LOL a +emptyLol t = LOL (endPosn t) [] + +lolCons :: a -> LOL a -> LOL a +lolCons x (LOL p xs) = LOL p (x:xs) + +makeTupleType [x] = ParenType x +makeTupleType xs = Tytup xs + +makeTuplePattern [x] = ParenPat x +makeTuplePattern xs = TupPat xs + +makeTuple [x] = ParenExp x +makeTuple xs = Tuple xs +} diff --git a/src/Frontend/Lexer/Tokens.hi b/src/Frontend/Lexer/Tokens.hi new file mode 100644 index 0000000..2db5bba Binary files /dev/null and b/src/Frontend/Lexer/Tokens.hi differ diff --git a/src/Frontend/Lexer/Tokens.hs b/src/Frontend/Lexer/Tokens.hs new file mode 100644 index 0000000..97e8844 --- /dev/null +++ b/src/Frontend/Lexer/Tokens.hs @@ -0,0 +1,90 @@ +module Frontend.Lexer.Tokens where + +import qualified Data.Text as T +import Data.Text (Text) + +data TokenClass + = TokVar Text + | TokCon Text + | TokString Text + | TokEof + + | TokLambda + | TokArrow + | TokUnder + + | TokOParen + | TokOBrace + | TokOSquare + | TokOPragma + + | TokCParen + | TokCBrace + | TokCSquare + | TokCPragma + + | TokDoubleColon + | TokEqual + | TokComma + | TokPipe + + | TokLet + | TokIn + | TokLStart + | TokLEnd + + | TokModule + | TokImport + | TokQualified + | TokAs + | TokWhere + | TokLambdaCase + | TokCase + | TokOf + + | TokData + + | TokSemi + deriving (Eq, Show, Ord) + +tokSize :: TokenClass -> Int +tokSize (TokVar x) = T.length x +tokSize (TokCon x) = T.length x +tokSize TokEof = 0 +tokSize TokLambda = 1 +tokSize TokOParen = 1 +tokSize TokOBrace = 1 +tokSize TokOSquare = 1 +tokSize TokOPragma = 3 +tokSize TokCBrace = 1 +tokSize TokCParen = 1 +tokSize TokCSquare = 1 +tokSize TokCPragma = 3 +tokSize TokDoubleColon = 2 +tokSize TokEqual = 1 +tokSize TokComma = 1 +tokSize TokSemi = 1 +tokSize TokUnder = 1 +tokSize TokArrow = 2 +tokSize TokIn = 2 +tokSize TokLStart = 0 +tokSize TokLEnd = 0 +tokSize TokPipe = 1 +tokSize (TokString t) = 2 + T.length t +tokSize TokLambdaCase = length "\\case" +tokSize TokWhere = length "where" +tokSize TokData = length "data" +tokSize TokOf = length "of" +tokSize TokCase = length "case" +tokSize TokModule = length "module" +tokSize TokQualified = length "qualified" +tokSize TokImport = length "import" +tokSize TokLet = 3 +tokSize TokAs = 2 + +data Token + = Token { tokenClass :: TokenClass + , tokStartLine :: !Int + , tokStartCol :: !Int + } + deriving (Eq, Show, Ord) diff --git a/src/Frontend/Lexer/Tokens.o b/src/Frontend/Lexer/Tokens.o new file mode 100644 index 0000000..b738599 Binary files /dev/null and b/src/Frontend/Lexer/Tokens.o differ diff --git a/src/Frontend/Parser/Posn.hi b/src/Frontend/Parser/Posn.hi new file mode 100644 index 0000000..9ed0dc8 Binary files /dev/null and b/src/Frontend/Parser/Posn.hi differ diff --git a/src/Frontend/Parser/Posn.hs b/src/Frontend/Parser/Posn.hs new file mode 100644 index 0000000..19bcedc --- /dev/null +++ b/src/Frontend/Parser/Posn.hs @@ -0,0 +1,33 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DefaultSignatures #-} +module Frontend.Parser.Posn where + +import Frontend.Lexer.Tokens +import Data.Typeable +data Posn + = Posn { posnLine :: {-# UNPACK #-} !Int + , posnColm :: {-# UNPACK #-} !Int + } + deriving (Eq, Show, Ord) + +class HasPosn a where + startPosn :: a -> Posn + endPosn :: a -> Posn + + span :: (HasPosn b, HasPosn c) => b -> c -> a -> a + + default span :: Typeable a => b -> c -> a -> a + span _ _ x = error $ "Can't span " ++ show (typeOf x) + +instance HasPosn Token where + startPosn (Token _ l c) = Posn l c + endPosn (Token t l c) = Posn l (c + tokSize t) + +instance HasPosn (Posn, Posn, a) where + startPosn (s, _, _) = s + endPosn (_, e, _) = e + + span start end (_, _, x) = (startPosn start, endPosn end, x) + +thd :: (a, b, c) -> c +thd (_, _, z) = z \ No newline at end of file diff --git a/src/Frontend/Parser/Posn.o b/src/Frontend/Parser/Posn.o new file mode 100644 index 0000000..51c50f7 Binary files /dev/null and b/src/Frontend/Parser/Posn.o differ diff --git a/src/Frontend/Syntax.hi b/src/Frontend/Syntax.hi new file mode 100644 index 0000000..47bb172 Binary files /dev/null and b/src/Frontend/Syntax.hi differ diff --git a/src/Frontend/Syntax.hs b/src/Frontend/Syntax.hs new file mode 100644 index 0000000..969038a --- /dev/null +++ b/src/Frontend/Syntax.hs @@ -0,0 +1,112 @@ +module Frontend.Syntax where + +import Frontend.Parser.Posn +import qualified Data.Text as T + +data FeExpr var + = Ref var + | Con var + | App (FeExpr var) (FeExpr var) + | Lam (FePat var) (FeExpr var) + | Let [FeDecl var] (FeExpr var) + | Tuple [FeExpr var] + | Annot (FeExpr var) (FeType var) + + | Literal Literal + + | ParenExp (FeExpr var) + | SPExpr (FeExpr var) Posn Posn + deriving (Eq, Show, Ord) + +instance HasPosn (FeExpr var) where + startPosn (SPExpr _ s _) = s + startPosn _ = error "no start posn in parsed expression?" + + endPosn (SPExpr _ _ e) = e + endPosn _ = error "no end posn in parsed expression?" + + span sp ep (SPExpr x _ _) = SPExpr x (startPosn sp) (endPosn ep) + span sp ep x = SPExpr x (startPosn sp) (endPosn ep) + +data FePat var + = Var var + | Wildcard + + | TupPat [FePat var] + | LitPat Literal + + | ParenPat (FePat var) -- parsed parentheses + | SPPat (FePat var) Posn Posn + deriving (Eq, Show, Ord) + +instance HasPosn (FePat var) where + startPosn (SPPat _ s _) = s + startPosn _ = error "no start posn in parsed expression?" + + endPosn (SPPat _ _ e) = e + endPosn _ = error "no end posn in parsed pattern?" + + span sp ep (SPPat x _ _) = SPPat x (startPosn sp) (endPosn ep) + span sp ep x = SPPat x (startPosn sp) (endPosn ep) + +data FeType var + = Tyvar var + | Tycon var + | Tyapp (FeType var) (FeType var) + | Tyarr (FeType var) (FeType var) + | Tytup [FeType var] + + | ParenType (FeType var) -- parsed parentheses + | SPType (FeType var) Posn Posn + deriving (Eq, Show, Ord) + +instance HasPosn (FeType var) where + startPosn (SPType _ s _) = s + startPosn _ = error "no start posn in parsed type?" + + endPosn (SPType _ _ e) = e + endPosn _ = error "no end posn in parsed type?" + + span sp ep (SPType x _ _) = SPType x (startPosn sp) (endPosn ep) + span sp ep x = SPType x (startPosn sp) (endPosn ep) + +data FeDecl var + = PatDecl (FePat var) (FeRhs var) + | FunDecl var [FePat var] (FeRhs var) + | TySig var (FeType var) + deriving (Eq, Show, Ord) + +data FeRhs var + = BareRhs (FeExpr var) [FeDecl var] + deriving (Eq, Show, Ord) + +data Literal + = LitString T.Text + | LitNumber Integer + deriving (Eq, Show, Ord) + +data FeModule var + = Module { moduleName :: var + , moduleExports :: Maybe [NamespacedItem var] + , moduleItems :: [ModuleItem var] + } + deriving (Eq, Show, Ord) + +data ModuleImport var + = Import { importMod :: var + , importList :: Maybe [NamespacedItem var] + , importQualified :: Bool + , importAlias :: Maybe var + } + deriving (Eq, Show, Ord) + +data NamespacedItem var + = IEVar var + | IECon var + | IEModule var + deriving (Eq, Show, Ord) + +data ModuleItem var + = ModDecl (FeDecl var) + | ModImport (ModuleImport var) + deriving (Eq, Show, Ord) \ No newline at end of file diff --git a/src/Frontend/Syntax.o b/src/Frontend/Syntax.o new file mode 100644 index 0000000..9c53603 Binary files /dev/null and b/src/Frontend/Syntax.o differ diff --git a/src/Main.hi b/src/Main.hi new file mode 100644 index 0000000..51f1c32 Binary files /dev/null and b/src/Main.hi differ diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 0000000..31348eb --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,51 @@ +-- {-# LANGUAGE BlockArguments #-} +module Main where + +import qualified Data.ByteString.Lazy as Lbs +import Data.Foldable + +import Frontend.Lexer.Tokens +import Frontend.Autogen.Lexer +import Frontend.Autogen.Parser + +import qualified Data.Text.Encoding as T +import qualified Data.Text as T +import Control.Monad ( unless ) +import System.Environment (getArgs) +import Text.Show.Pretty (pPrint) + +import Debug.Trace + +main :: IO () +main = do + args <- getArgs + for_ args $ \str -> do + Main.lex pPrint parseMod =<< Lbs.readFile str + +testParse :: String -> IO () +testParse s = Main.lex print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) + +testLex :: String -> IO () +testLex s = Main.lex (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) + +lex :: (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO () +lex show cont arg = do + let x = runAlex arg cont + case x of + Left e -> print e + Right x -> show x + +scan :: [Token] -> Alex [Token] +scan acc = do + tok <- alexMonadScan + sc <- alexGetStartCode + state <- getUserState + traceM . unlines $ + [ "----------------" + , "just lexed: " ++ show tok + , "sc: " ++ show sc + , "sc stack: " ++ show (startCodes state) + ] + case tokenClass tok of + TokEof -> pure (reverse acc) + _ -> scan (tok:acc) diff --git a/src/Main.o b/src/Main.o new file mode 100644 index 0000000..c5cf209 Binary files /dev/null and b/src/Main.o differ diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000..697c908 --- /dev/null +++ b/stack.yaml @@ -0,0 +1,67 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# https://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# +# The location of a snapshot can be provided as a file or url. Stack assumes +# a snapshot provided as a file might change, whereas a url resource does not. +# +# resolver: ./custom-snapshot.yaml +# resolver: https://example.com/snapshots/2018-01-01.yaml +resolver: + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# subdirs: +# - auto-update +# - wai +packages: +- . +# Dependency packages to be pulled from upstream that are not in the resolver. +# These entries can reference officially published versions as well as +# forks / in-progress versions pinned to a git hash. For example: +# +# extra-deps: +# - acme-missiles-0.3 +# - git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# +# extra-deps: [] + +# Override default flag values for local packages and extra-deps +# flags: {} + +# Extra package databases containing global packages +# extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=2.7" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor diff --git a/stack.yaml.lock b/stack.yaml.lock new file mode 100644 index 0000000..6ee0750 --- /dev/null +++ b/stack.yaml.lock @@ -0,0 +1,13 @@ +# This file was autogenerated by Stack. +# You should not edit this file by hand. +# For more information, please see the documentation at: +# https://docs.haskellstack.org/en/stable/lock_files + +packages: [] +snapshots: +- completed: + size: 585603 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml + sha256: 694573e96dca34db5636edb1fe6c96bb233ca0f9fb96c1ead1671cdfa9bd73e9 + original: + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml