Browse Source

Initial commit w/ initial fragments of parser and lexer

master
Amélia Liao 2 years ago
commit
eaeeaec5be
22 changed files with 1068 additions and 0 deletions
  1. +14
    -0
      .gitignore
  2. +30
    -0
      LICENSE
  3. +1
    -0
      README.md
  4. +2
    -0
      Setup.hs
  5. +35
    -0
      ahc.cabal
  6. +10
    -0
      hie.yaml
  7. +373
    -0
      src/Frontend/Autogen/Lexer.x
  8. +237
    -0
      src/Frontend/Autogen/Parser.y
  9. BIN
      src/Frontend/Lexer/Tokens.hi
  10. +90
    -0
      src/Frontend/Lexer/Tokens.hs
  11. BIN
      src/Frontend/Lexer/Tokens.o
  12. BIN
      src/Frontend/Parser/Posn.hi
  13. +33
    -0
      src/Frontend/Parser/Posn.hs
  14. BIN
      src/Frontend/Parser/Posn.o
  15. BIN
      src/Frontend/Syntax.hi
  16. +112
    -0
      src/Frontend/Syntax.hs
  17. BIN
      src/Frontend/Syntax.o
  18. BIN
      src/Main.hi
  19. +51
    -0
      src/Main.hs
  20. BIN
      src/Main.o
  21. +67
    -0
      stack.yaml
  22. +13
    -0
      stack.yaml.lock

+ 14
- 0
.gitignore View File

@ -0,0 +1,14 @@
.stack-work
# tools
/repl.sh
/.ghci
# grammar reference
/Haskell Grammar.txt
# test files
/*.hs
# except for this one
!/Setup.hs

+ 30
- 0
LICENSE View File

@ -0,0 +1,30 @@
Copyright Abigail Magalhães (c) 2021
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Abigail Magalhães nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 1
- 0
README.md View File

@ -0,0 +1 @@
# ahc

+ 2
- 0
Setup.hs View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

+ 35
- 0
ahc.cabal View File

@ -0,0 +1,35 @@
name: ahc
version: 0.1.0.0
homepage: https://github.com/plt-hokusai/ahc#readme
license: BSD3
license-file: LICENSE
author: Abigail Magalhães
maintainer: [email protected]
copyright: 2021 Abigail Magalhães
category: Web
build-type: Simple
cabal-version: >=2.0
extra-source-files: README.md
executable ahc
hs-source-dirs: src
main-is: Main.hs
default-language: Haskell2010
build-depends: base ^>= 4.14
, mtl ^>= 2.2
, syb ^>= 0.7
, text ^>= 1.2
, array ^>= 0.5
, containers ^>= 0.6
, bytestring ^>= 0.10
, pretty-show ^>= 1.10
other-modules:
Frontend.Autogen.Lexer,
Frontend.Autogen.Parser,
Frontend.Lexer.Tokens,
Frontend.Parser.Posn,
Frontend.Syntax
build-tool-depends: alex:alex >= 3.2.4 && < 4.0
, happy:happy >= 1.19.12 && < 2.0

+ 10
- 0
hie.yaml View File

@ -0,0 +1,10 @@
cradle:
multi:
- path: "./src/Frontend/Autogen"
config:
cradle:
none:
- path: "./"
config:
cradle:
stack:

+ 373
- 0
src/Frontend/Autogen/Lexer.x View File

@ -0,0 +1,373 @@
{
module Frontend.Autogen.Lexer where
import qualified Data.ByteString.Lazy as Lbs
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import Frontend.Lexer.Tokens
import Control.Monad
import Debug.Trace
}
%wrapper "monadUserState-bytestring"
$alpha = [a-zA-Z]
$digit = [0-9]
$white_nol = $white # \n
tokens :-
$white_nol+ ;
<0,module_header> "--" .* \n
{ just $ pushStartCode newline }
<0,module_header,import_>
$alpha [$alpha $digit \_ \']* { variableOrKeyword }
<0> \= { always TokEqual }
<0> \: \: { always TokDoubleColon }
<0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
<0> "->" { always TokArrow }
<0> "_" { always TokUnder }
<0> \{ { always TokOBrace }
<0> \[ { always TokOSquare }
<0,module_header,import_> {
\, { always TokComma }
\( { always TokOParen }
\) { always TokCParen }
}
<0> \} { closeBrace }
<0> \] { always TokCSquare }
<0> \;+ { always TokSemi }
<0> \n { just $ pushStartCode newline }
<0> \" { just startString }
<string> {
\\ \" { stringSeg (T.singleton '"') }
\\ \\ { stringSeg (T.singleton '\\') }
\\ a { stringSeg (T.singleton '\a') }
\\ b { stringSeg (T.singleton '\b') }
\\ f { stringSeg (T.singleton '\f') }
\\ n { stringSeg (T.singleton '\n') }
\\ \n { stringSeg (T.singleton '\n') }
\\ r { stringSeg (T.singleton '\r') }
\\ v { stringSeg (T.singleton '\v') }
\\ t { stringSeg (T.singleton '\t') }
\" { endString }
[^\\\"]+ { stringChar }
}
<0,newline,comment,import_,module_header>
"{-" { just $ pushStartCode comment }
<comment> {
"-}" { \i l -> popStartCode *> skip i l }
. ;
}
-- newline: emit a semicolon when de-denting
<newline> {
\n ;
"--" .* \n ;
() { offsideRule }
}
-- layout: indentation of the next token is context for offside rule
<layout> {
\n ;
"--" .* \n ;
\{ { openBrace }
() { startLayout }
}
<import_> {
\n { just $ pushStartCode newline }
"--" .* \n { just $ pushStartCode newline }
}
<empty_layout> () { emptyLayout }
<pending> () { emitPendingToken }
<module_header> {
\n ;
}
{
alexEOF :: Alex Token
alexEOF = do
(AlexPn _ l c, _, _, _) <- alexGetInput
maybePopImportSC
state <- getUserState
unless (T.null (stringBuffer state)) $ do
alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
case layoutColumns state of
-- EOF is allowed to close as many layout contexts as there are
-- pending (number of pending layout contexts is the length of the
-- list minus one, since there's the one initial layout context.)
_:tail -> do
mapUserState $ \s ->
s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]
, layoutColumns = []
}
pushStartCode pending
pure (Token TokLEnd l c)
_ -> pure $ Token TokEof l c
yield k inp i = clearPendingLC *> yield' k inp i
yield' k (AlexPn _ l c, _, s, _) i = do
pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
always :: TokenClass -> AlexInput -> Int64 -> Alex Token
always k x i = yield (const k) x i
startString = do
mapUserState $ \s -> s { stringBuffer = T.empty }
pushStartCode string
endString (AlexPn _ l c, _, _, _) _i = do
text <- stringBuffer <$> getUserState
mapUserState $ \s -> s { stringBuffer = T.empty }
popStartCode
pure (Token (TokString text) l c)
stringChar input@(AlexPn _ _ _, _, buf, _) i = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
alexMonadScan
stringSeg text _ _ = do
mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
alexMonadScan
data LayoutState
= LetLayout { layoutCol :: Int }
| Layout { layoutCol :: Int }
| ModLayout { layoutCol :: Int }
deriving (Show)
data AlexUserState =
AlexUserState { layoutColumns :: [LayoutState]
, startCodes :: [Int]
, leastColumn :: Int
, pendingLayoutKw :: Maybe (Int -> LayoutState)
, pendingTokens :: [Token]
, pendingLambdaCase :: Bool
, haveModuleHeader :: Bool
, stringBuffer :: T.Text
}
alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
emitPendingToken :: AlexAction Token
emitPendingToken _ _ = do
t <- getUserState
case pendingTokens t of
[] -> do
popStartCode
alexMonadScan
(x:xs) -> do
mapUserState $ \s -> s { pendingTokens = xs }
pure x
delayToken :: Token -> Alex ()
delayToken t = do
mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }
pushStartCode pending
just :: Alex a -> AlexAction Token
just k _ _ = k *> alexMonadScan
getUserState :: Alex AlexUserState
getUserState = Alex $ \s -> Right (s, alex_ust s)
mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
pushStartCode :: Int -> Alex ()
pushStartCode c = do
sc <- alexGetStartCode
mapUserState $ \s -> s { startCodes = sc:startCodes s }
alexSetStartCode c
popStartCode :: Alex ()
popStartCode = do
sc <- startCodes <$> getUserState
case sc of
[] -> alexSetStartCode 0
(x:xs) -> do
mapUserState $ \s -> s { startCodes = xs }
alexSetStartCode x
offsideRule :: AlexInput -> Int64 -> Alex Token
offsideRule (AlexPn _ line col, _, s, _) _ = do
~(col':ctx) <- layoutColumns <$> getUserState
case col `compare` layoutCol col' of
EQ -> do
popStartCode
maybePopImportSC
pure (Token TokSemi line col)
GT -> do
popStartCode
alexMonadScan
LT -> do
mapUserState $ \s -> s { layoutColumns = ctx }
pure (Token TokLEnd line col)
maybePopImportSC :: Alex ()
maybePopImportSC = do
startcode <- alexGetStartCode
when (startcode == import_) popStartCode
emptyLayout :: AlexInput -> Int64 -> Alex Token
emptyLayout (AlexPn _ line col, _, _, _) _ = do
popStartCode
pushStartCode newline
pure (Token TokLEnd line col)
startLayout :: AlexInput -> Int64 -> Alex Token
startLayout (AlexPn _ line col, _, _, _) _ = do
state <- getUserState
popStartCode
let
col' =
case layoutColumns state of
[] -> 0
(x:_) -> layoutCol x
layoutKind = case pendingLayoutKw state of
Just s -> s
Nothing -> Layout
if col < col'
then pushStartCode empty_layout
else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
pure (Token TokLStart line col)
getLayout :: Alex LayoutState
getLayout = do
t <- getUserState
case layoutColumns t of
(x:_) -> pure x
_ -> error "No layout?"
openBrace :: AlexInput -> Int64 -> Alex Token
openBrace (AlexPn _ line col, _, _, _) _ = do
popStartCode
mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
pure (Token TokOBrace line col)
popLayoutContext :: Alex ()
popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
closeBrace :: AlexInput -> Int64 -> Alex Token
closeBrace (AlexPn _ line col, _, _, _) _ = do
~(col':_) <- layoutColumns <$> getUserState
if layoutCol col' < 0
then popLayoutContext
else pure ()
pure (Token TokCBrace line col)
variableOrKeyword :: AlexAction Token
variableOrKeyword (AlexPn _ l c, _, s, _) size = do
sc <- alexGetStartCode
state <- getUserState
clearPendingLC
let
text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
col = layoutCol (head (layoutColumns state))
case T.unpack text of
"as"
| sc == import_, c > col -> pure (Token TokAs l c)
| sc == import_ -> offsideKeyword (TokVar text) l c
| otherwise -> pure (Token (TokVar text) l c)
"qualified"
| sc == import_, c > col -> pure (Token TokQualified l c)
| sc == import_ -> offsideKeyword (TokVar text) l c
| otherwise -> pure (Token (TokVar text) l c)
"let" -> laidOut' (Just LetLayout) TokLet l c
"in" -> do
laidout <- getLayout
case laidout of
-- let .. in critical pair:
-- 'in' is allowed to close a layout context before the offside rule would apply.
LetLayout _ -> earlyEnd TokIn l c
_ -> pure (Token TokIn l c)
"data" -> pure (Token TokData l c)
"where" -> do
-- if this is the where in the module_header, then
-- pop the start code so that the offside rule applies again
when (sc == module_header) popStartCode
laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
"case"
-- "case" is a layout token if it's immediately following a \\
| pendingLambdaCase state -> laidOut TokCase l c
| otherwise -> pure (Token TokCase l c)
"import" -> do
pushStartCode import_
pure (Token TokImport l c)
"of" -> laidOut TokOf l c
"module" -> do
unless (haveModuleHeader state) $ do
mapUserState $ \s -> s { haveModuleHeader = True }
pushStartCode module_header
pure (Token TokModule l c)
(x:_)
| Data.Char.isUpper x -> pure (Token (TokCon text) l c)
| otherwise -> pure (Token (TokVar text) l c)
[] -> error "empty keyword/identifier"
earlyEnd tok l c = do
popLayoutContext
delayToken (Token tok l c)
pure (Token TokLEnd l c)
offsideKeyword tok l c = do
popLayoutContext
delayToken (Token tok l c)
pure (Token TokSemi l c)
laidOut' n x l c = do
pushStartCode layout
mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
pure (Token x l c)
laidOut = laidOut' Nothing
}

+ 237
- 0
src/Frontend/Autogen/Parser.y View File

@ -0,0 +1,237 @@
{
{-# LANGUAGE FlexibleContexts, FlexibleInstances, ViewPatterns #-}
module Frontend.Autogen.Parser where
import qualified Data.Text as T
import Data.Text (Text)
import Frontend.Lexer.Tokens
import Frontend.Parser.Posn
import Frontend.Syntax
import Frontend.Autogen.Lexer
import Prelude hiding (span)
import Debug.Trace
}
%name parseExp Exp
%name parseMod Module
%name parseType Type
%tokentype { Token }
%monad { Alex }
%lexer { lexer } { Token TokEof _ _ }
%errorhandlertype explist
%error { parseError }
%token
VAR { Token (TokVar _) _ _ }
CON { Token (TokCon _) _ _ }
STRING { Token (TokString _) _ _ }
'eof' { Token TokEof _ _ }
'(' { Token TokOParen _ _ }
')' { Token TokCParen _ _ }
'{' { Token TokOBrace _ _ }
'}' { Token TokCBrace _ _ }
START { Token TokLStart _ _ }
END { Token TokLEnd _ _ }
'[' { Token TokOSquare _ _ }
']' { Token TokCSquare _ _ }
'{-#' { Token TokOPragma _ _ }
'#-}' { Token TokCPragma _ _ }
'\\' { Token TokLambda _ _ }
'->' { Token TokArrow _ _ }
'_' { Token TokUnder _ _ }
'::' { Token TokDoubleColon _ _ }
';' { Token TokSemi _ _ }
'=' { Token TokEqual _ _ }
',' { Token TokComma _ _ }
'let' { Token TokLet _ _ }
'in' { Token TokIn _ _ }
'data' { Token TokData _ _ }
'case' { Token TokCase _ _ }
'module' { Token TokModule _ _ }
'where' { Token TokWhere _ _ }
'import' { Token TokImport _ _ }
'as' { Token TokAs _ _ }
'qualified' { Token TokQualified _ _ }
%%
Exp :: { Exp }
: InfixExp '::' Type { span $1 $3 $ Annot $1 $3 }
| InfixExp { $1 }
InfixExp :: { Exp }
: LeftExp {- ... -} { $1 }
-- | LeftExp qop InfixExp { Infix $1 (getVar $2) $3 }
LeftExp :: { Exp }
: '\\' Apat List(Apat) '->' Exp { span $1 $5 (makeLams ($2:$3) $5) }
| 'let' LaidOutList(Decl) 'in' Exp { span $1 $4 $ Let (thd $2) $4 }
| FuncExp { $1 }
FuncExp :: { Exp }
: FuncExp Aexp { span $1 $2 $ App $1 $2 }
| Aexp { $1 }
Aexp :: { Exp }
: qvar { span $1 $1 $ Ref (getVar $1) }
| qcon { span $1 $1 $ Con (getVar $1) }
| '(' CommaList(Exp) ')' { span $1 $3 $ makeTuple $2 }
| STRING { span $1 $1 $ Literal (LitString (getString $1)) }
Type :: { Type }
: Btype '->' Type { span $1 $3 $ Tyarr $1 $3 }
| Btype { $1 }
Btype :: { Type }
: Btype Atype { span $1 $2 $ Tyapp $1 $2 }
| Atype { $1 }
Atype :: { Type }
: qvar { span $1 $1 $ Tyvar (getVar $1) }
| qcon { span $1 $1 $ Tycon (getVar $1) }
| '(' CommaList(Type) ')' { span $1 $3 $ makeTupleType $2 }
Pat :: { Pat }
: Lpat { $1 }
Lpat :: { Pat }
: Apat { $1 }
Apat :: { Pat }
: VAR { span $1 $1 $ Var (getVar $1) }
| '_' { span $1 $1 $ Wildcard }
| '(' CommaList(Pat) ')' { span $1 $3 $ makeTuplePattern $2 }
Decl :: { Decl }
: VAR '::' Type { TySig (getVar $1) $3 }
| VAR Apat List(Apat) Rhs { FunDecl (getVar $1) ($2:$3) $4 }
| Pat Rhs { PatDecl $1 $2 }
Rhs :: { Rhs }
: '=' Exp { BareRhs $2 [] }
| '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) }
LaidOutList(p)
: START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) }
| '{' Opt(Semis) LOLContents(p, '}') { (startPosn $1, lolEnd $3, lolList $3) }
LOLContents(p, End)
: p Semis LOLContents(p,End) { lolCons $1 $3 }
| p Opt(Semis) End { lolCons $1 (emptyLol $3) }
| Opt(Semis) End { emptyLol $2 }
Module :: { Module }
: 'module' CON ImportExportList 'where' LaidOutList(ModItem)
{ Module { moduleName = getVar $2
, moduleExports = $3
, moduleItems = thd $5 }
}
ImportExportList :: { Maybe [NamespacedItem Text] }
: {-empty-} { Nothing }
| '(' CommaList(NSItem) ')' { Just $2 }
NSItem :: { NamespacedItem Text }
: VAR { IEVar (getVar $1) }
| CON { IECon (getVar $1) }
| 'module' CON { IEModule (getVar $2) }
ModItem :: { Item }
: Decl { ModDecl $1 }
| Import { ModImport $1 }
Import
: 'import' qcon ImportExportList
{ Import (getVar $2) $3 False Nothing }
| 'import' qcon ImportExportList 'as' CON
{ Import (getVar $2) $3 False (Just (getVar $5)) }
| 'import' 'qualified' qcon ImportExportList
{ Import (getVar $3) $4 True Nothing }
| 'import' 'qualified' qcon ImportExportList 'as' CON
{ Import (getVar $3) $4 True (Just (getVar $6)) }
Opt(p)
: { () }
| p { () }
Semis
: ';' Semis { () }
| ';' { () }
-- TODO: qualified names
qvar : VAR { $1 }
qcon : CON { $1 }
List(p)
: {-empty-} { [] }
| p List(p) { $1:$2 }
CommaList(p)
: {-empty-} { [] }
| p { [$1] }
| p ',' CommaList(p) { $1:$3 }
Block(p)
: START p END { (startPosn $1, endPosn $3, $2) }
| '{' p '}' { (startPosn $1, endPosn $3, $2) }
{
type Exp = FeExpr Text
type Pat = FePat Text
type Decl = FeDecl Text
type Type = FeType Text
type Rhs = FeRhs Text
type Module = FeModule Text
type Item = ModuleItem Text
lexer cont = alexMonadScan >>= cont
parseError x = alexError (show x)
makeLams xs b = foldr Lam b xs
getVar (Token (TokVar s) _ _) = s
getVar (Token (TokCon s) _ _) = s
getVar _ = error "getVar non-var"
getString (Token (TokString s) _ _) = s
getString _ = error "getString non-string"
data LOL a = LOL { lolEnd :: Posn, lolList :: [a] }
emptyLol :: HasPosn x => x -> LOL a
emptyLol t = LOL (endPosn t) []
lolCons :: a -> LOL a -> LOL a
lolCons x (LOL p xs) = LOL p (x:xs)
makeTupleType [x] = ParenType x
makeTupleType xs = Tytup xs
makeTuplePattern [x] = ParenPat x
makeTuplePattern xs = TupPat xs
makeTuple [x] = ParenExp x
makeTuple xs = Tuple xs
}

BIN
src/Frontend/Lexer/Tokens.hi View File


+ 90
- 0
src/Frontend/Lexer/Tokens.hs View File

@ -0,0 +1,90 @@
module Frontend.Lexer.Tokens where
import qualified Data.Text as T
import Data.Text (Text)
data TokenClass
= TokVar Text
| TokCon Text
| TokString Text
| TokEof
| TokLambda
| TokArrow
| TokUnder
| TokOParen
| TokOBrace
| TokOSquare
| TokOPragma
| TokCParen
| TokCBrace
| TokCSquare
| TokCPragma
| TokDoubleColon
| TokEqual
| TokComma
| TokPipe
| TokLet
| TokIn
| TokLStart
| TokLEnd
| TokModule
| TokImport
| TokQualified
| TokAs
| TokWhere
| TokLambdaCase
| TokCase
| TokOf
| TokData
| TokSemi
deriving (Eq, Show, Ord)
tokSize :: TokenClass -> Int
tokSize (TokVar x) = T.length x
tokSize (TokCon x) = T.length x
tokSize TokEof = 0
tokSize TokLambda = 1
tokSize TokOParen = 1
tokSize TokOBrace = 1
tokSize TokOSquare = 1
tokSize TokOPragma = 3
tokSize TokCBrace = 1
tokSize TokCParen = 1
tokSize TokCSquare = 1
tokSize TokCPragma = 3
tokSize TokDoubleColon = 2
tokSize TokEqual = 1
tokSize TokComma = 1
tokSize TokSemi = 1
tokSize TokUnder = 1
tokSize TokArrow = 2
tokSize TokIn = 2
tokSize TokLStart = 0
tokSize TokLEnd = 0
tokSize TokPipe = 1
tokSize (TokString t) = 2 + T.length t
tokSize TokLambdaCase = length "\\case"
tokSize TokWhere = length "where"
tokSize TokData = length "data"
tokSize TokOf = length "of"
tokSize TokCase = length "case"
tokSize TokModule = length "module"
tokSize TokQualified = length "qualified"
tokSize TokImport = length "import"
tokSize TokLet = 3
tokSize TokAs = 2
data Token
= Token { tokenClass :: TokenClass
, tokStartLine :: !Int
, tokStartCol :: !Int
}
deriving (Eq, Show, Ord)

BIN
src/Frontend/Lexer/Tokens.o View File


BIN
src/Frontend/Parser/Posn.hi View File


+ 33
- 0
src/Frontend/Parser/Posn.hs View File

@ -0,0 +1,33 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DefaultSignatures #-}
module Frontend.Parser.Posn where
import Frontend.Lexer.Tokens
import Data.Typeable
data Posn
= Posn { posnLine :: {-# UNPACK #-} !Int
, posnColm :: {-# UNPACK #-} !Int
}
deriving (Eq, Show, Ord)
class HasPosn a where
startPosn :: a -> Posn
endPosn :: a -> Posn
span :: (HasPosn b, HasPosn c) => b -> c -> a -> a
default span :: Typeable a => b -> c -> a -> a
span _ _ x = error $ "Can't span " ++ show (typeOf x)
instance HasPosn Token where
startPosn (Token _ l c) = Posn l c
endPosn (Token t l c) = Posn l (c + tokSize t)
instance HasPosn (Posn, Posn, a) where
startPosn (s, _, _) = s
endPosn (_, e, _) = e
span start end (_, _, x) = (startPosn start, endPosn end, x)
thd :: (a, b, c) -> c
thd (_, _, z) = z

BIN
src/Frontend/Parser/Posn.o View File


BIN
src/Frontend/Syntax.hi View File


+ 112
- 0
src/Frontend/Syntax.hs View File

@ -0,0 +1,112 @@
module Frontend.Syntax where
import Frontend.Parser.Posn
import qualified Data.Text as T
data FeExpr var
= Ref var
| Con var
| App (FeExpr var) (FeExpr var)
| Lam (FePat var) (FeExpr var)
| Let [FeDecl var] (FeExpr var)
| Tuple [FeExpr var]
| Annot (FeExpr var) (FeType var)
| Literal Literal
| ParenExp (FeExpr var)
| SPExpr (FeExpr var) Posn Posn
deriving (Eq, Show, Ord)
instance HasPosn (FeExpr var) where
startPosn (SPExpr _ s _) = s
startPosn _ = error "no start posn in parsed expression?"
endPosn (SPExpr _ _ e) = e
endPosn _ = error "no end posn in parsed expression?"
span sp ep (SPExpr x _ _) = SPExpr x (startPosn sp) (endPosn ep)
span sp ep x = SPExpr x (startPosn sp) (endPosn ep)
data FePat var
= Var var
| Wildcard
| TupPat [FePat var]
| LitPat Literal
| ParenPat (FePat var) -- parsed parentheses
| SPPat (FePat var) Posn Posn
deriving (Eq, Show, Ord)
instance HasPosn (FePat var) where
startPosn (SPPat _ s _) = s
startPosn _ = error "no start posn in parsed expression?"
endPosn (SPPat _ _ e) = e
endPosn _ = error "no end posn in parsed pattern?"
span sp ep (SPPat x _ _) = SPPat x (startPosn sp) (endPosn ep)
span sp ep x = SPPat x (startPosn sp) (endPosn ep)
data FeType var
= Tyvar var
| Tycon var
| Tyapp (FeType var) (FeType var)
| Tyarr (FeType var) (FeType var)
| Tytup [FeType var]
| ParenType (FeType var) -- parsed parentheses
| SPType (FeType var) Posn Posn
deriving (Eq, Show, Ord)
instance HasPosn (FeType var) where
startPosn (SPType _ s _) = s
startPosn _ = error "no start posn in parsed type?"
endPosn (SPType _ _ e) = e
endPosn _ = error "no end posn in parsed type?"
span sp ep (SPType x _ _) = SPType x (startPosn sp) (endPosn ep)
span sp ep x = SPType x (startPosn sp) (endPosn ep)
data FeDecl var
= PatDecl (FePat var) (FeRhs var)
| FunDecl var [FePat var] (FeRhs var)
| TySig var (FeType var)
deriving (Eq, Show, Ord)
data FeRhs var
= BareRhs (FeExpr var) [FeDecl var]
deriving (Eq, Show, Ord)
data Literal
= LitString T.Text
| LitNumber Integer
deriving (Eq, Show, Ord)
data FeModule var
= Module { moduleName :: var
, moduleExports :: Maybe [NamespacedItem var]
, moduleItems :: [ModuleItem var]
}
deriving (Eq, Show, Ord)
data ModuleImport var
= Import { importMod :: var
, importList :: Maybe [NamespacedItem var]
, importQualified :: Bool
, importAlias :: Maybe var
}
deriving (Eq, Show, Ord)
data NamespacedItem var
= IEVar var
| IECon var
| IEModule var
deriving (Eq, Show, Ord)
data ModuleItem var
= ModDecl (FeDecl var)
| ModImport (ModuleImport var)
deriving (Eq, Show, Ord)

BIN
src/Frontend/Syntax.o View File


BIN
src/Main.hi View File


+ 51
- 0
src/Main.hs View File

@ -0,0 +1,51 @@
-- {-# LANGUAGE BlockArguments #-}
module Main where
import qualified Data.ByteString.Lazy as Lbs
import Data.Foldable
import Frontend.Lexer.Tokens
import Frontend.Autogen.Lexer
import Frontend.Autogen.Parser
import qualified Data.Text.Encoding as T
import qualified Data.Text as T
import Control.Monad ( unless )
import System.Environment (getArgs)
import Text.Show.Pretty (pPrint)
import Debug.Trace
main :: IO ()
main = do
args <- getArgs
for_ args $ \str -> do
Main.lex pPrint parseMod =<< Lbs.readFile str
testParse :: String -> IO ()
testParse s = Main.lex print parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
testLex :: String -> IO ()
testLex s = Main.lex (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))
lex :: (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO ()
lex show cont arg = do
let x = runAlex arg cont
case x of
Left e -> print e
Right x -> show x
scan :: [Token] -> Alex [Token]
scan acc = do
tok <- alexMonadScan
sc <- alexGetStartCode
state <- getUserState
traceM . unlines $
[ "----------------"
, "just lexed: " ++ show tok
, "sc: " ++ show sc
, "sc stack: " ++ show (startCodes state)
]
case tokenClass tok of
TokEof -> pure (reverse acc)
_ -> scan (tok:acc)

BIN
src/Main.o View File


+ 67
- 0
stack.yaml View File

@ -0,0 +1,67 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# https://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
#
# The location of a snapshot can be provided as a file or url. Stack assumes
# a snapshot provided as a file might change, whereas a url resource does not.
#
# resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml
resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# subdirs:
# - auto-update
# - wai
packages:
- .
# Dependency packages to be pulled from upstream that are not in the resolver.
# These entries can reference officially published versions as well as
# forks / in-progress versions pinned to a git hash. For example:
#
# extra-deps:
# - acme-missiles-0.3
# - git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
#
# extra-deps: []
# Override default flag values for local packages and extra-deps
# flags: {}
# Extra package databases containing global packages
# extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=2.7"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

+ 13
- 0
stack.yaml.lock View File

@ -0,0 +1,13 @@
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
# https://docs.haskellstack.org/en/stable/lock_files
packages: []
snapshots:
- completed:
size: 585603
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml
sha256: 694573e96dca34db5636edb1fe6c96bb233ca0f9fb96c1ead1671cdfa9bd73e9
original:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/3.yaml

Loading…
Cancel
Save