From cac787310f0bb9ae49a3f32a9b83694d580c585e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Abigail=20Magalh=C3=A3es?= Date: Thu, 5 Aug 2021 21:03:01 -0300 Subject: [PATCH] fixup: name resolution dump 1 --- .gitignore | 5 +- Setup.hs | 2 + ahc.cabal | 26 ++-- src/Ahc/Data/Lens.hs | 37 +++++ src/Ahc/Data/Lens/Tuple.hs | 44 ++++++ src/Errors.hs | 43 +++--- src/Frontend/Autogen/Lexer.x | 60 ++++---- src/Frontend/Autogen/Parser.y | 52 +++++-- src/Frontend/Lexer/Tokens.hs | 14 +- src/Frontend/Lexer/Unicode.hs | 78 ++++++++++ src/Frontend/Lexer/Wrapper.hs | 53 +++---- src/Frontend/Parser/Foreign.hs | 17 ++- src/Frontend/Parser/Posn.hs | 17 ++- src/Frontend/Syntax.hs | 98 ++++++------- src/Frontend/Syntax/Var.hs | 56 ++++++++ src/Main.hs | 54 ++++--- src/Main/Queries.hs | 42 ++++++ src/Main/Rules.hs | 121 ++++++++++++++++ src/Rename/Rename.hs | 252 +++++++++++++++++++++++++++++++++ src/Rename/Types.hs | 81 +++++++++++ 20 files changed, 959 insertions(+), 193 deletions(-) create mode 100644 src/Ahc/Data/Lens.hs create mode 100644 src/Ahc/Data/Lens/Tuple.hs create mode 100644 src/Frontend/Lexer/Unicode.hs create mode 100644 src/Frontend/Syntax/Var.hs create mode 100644 src/Main/Queries.hs create mode 100644 src/Main/Rules.hs create mode 100644 src/Rename/Rename.hs create mode 100644 src/Rename/Types.hs diff --git a/.gitignore b/.gitignore index 278e615..a87f292 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,7 @@ # alex/happy artefacts src/**/*.o -src/**/*.hi \ No newline at end of file +src/**/*.hi + +# compiler stuff +.ahc-cache \ No newline at end of file diff --git a/Setup.hs b/Setup.hs index 9a994af..09e11ce 100644 --- a/Setup.hs +++ b/Setup.hs @@ -1,2 +1,4 @@ + import Distribution.Simple + main = defaultMain diff --git a/ahc.cabal b/ahc.cabal index df62fe4..6f13815 100644 --- a/ahc.cabal +++ b/ahc.cabal @@ -15,23 +15,33 @@ executable ahc hs-source-dirs: src main-is: Main.hs default-language: Haskell2010 - build-depends: base ^>= 4.14 - , mtl ^>= 2.2 - , syb ^>= 0.7 - , text ^>= 1.2 - , array ^>= 0.5 - , containers ^>= 0.6 - , bytestring ^>= 0.10 - , pretty-show ^>= 1.10 + build-depends: base ^>= 4.14 + , mtl ^>= 2.2 + , syb ^>= 0.7 + , text ^>= 1.2 + , array ^>= 0.5 + , shake ^>= 0.19.5 + , deepseq ^>= 1.4 + , binary ^>= 0.8 + , hashable ^>= 1.3 + , containers ^>= 0.6 + , bytestring ^>= 0.10 + , pretty-show ^>= 1.10 + , cryptohash-sha1 ^>= 0.11 + , unordered-containers ^>= 0.2 other-modules: Frontend.Autogen.Lexer, Frontend.Autogen.Parser, Frontend.Lexer.Tokens, Frontend.Lexer.Wrapper, + Frontend.Lexer.Unicode, Frontend.Parser.Posn, Frontend.Parser.Foreign, Frontend.Syntax, + Frontend.Syntax.Var, + Main.Queries, + Main.Rules, Errors build-tool-depends: alex:alex >= 3.2.4 && < 4.0 diff --git a/src/Ahc/Data/Lens.hs b/src/Ahc/Data/Lens.hs new file mode 100644 index 0000000..60466cb --- /dev/null +++ b/src/Ahc/Data/Lens.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE RankNTypes #-} +module Ahc.Data.Lens where + +import Control.Applicative + +import Data.Functor.Identity + +type Lens s t a b = forall f. Functor f => (a -> f b) -> s -> f t + +type Lens' s a = Lens s s a a + +get :: Lens s t a b -> s -> a +get l = getConst . l Const + +over :: Lens s t a b -> (a -> b) -> s -> t +over l m = runIdentity . l (Identity . m) + +set :: Lens s t a b -> b -> s -> t +set l b a = over l (const b) a + +(.~) :: Lens s t a b -> b -> s -> t +(.~) = set +infixr 4 .~ + +(%~) :: Lens s t a b -> (a -> b) -> s -> t +(%~) = over +infixr 4 %~ + +(^.) :: s -> Lens s t a b -> a +x ^. l = get l x + +(&) :: t1 -> (t1 -> t2) -> t2 +x & f = f x +infixr 0 & + +lens :: (s -> a) -> (s -> b -> t) -> Lens s t a b +lens sa sbt afb s = sbt s <$> afb (sa s) diff --git a/src/Ahc/Data/Lens/Tuple.hs b/src/Ahc/Data/Lens/Tuple.hs new file mode 100644 index 0000000..fd04e23 --- /dev/null +++ b/src/Ahc/Data/Lens/Tuple.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +module Ahc.Data.Lens.Tuple where + +import Ahc.Data.Lens + +import Data.Functor.Identity + +class Field1 s t a b | s -> a, t -> b, s b -> t, t a -> s where + _1 :: Lens s t a b + +instance Field1 (Identity a) (Identity a) a a where + _1 = lens runIdentity (\_ x -> Identity x) + +instance Field1 (a, b) (a', b) a a' where + _1 = lens (\(~(x, _)) -> x) (\(~(_, y)) x -> (x, y)) + +instance Field1 (a, b, c) (a', b, c) a a' where + _1 = lens (\(~(x, _, _)) -> x) (\(~(_, y, z)) x -> (x, y, z)) + +instance Field1 (a, b, c, d) (a', b, c, d) a a' where + _1 = lens (\(~(x, _, _, _)) -> x) (\(~(_, y, z, a)) x -> (x, y, z, a)) + +class Field2 s t a b | s -> a, t -> b, s b -> t, t a -> s where + _2 :: Lens s t a b + +instance Field2 (a, b) (a, b') b b' where + _2 = lens (\(~(_, y)) -> y) (\(~(x, _)) y -> (x, y)) + +instance Field2 (a, b, c) (a, b', c) b b' where + _2 = lens (\(~(_, x, _)) -> x) (\(~(x, _, z)) y -> (x, y, z)) + +instance Field2 (a, b, c, d) (a, b', c, d) b b' where + _2 = lens (\(~(_, x, _, _)) -> x) (\(~(x, _, z, a)) y -> (x, y, z, a)) + +class Field3 s t a b | s -> a, t -> b, s b -> t, t a -> s where + _3 :: Lens s t a b + +instance Field3 (a, b, c) (a, b, c') c c' where + _3 = lens (\(~(_, _, x)) -> x) (\(~(x, y, _)) z -> (x, y, z)) + +instance Field3 (a, b, c, d) (a, b, c', d) c c' where + _3 = lens (\(~(_, _, x, _)) -> x) (\(~(x, y, _, a)) z -> (x, y, z, a)) \ No newline at end of file diff --git a/src/Errors.hs b/src/Errors.hs index 5d98d52..663dd77 100644 --- a/src/Errors.hs +++ b/src/Errors.hs @@ -1,10 +1,21 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} module Errors where -import Frontend.Parser.Posn +import Control.Exception + import Data.Maybe + +import Development.Shake.Classes + +import Frontend.Parser.Posn + import qualified GHC.IO.Handle.FD -import System.Posix.Internals (c_isatty) +import GHC.Generics (Generic) import GHC.IO.FD +import GHC.Stack + +import System.Posix.Internals (c_isatty) data AhcError = AhcError { errorMessage :: String @@ -14,15 +25,15 @@ data AhcError , errorEnd :: Posn , errorPointers :: [ErrorPointer] } - deriving (Eq, Show) + deriving (Eq, Show, Generic, Binary, Hashable, NFData, Typeable, Exception) -emptyError :: AhcError +emptyError :: HasCallStack => AhcError emptyError = - AhcError (error "parse errors must have a message") - (error "parse errors must have a filename") + AhcError (error "errors must have a message") + (error "errors must have a filename") Nothing - (error "parse errors must have a start pos") - (error "parse errors must have an end pos") + (error "errors must have a start pos") + (error "errors must have an end pos") [] data ErrorPointer @@ -31,7 +42,7 @@ data ErrorPointer , errorPointerNote :: Maybe String , errorPointerDrawSquiggle :: Bool } - deriving (Eq, Show) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) showAhcError :: Bool -> [String] -> AhcError -> String showAhcError color code pe = do @@ -39,7 +50,7 @@ showAhcError color code pe = do [ bold ++ errorFilename pe ++ ":" ++ show linum ++ ":" ++ show startcol ++ ": " - ++ red ++ "parse error:" ++ reset + ++ red ++ "error:" ++ reset ] ++ map (render bold red sep reset) (mainPointer:errorPointers pe) ++ [ padToMax "" ++ " " ++ errorMessage pe @@ -69,11 +80,11 @@ showAhcError color code pe = do width | multiline = 0 - | otherwise = max 0 (posnColm (errorEnd pe) - startcol - 1) + | otherwise = max 0 (posnColm end - startcol - 1) linum' = padToMax $ show linum - line = [ show linum ++ sep ++ code !! (linum - 1) | linum <- [ linum .. posnLine (errorEnd pe) ] ] + line = [ show linum ++ sep ++ code !! (linum - 1) | linum <- [ linum .. posnLine end ] ] padding = replicate (length linum') ' ' ++ sep padding' = replicate (length linum') ' ' ++ " " @@ -82,18 +93,18 @@ showAhcError color code pe = do squiggle | squig = replicate width '~' | otherwise = "" - + init $ unlines [ padding , init (unlines line) , padding ++ caret ++ squiggle ++ reset ++ fromMaybe "" ((" " ++) <$> note) ] -printParseError :: AhcError -> IO () -printParseError pe = do +printAhcError :: AhcError -> IO () +printAhcError pe = do code <- lines <$> readFile (errorFilename pe) color <- fmap (1 ==) . c_isatty . fdFD =<< GHC.IO.Handle.FD.handleToFd GHC.IO.Handle.FD.stdout putStr $ showAhcError color code pe pointTo :: HasPosn x => x -> x -> String -> [ErrorPointer] -pointTo s e msg = [ErrorPointer (startPosn s) (endPosn e) (Just msg) False] \ No newline at end of file +pointTo s e msg = [ErrorPointer (startPosn s) (endPosn e) (Just msg) False] diff --git a/src/Frontend/Autogen/Lexer.x b/src/Frontend/Autogen/Lexer.x index 975c471..243a796 100644 --- a/src/Frontend/Autogen/Lexer.x +++ b/src/Frontend/Autogen/Lexer.x @@ -3,8 +3,7 @@ module Frontend.Autogen.Lexer where import Control.Monad -import qualified Data.ByteString.Lazy as Lbs -import qualified Data.Text.Encoding as T +import qualified Data.Text.Lazy as Lt import qualified Data.Text as T import qualified Data.Char import Data.Int (Int64) @@ -15,17 +14,18 @@ import Frontend.Parser.Posn } -- %wrapper "monadUserState-bytestring" +%encoding "latin1" -$lower = [a-z] -$upper = [A-Z] +$lower = [ a-z \xF1 ] +$upper = [ A-Z \xF0 ] $alpha = [ $lower $upper ] -$digit = [0-9] -$alnum = [ $alpha $digit ] +$digit = [ 0-9 ] +$alnum = [ $alpha $digit \xF4 \xF3 ] -$white_nol = $white # [\n\t] +$white_nol = [ $white \xf5 ] # [\n\t] -$optail = [\! \# \$ \% \& \* \+ \. \/ \< \= \> \? \@ \\ \^ \| \- \~ \:] +$optail = [\! \# \$ \% \& \* \+ \. \/ \< \= \> \? \@ \\ \^ \| \- \~ \: \xF2] $ophead = $optail # \: @conid = $upper [$alnum \_ \']* @@ -130,7 +130,7 @@ tokens :- { alexEOF :: Alex Token alexEOF = do - (Posn l c, _, _, _) <- alexGetInput + AI (Posn l c) _ _ _ <- alexGetInput maybePopImportSC @@ -154,8 +154,8 @@ alexEOF = do yield k inp i = clearPendingLC *> yield' k inp i -yield' k (Posn l c, _, s, _) i = do - pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c) +yield' k (AI (Posn l c) _ s _) i = do + pure (Token (k $! (Lt.toStrict (Lt.take i s))) l c) setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True } clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False } @@ -164,13 +164,13 @@ always :: TokenClass -> AlexInput -> Int64 -> Alex Token always k x i = yield (const k) x i -- reset the string buffer and push the string start code -startString (p, _, _, _) _ = do +startString (AI p _ _ _) _ = do mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Just p } pushStartCode string alexMonadScan -- pop the string start code, and emit the string buffer as a token. -endString (Posn l c, _, _, _) _i = do +endString _ _i = do state <- getUserState mapUserState $ \s -> s { stringBuffer = T.empty, stringStartPosn = Nothing } @@ -180,8 +180,8 @@ endString (Posn l c, _, _, _) _i = do pure (Token (TokString (stringBuffer state)) l c) -- append a /lexed/ region to the string buffer -stringSegment (Posn _ _, _, buf, _) i = do - mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) } +stringSegment (AI (Posn _ _) _ buf _) i = do + mapUserState $ \s -> s { stringBuffer = stringBuffer s <> (Lt.toStrict (Lt.take i buf)) } alexMonadScan -- append a constant fragment to the string buffer. @@ -230,7 +230,7 @@ popStartCode = do alexSetStartCode x offsideRule :: AlexInput -> Int64 -> Alex Token -offsideRule (Posn line col, _, _, _) _ = do +offsideRule (AI (Posn line col) _ _ _) _ = do columns <- layoutColumns <$> getUserState let continue = popStartCode *> alexMonadScan @@ -299,13 +299,13 @@ maybePopImportSC = do when (startcode == import_ || startcode == foreign_) popStartCode emptyLayout :: AlexInput -> Int64 -> Alex Token -emptyLayout (Posn line col, _, _, _) _ = do +emptyLayout (AI (Posn line col) _ _ _) _ = do popStartCode pushStartCode newline pure (Token TokLEnd line col) startLayout :: AlexInput -> Int64 -> Alex Token -startLayout (Posn line col, _, _, _) _ = do +startLayout (AI (Posn line col) _ _ _) _ = do state <- getUserState popStartCode let @@ -340,7 +340,7 @@ popLayoutContext :: Alex () popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) } openBrace :: AlexInput -> Int64 -> Alex Token -openBrace (Posn line col, _, _, _) _ = do +openBrace (AI (Posn line col) _ _ _) _ = do -- if we see a '{' token, we're probably in the layout state. in that -- case, we pop it! otherwise, we just pop the state anyway: if we -- were in <0>, then popping gets us back in <0>. @@ -353,7 +353,7 @@ openBrace (Posn line col, _, _, _) _ = do pure (Token TokOBrace line col) closeBrace :: AlexInput -> Int64 -> Alex Token -closeBrace (Posn line col, _, _, _) _ = do +closeBrace (AI (Posn line col) _ _ _) _ = do -- if we're lexing a '}' token (physical) and the rightmost layout -- context was started by a physical '{', then we can close it. -- otherwise we do nothing and probably get a parse error! @@ -364,16 +364,16 @@ closeBrace (Posn line col, _, _, _) _ = do pure (Token TokCBrace line col) variableOrKeyword :: AlexAction Token -variableOrKeyword (Posn l c, _, s, _) size = - finishVarKw l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) +variableOrKeyword (AI (Posn l c) _ s _) size = + finishVarKw l c $ Lt.toStrict (Lt.take size s) qualifiedVariable :: AlexAction Token -qualifiedVariable (Posn l c, _, s, _) size = - finishVar TokUnqual TokQual l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) +qualifiedVariable (AI (Posn l c) _ s _) size = + finishVar TokUnqual TokQual l c $ Lt.toStrict (Lt.take size s) qualifiedOperator :: AlexAction Token -qualifiedOperator (Posn l c, _, s, _) size = - finishVar TokUnqualOp TokQualOp l c $ T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) +qualifiedOperator (AI (Posn l c) _ s _) size = + finishVar TokUnqualOp TokQualOp l c $ Lt.toStrict (Lt.take size s) finishVarKw :: Int -> Int -> T.Text -> Alex Token finishVarKw l c text @@ -500,17 +500,17 @@ laidOut' n x l c = do laidOut = laidOut' Nothing alexMonadScan = do - inp@(_,_,_,n) <- alexGetInput + inp@(AI _ _ _ n) <- alexGetInput sc <- alexGetStartCode case alexScan inp sc of AlexEOF -> alexEOF - AlexError error@(_,_,inp,_) -> - alexError $ "Unexpected character: " ++ show (T.head (T.decodeUtf8 (Lbs.toStrict inp))) + AlexError (AI _ _ inp _) -> + alexError $ "Unexpected character: " ++ show (Lt.head inp) AlexSkip inp _len -> do alexSetInput inp alexMonadScan - AlexToken inp'@(_,_,_,n') _ action -> let len = n'-n in do + AlexToken inp'@(AI _ _ _ n') _ action -> let len = n'-n in do alexSetInput inp' action (ignorePendingBytes inp) len } \ No newline at end of file diff --git a/src/Frontend/Autogen/Parser.y b/src/Frontend/Autogen/Parser.y index a03aa53..e7b683f 100644 --- a/src/Frontend/Autogen/Parser.y +++ b/src/Frontend/Autogen/Parser.y @@ -1,5 +1,5 @@ { -{-# LANGUAGE FlexibleContexts, FlexibleInstances, ViewPatterns #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, ViewPatterns, PartialTypeSignatures #-} module Frontend.Autogen.Parser where import qualified Data.Text as T @@ -10,6 +10,7 @@ import Frontend.Lexer.Wrapper import Frontend.Autogen.Lexer import Frontend.Lexer.Tokens import Frontend.Parser.Posn +import Frontend.Syntax.Var import Frontend.Syntax import qualified Prelude @@ -20,6 +21,9 @@ import Debug.Trace import Control.Monad import Errors + +import Ahc.Data.Lens.Tuple (Field3(_3)) +import Ahc.Data.Lens ((^.)) } %name exp Exp @@ -105,7 +109,7 @@ InfixExp :: { Exp } LeftExp :: { Exp } : '\\' Apat List(Apat) '->' Exp { span $1 $5 (makeLams ($2:$3) $5) } - | 'let' LaidOutList(Decl) 'in' Exp { span $1 $4 $ Let (thd $2) $4 } + | 'let' LaidOutList(Decl) 'in' Exp { span $1 $4 $ Let (($2 :: (_,_,_)) ^. _3) $4 } | FuncExp { $1 } FuncExp :: { Exp } @@ -153,11 +157,11 @@ Decl :: { Decl } Rhs :: { Rhs } : '=' Exp { BareRhs $2 [] (startPosn $1) (endPosn $2) } - | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (thd $4) (startPosn $1) (endPosn $4) } + | '=' Exp 'where' LaidOutList(Decl) { BareRhs $2 (($4 :: (_, _, _)) ^. _3) (startPosn $1) (endPosn $4) } LaidOutList(p) - : START Opt(Semis) LOLContents(p, END) { (startPosn $1, lolEnd $3, lolList $3) } - | '{' Opt(Semis) LOLContents(p, '}') { (startPosn $1, lolEnd $3, lolList $3) } + : START Opt(Semis) LOLContents(p, CLOSE) { (startPosn $1, lolEnd $3, lolList $3) } + | '{' Opt(Semis) LOLContents(p, '}') { (startPosn $1, lolEnd $3, lolList $3) } LOLContents(p, End) : p Semis LOLContents(p,End) { lolCons $1 $3 } @@ -165,9 +169,9 @@ LOLContents(p, End) | Opt(Semis) End { emptyLol $2 } Module :: { Module } - : 'module' CON ImportExportList 'where' LaidOutList(ModItem) - {% do { (imports,items) <- spanModuleItems (thd $5) - ; pure $ Module { moduleName = toModId (getVar $2) + : 'module' modid ImportExportList 'where' LaidOutList(ModItem) + {% do { (imports,items) <- spanModuleItems (($5 :: (_,_,_)) ^. _3) + ; pure $ Module { moduleName = toModId $2 , moduleExports = fst $3 , moduleImports = imports , moduleItems = items } @@ -216,7 +220,7 @@ FfiItem :: { FfiItem ParsedVar } } } | 'import' CallConv VAR error {% do { state <- getUserState - ; (here, _, _, _) <- alexGetInput + ; AI here _ _ _ <- alexGetInput ; let vartk = $3 ; case lastToken state of Just tok -> alexThrow $ \fn -> @@ -256,7 +260,7 @@ CallConv :: { FfiCallConv } : 'ccall' { CC_CCall } | error {% do { state <- getUserState - ; (here, _, _, _) <- alexGetInput + ; AI here _ _ _ <- alexGetInput ; case lastToken state of Just tok -> alexThrow $ \fn -> emptyError { errorMessage = "ahc only supports the 'ccall' calling convention" @@ -346,7 +350,19 @@ Commas :: { Int } : {- empty -} { 0 } | ',' Commas { (let x = $2 in x `seq` 1 + x) } -modid : qconid { toModId $1 } +modid + : qconid { toModId $1 } + | qvarid + {% do { AI here _ _ _ <- alexGetInput + ; alexThrow $ \fn -> + emptyError { errorMessage = "module names must be uppercase" + , errorInlineDesc = Just ("this is lowercase!") + , errorBegin = startPosn $1 + , errorEnd = endPosn $1 + , errorFilename = fn + } + } + } List(p) : {-empty-} { [] } @@ -362,8 +378,16 @@ CommaList1(p) | p ',' CommaList(p) { $1:$3 } Block(p) - : START p END { (startPosn $1, endPosn $3, $2) } - | '{' p '}' { (startPosn $1, endPosn $3, $2) } + : START p CLOSE { (startPosn $1, endPosn $2, $2) } + | '{' p '}' { (startPosn $1, endPosn $2, $2) } + +CLOSE :: { Posn } + : END { endPosn $1 } + | error {% do { popLayoutContext + ; AI here _ _ _ <- alexGetInput + ; pure here + } + } { @@ -381,7 +405,7 @@ lexer cont = do cont tok parseError (token, expected) = do - (here, _, _, _) <- alexGetInput + AI here _ _ _ <- alexGetInput alexThrow $ \fn -> emptyError { errorMessage = "expecting one of: " ++ unwords expected , errorInlineDesc = Just ("unexpected " ++ show (tokenClass token)) , errorBegin = startPosn token diff --git a/src/Frontend/Lexer/Tokens.hs b/src/Frontend/Lexer/Tokens.hs index 06ed763..f63c977 100644 --- a/src/Frontend/Lexer/Tokens.hs +++ b/src/Frontend/Lexer/Tokens.hs @@ -7,11 +7,11 @@ data IdClass = ConId | VarId deriving (Eq, Show, Ord) data TokenClass - = TokUnqual IdClass Text - | TokQual IdClass Text Text + = TokUnqual IdClass Text + | TokQual IdClass Text Text | TokUnqualOp IdClass Text | TokQualOp IdClass Text Text - | TokString Text + | TokString Text | TokEof | TokLambda @@ -89,8 +89,8 @@ instance Show TokenClass where show TokLet = "let" show TokIn = "in" - show TokLStart = "" - show TokLEnd = "" + show TokLStart = "" + show TokLEnd = "" show TokModule = "module" show TokImport = "import" @@ -130,7 +130,9 @@ isKeywordToken TokData = True isKeywordToken _ = False tokSize :: TokenClass -> Int -tokSize = length . show +tokSize TokLStart = 0 +tokSize TokLEnd = 0 +tokSize x = length (show x) data Token = Token { tokenClass :: TokenClass diff --git a/src/Frontend/Lexer/Unicode.hs b/src/Frontend/Lexer/Unicode.hs new file mode 100644 index 0000000..5a68605 --- /dev/null +++ b/src/Frontend/Lexer/Unicode.hs @@ -0,0 +1,78 @@ +module Frontend.Lexer.Unicode + ( UnicodeClass(..) + , classify + , fudgeCharacterClass + ) where + +import Data.Word (Word8) +import Data.Char + +-- | A less specfic version of 'GeneralCategory', grouping characters +-- into a couple of key categories. +data UnicodeClass + -- These are used to designate the beginning of symbols + = Upper | Lower | Symbol + + -- Generic and digit can be used in identifiers, but not at the start of one + | Generic | Digit + + | Whitespace + + -- These are guaranteed parse order. The only difference is that "graphic" is printable, while + -- other may not be. + | OtherGraphic | Other + deriving (Eq, Show) + +-- | Determine the class for a given character. +classify :: Char -> UnicodeClass +classify c = case generalCategory c of + -- See classification descriptions in + -- + + -- Cased letters + UppercaseLetter -> Upper + LowercaseLetter -> Lower + TitlecaseLetter -> Upper + + ModifierLetter -> Generic + OtherLetter -> Lower + + NonSpacingMark -> Generic + SpacingCombiningMark -> OtherGraphic + EnclosingMark -> OtherGraphic + + DecimalNumber -> Digit + LetterNumber -> Generic + OtherNumber -> Digit + + ConnectorPunctuation -> Symbol + DashPunctuation -> Symbol + OpenPunctuation -> OtherGraphic + ClosePunctuation -> OtherGraphic + InitialQuote -> OtherGraphic + FinalQuote -> OtherGraphic + OtherPunctuation -> Symbol + + MathSymbol -> Symbol + CurrencySymbol -> Symbol + ModifierSymbol -> Symbol + -- So this _could_ be Lower or something, just so we can allow for emoji variables. + -- Hrmrm, maybe not. + OtherSymbol -> Symbol + + Space -> Whitespace + + -- This is all the wacky things in C* and Z* groups + _ -> Other + +-- | Convert a character class into a fake byte which will be used by +-- "Parser.Lexer" +fudgeCharacterClass :: UnicodeClass -> Word8 +fudgeCharacterClass Upper = 0xf0 +fudgeCharacterClass Lower = 0xf1 +fudgeCharacterClass Symbol = 0xf2 +fudgeCharacterClass Generic = 0xf3 +fudgeCharacterClass Digit = 0xf4 +fudgeCharacterClass Whitespace = 0xf5 +fudgeCharacterClass OtherGraphic = 0xf6 +fudgeCharacterClass Other = 0xf7 diff --git a/src/Frontend/Lexer/Wrapper.hs b/src/Frontend/Lexer/Wrapper.hs index 07e9b25..093746d 100644 --- a/src/Frontend/Lexer/Wrapper.hs +++ b/src/Frontend/Lexer/Wrapper.hs @@ -2,8 +2,7 @@ module Frontend.Lexer.Wrapper where import Control.Applicative as App (Applicative (..)) -import qualified Data.ByteString.Internal as ByteString (w2c) -import qualified Data.ByteString.Lazy as ByteString +import qualified Data.Text.Lazy as Lt import qualified Data.Text as T import qualified Data.Char import Data.Word (Word8) @@ -12,29 +11,31 @@ import Data.Int (Int64) import Errors import Frontend.Lexer.Tokens (Token) +import Frontend.Lexer.Unicode import Frontend.Parser.Posn -type Byte = Word8 -type AlexInput = ( Posn, -- current position, - Char, -- previous char - ByteString.ByteString, -- current input string - Int64) -- bytes consumed so far +data AlexInput = + AI { aiPosn :: !Posn -- current position, + , aiChar :: !Char -- previous char + , aiInput :: !Lt.Text -- current input string + , aiRead :: !Int64 -- bytes consumed so far + } ignorePendingBytes :: AlexInput -> AlexInput ignorePendingBytes i = i -- no pending bytes when lexing bytestrings alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (_,c,_,_) = c - -alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) -alexGetByte (p,_,cs,n) = - case ByteString.uncons cs of - Nothing -> Nothing - Just (b, cs') -> - let c = ByteString.w2c b - p' = alexMove p c - n' = n+1 - in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n')) +alexInputPrevChar = Data.Char.chr . fromIntegral . fudgeCharacterClass . classify . aiChar + +alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) +alexGetByte (AI p c cs n) = do + (char, cs') <- Lt.uncons cs + let byte + | char <= '\x7f' = fromIntegral (Data.Char.ord char) + | otherwise = fudgeCharacterClass (classify char) + p' = alexMove p char + n' = n+1 + Just (byte, AI p' char cs' n') -- ----------------------------------------------------------------------------- -- Token positions @@ -46,7 +47,6 @@ alexGetByte (p,_,cs,n) = -- `move_pos' calculates the new position after traversing a given character, -- assuming the usual eight character tab stops. - alexStartPos :: Posn alexStartPos = Posn 1 1 @@ -59,7 +59,7 @@ data AlexState = AlexState { alex_pos :: !Posn, -- position at current input location alex_bpos:: !Int64, -- bytes consumed so far - alex_inp :: ByteString.ByteString, -- the current input + alex_inp :: Lt.Text, -- the current input alex_chr :: !Char, -- the character before the input alex_scd :: !Int -- the current startcode @@ -68,7 +68,7 @@ data AlexState = AlexState { , alex_fname :: String } -runAlex :: String -> ByteString.ByteString -> Alex a -> Either AhcError a +runAlex :: String -> Lt.Text -> Alex a -> Either AhcError a runAlex fname input__ (Alex f) = case f initState of Left msg -> Left msg @@ -110,11 +110,11 @@ instance Monad Alex where alexGetInput :: Alex AlexInput alexGetInput = Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} -> - Right (s, (pos,c,inp__,bpos)) + Right (s, AI pos c inp__ bpos) alexSetInput :: AlexInput -> Alex () -alexSetInput (pos,c,inp__,bpos) +alexSetInput (AI pos c inp__ bpos) = Alex $ \s -> Right ( s { alex_pos = pos , alex_bpos = bpos , alex_chr = c @@ -167,10 +167,11 @@ data AlexUserState = , pendingLambdaCase :: !Bool , stringBuffer :: !T.Text - , stringStartPosn :: Maybe Posn + , stringStartPosn :: !(Maybe Posn) - , lastToken :: Maybe Token + , lastToken :: !(Maybe Token) + , parenDepth :: !Int } alexInitUserState :: AlexUserState -alexInitUserState = AlexUserState [] [] 0 Nothing [] False T.empty Nothing Nothing +alexInitUserState = AlexUserState [] [] 0 Nothing [] False T.empty Nothing Nothing 0 diff --git a/src/Frontend/Parser/Foreign.hs b/src/Frontend/Parser/Foreign.hs index d9c9e45..b568e50 100644 --- a/src/Frontend/Parser/Foreign.hs +++ b/src/Frontend/Parser/Foreign.hs @@ -4,20 +4,23 @@ module Frontend.Parser.Foreign where import Control.Monad import qualified Data.Text as T +import Data.Char (isAlpha) import Data.List +import Errors import Frontend.Lexer.Wrapper import Frontend.Lexer.Tokens +import Frontend.Parser.Posn import Frontend.Syntax import Text.Show.Pretty -import Frontend.Parser.Posn -import Data.Char (isAlpha) -import Errors parseForeignItem :: Token -> Alex FfiImpEnt -parseForeignItem token@(Token (TokString impent) line col) = go 0 impent emptyItem +parseForeignItem token@(Token (TokString impent) line col) + | T.unpack impent == "dynamic" = pure Dynamic + | T.unpack impent == "wrapper" = pure Wrapper + | otherwise = go 0 impent emptyItem where pn = Posn line (col + 1) @@ -36,7 +39,7 @@ parseForeignItem token@(Token (TokString impent) line col) = go 0 impent emptyIt case fiHeader item of Nothing -> go (off + T.length x) xs item{fiHeader = Just x} Just _ -> tooManyHeaders pn off (T.length x) - | T.singleton '&' `T.isPrefixOf` x, T.length x == 1 = + | T.singleton '&' == x = if fiIsRef item then tooManyReferences pn off else go (off + 1) xs item{fiIsRef = True} @@ -88,7 +91,7 @@ tooManyReferences (Posn l c) off = tooManyStatics :: Posn -> Int -> Alex a tooManyStatics (Posn l c) off = alexThrow $ \fname -> - emptyError + emptyError { errorMessage = "this foreign entity has too many 'static's; only one is allowed." , errorFilename = fname , errorInlineDesc = Just "repeated 'static'" @@ -102,7 +105,7 @@ parseCid (Posn l c) off id | isValidCIdent id = pure $ Just id | otherwise = alexThrow $ \fname -> - emptyError + emptyError { errorMessage = "names for foreign entities must be valid C identifiers." , errorFilename = fname , errorInlineDesc = Just "this is not a C identifier" diff --git a/src/Frontend/Parser/Posn.hs b/src/Frontend/Parser/Posn.hs index 3e96f50..3e592f1 100644 --- a/src/Frontend/Parser/Posn.hs +++ b/src/Frontend/Parser/Posn.hs @@ -1,14 +1,22 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE DefaultSignatures #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} module Frontend.Parser.Posn where -import Frontend.Lexer.Tokens import Data.Typeable + +import Development.Shake.Classes + +import Frontend.Lexer.Tokens + +import GHC.Generics (Generic) + data Posn = Posn { posnLine :: {-# UNPACK #-} !Int , posnColm :: {-# UNPACK #-} !Int } - deriving (Eq, Show, Ord) + deriving (Eq, Show, Ord, Generic, Binary, Hashable, NFData) class HasPosn a where startPosn :: a -> Posn @@ -32,7 +40,4 @@ instance HasPosn (Posn, Posn, a) where instance HasPosn Posn where startPosn = id endPosn = id - span _ y _ = endPosn y - -thd :: (a, b, c) -> c -thd (_, _, z) = z \ No newline at end of file + span _ y _ = endPosn y \ No newline at end of file diff --git a/src/Frontend/Syntax.hs b/src/Frontend/Syntax.hs index 935aaa7..af415bd 100644 --- a/src/Frontend/Syntax.hs +++ b/src/Frontend/Syntax.hs @@ -1,9 +1,16 @@ +{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-} module Frontend.Syntax where -import Frontend.Parser.Posn import qualified Data.Text as T import Data.Text (Text) +import Development.Shake.Classes (Binary, Hashable, NFData) + +import Frontend.Parser.Posn +import Frontend.Syntax.Var + +import GHC.Generics (Generic) + data FeExpr var = Ref var | Con var @@ -18,7 +25,7 @@ data FeExpr var | ParenExp (FeExpr var) | SPExpr (FeExpr var) Posn Posn - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (FeExpr var) where startPosn (SPExpr _ s _) = s @@ -41,7 +48,7 @@ data FePat var | ParenPat (FePat var) -- parsed parentheses | SPPat (FePat var) Posn Posn - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (FePat var) where startPosn (SPPat _ s _) = s @@ -63,7 +70,7 @@ data FeType var | ParenType (FeType var) -- parsed parentheses | SPType (FeType var) Posn Posn - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (FeType var) where startPosn (SPType _ s _) = s @@ -76,10 +83,20 @@ instance HasPosn (FeType var) where span sp ep x = SPType x (startPosn sp) (endPosn ep) data FeDecl var - = PatDecl { pdPat :: FePat var, declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn } - | FunDecl { fdVar :: var, fdArgs :: [FePat var], declRhs :: FeRhs var, declBegin :: Posn, declEnd :: Posn } - | TySig { tsVars :: [var], tsType :: FeType var, declBegin :: Posn, declEnd :: Posn } - deriving (Eq, Show, Ord) + = PatDecl { pdPat :: FePat var + , declRhs :: FeRhs var + , declBegin :: Posn + , declEnd :: Posn } + | FunDecl { fdVar :: var + , fdArgs :: [FePat var] + , declRhs :: FeRhs var + , declBegin :: Posn + , declEnd :: Posn } + | TySig { tsVars :: [var] + , tsType :: FeType var + , declBegin :: Posn + , declEnd :: Posn } + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (FeDecl var) where startPosn = declBegin @@ -88,8 +105,11 @@ instance HasPosn (FeDecl var) where span sp ep s = s { declBegin = startPosn sp, declEnd = endPosn ep } data FeRhs var - = BareRhs { bareRhs :: FeExpr var, rhsWhere :: [FeDecl var], rhsBegin :: Posn, rhsEnd :: Posn } - deriving (Eq, Show, Ord) + = BareRhs { bareRhs :: FeExpr var + , rhsWhere :: [FeDecl var] + , rhsBegin :: Posn + , rhsEnd :: Posn } + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (FeRhs var) where startPosn = rhsBegin @@ -100,7 +120,7 @@ instance HasPosn (FeRhs var) where data Literal = LitString T.Text | LitNumber Integer - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) data FeModule var = Module { moduleName :: var @@ -108,7 +128,7 @@ data FeModule var , moduleImports :: [ModuleImport var] , moduleItems :: [ModuleItem var] } - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) data ModuleImport var = Import { importMod :: var @@ -118,7 +138,7 @@ data ModuleImport var , importBegin :: Posn , importEnd :: Posn } - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn (ModuleImport var) where startPosn = importBegin @@ -130,13 +150,13 @@ data NamespacedItem var = IEVar var | IECon var | IEModule var - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) data ModuleItem var = ModDecl { itemDecl :: FeDecl var, itemBegin :: Posn, itemEnd :: Posn } | ModImport { itemImport :: ModuleImport var, itemBegin :: Posn, itemEnd :: Posn } | ModFfi { itemForeign :: FfiItem var, itemBegin :: Posn, itemEnd :: Posn } - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) data FfiItem var = FfiImport @@ -156,10 +176,10 @@ data FfiItem var , fiBegin :: Posn , fiEnd :: Posn } - deriving (Eq, Show, Ord) + deriving (Eq, Show, Generic, Binary, Hashable, NFData) -data FfiCallConv = CC_CCall deriving (Eq, Show, Ord) -data FfiSafety = Safe | Unsafe deriving (Eq, Show, Ord) +data FfiCallConv = CC_CCall deriving (Eq, Show, Ord, Generic, Binary, Hashable, NFData) +data FfiSafety = Safe | Unsafe deriving (Eq, Show, Ord, Generic, Binary, Hashable, NFData) data FfiImpEnt = ForeignItem { fiItemName :: Maybe Text @@ -169,45 +189,7 @@ data FfiImpEnt } | Dynamic | Wrapper - deriving (Eq, Show, Ord) - -data ParsedVar - = UnqualVar - { varId :: Text - , varBegin :: Posn - , varEnd :: Posn - } - | QualVar - { varId :: Text - , varPrefix :: Text - , varBegin :: Posn - , varEnd :: Posn - } - | ModId - { varId :: Text - , varBegin :: Posn - , varEnd :: Posn - } - | BuiltinId - { varId :: Text - , varBuiltin :: BuiltinIdClass - , varBegin :: Posn - , varEnd :: Posn - } - deriving (Eq, Show, Ord) - -data BuiltinIdClass - = BuiltinTuple !Int - | BuiltinNil - | BuiltinArrow - deriving (Eq, Show, Ord) - -toModId :: ParsedVar -> ParsedVar -toModId x@ModId{} = x -toModId (UnqualVar x y z) = ModId x y z -toModId (QualVar id pref b e) = ModId (pref <> T.singleton '.' <> id) b e -toModId BuiltinId{} = - error "Built-in variable can not be a module identifier!" + deriving (Eq, Show, Generic, Binary, Hashable, NFData) instance HasPosn ParsedVar where startPosn = varBegin @@ -225,4 +207,4 @@ instance HasPosn (FfiItem var) where startPosn = fiBegin endPosn = fiEnd - span sp ep s = s { fiBegin = startPosn sp, fiEnd = endPosn ep } \ No newline at end of file + span sp ep s = s { fiBegin = startPosn sp, fiEnd = endPosn ep } diff --git a/src/Frontend/Syntax/Var.hs b/src/Frontend/Syntax/Var.hs new file mode 100644 index 0000000..37f3fa7 --- /dev/null +++ b/src/Frontend/Syntax/Var.hs @@ -0,0 +1,56 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} +module Frontend.Syntax.Var where + +import qualified Data.Text as T +import Data.Text (Text) + +import Development.Shake.Classes + +import Frontend.Parser.Posn + +import GHC.Generics (Generic) + +data ParsedVar + = UnqualVar + { varId :: Text + , varBegin :: Posn + , varEnd :: Posn + } + | QualVar + { varId :: Text + , varPrefix :: Text + , varBegin :: Posn + , varEnd :: Posn + } + | ModId + { varId :: Text + , varBegin :: Posn + , varEnd :: Posn + } + | BuiltinId + { varId :: Text + , varBuiltin :: BuiltinIdClass + , varBegin :: Posn + , varEnd :: Posn + } + deriving (Eq, Show, Generic, Binary, Hashable, NFData) + +data BuiltinIdClass + = BuiltinTuple !Int + | BuiltinNil + | BuiltinArrow + deriving (Eq, Show, Generic, Binary, Hashable, NFData) + +toModId :: ParsedVar -> ParsedVar +toModId x@ModId{} = x +toModId (UnqualVar x y z) = ModId x y z +toModId (QualVar id pref b e) = ModId (pref <> T.singleton '.' <> id) b e +toModId BuiltinId{} = + error "Built-in variable can not be a module identifier!" + +splitModuleIdentifier :: Text -> [Text] +splitModuleIdentifier t + | T.null fst = [] + | otherwise = fst:splitModuleIdentifier (T.drop 1 snd) + where (fst, snd) = T.span (/= '.') t diff --git a/src/Main.hs b/src/Main.hs index f6dc3b0..185ba15 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,56 +1,68 @@ -- {-# LANGUAGE BlockArguments #-} module Main where -import Control.Monad ( unless ) +import Control.Exception -import qualified Data.ByteString.Lazy as Lbs -import qualified Data.Text.Encoding as T -import qualified Data.Text as T -import Data.Foldable +import qualified Data.Text.Lazy as Lt +import Data.Typeable import Debug.Trace +import Development.Shake + +import Errors + import Frontend.Autogen.Parser import Frontend.Autogen.Lexer import Frontend.Lexer.Wrapper import Frontend.Lexer.Tokens +import GHC.IO.Handle.FD (stdout, handleToFd) +import GHC.IO.FD + +import Main.Rules + import System.Environment (getArgs) +import System.Posix.Internals import Text.Show.Pretty (pPrint) -import Frontend.Parser.Posn -import System.Posix.Internals -import GHC.IO.Handle.FD (stdout, handleToFd) -import GHC.IO.FD -import Data.Maybe (fromMaybe) -import Errors main :: IO () -main = do - args <- getArgs - for_ args $ \str -> do - Main.lex str (\_ -> putStrLn $ str ++ " parsed!") parseMod =<< Lbs.readFile str +main = + do + args <- getArgs + print args + shake opts (compilerRules args) + `catch` \(ShakeException _ _ some) -> handle some + where + opts = + shakeOptions { shakeFiles = ".ahc-cache" + , shakeVerbosity = Verbose + , shakeProgress = progressDisplay 0.01 putStrLn + } + handle (SomeException e) = + case cast e of + Just x -> printAhcError x + Nothing -> throwIO e testParse :: String -> IO () -testParse s = Main.lex "" pPrint parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) +testParse s = Main.lex "" pPrint parseMod (Lt.pack s) testLex :: String -> IO () -testLex s = Main.lex "" (const (pure ())) (scan []) (Lbs.fromStrict (T.encodeUtf8 (T.pack s))) +testLex s = Main.lex "" (const (pure ())) (scan []) (Lt.pack s) -lex :: String -> (a -> IO ()) -> Alex a -> Lbs.ByteString -> IO () +lex :: String -> (a -> IO ()) -> Alex a -> Lt.Text -> IO () lex fname show cont arg = do let x = runAlex fname arg cont color <- fmap (1 ==) . c_isatty . fdFD =<< handleToFd GHC.IO.Handle.FD.stdout code <- if fname == "" - then pure . lines $ T.unpack (T.decodeUtf8 (Lbs.toStrict arg)) + then pure . lines $ Lt.unpack arg else lines <$> readFile fname case x of Left e -> putStr $ showAhcError color code e Right x -> show x - - scan :: [Token] -> Alex [Token] scan acc = do tok <- alexMonadScan diff --git a/src/Main/Queries.hs b/src/Main/Queries.hs new file mode 100644 index 0000000..808673e --- /dev/null +++ b/src/Main/Queries.hs @@ -0,0 +1,42 @@ +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DerivingStrategies #-} +module Main.Queries where + +import Data.ByteString (ByteString) + +import Development.Shake.Classes +import Development.Shake + +import Frontend.Syntax.Var (ParsedVar) +import Frontend.Syntax (FeModule) + +import GHC.Generics (Generic) + +import Rename.Types (FqVar, RenamedMod) + +newtype AhcParsedModule = AhcParsedModule { ahcpmFilePath :: String } + deriving newtype (Eq, Show, Binary, Hashable, NFData) + +type instance RuleResult AhcParsedModule = FeModule ParsedVar + +newtype AhcRenamedModule = AhcRenamedModule { ahcrnFilePath :: String } + deriving newtype (Eq, Show, Binary, Hashable, NFData) + +type instance RuleResult AhcRenamedModule = RenamedMod + +data AhcModuleFilepath = + AhcModuleFilepath + { ahcfpImportingModule :: String + , ahcfpModName :: ParsedVar + } + deriving (Eq, Show, Generic, Binary, Hashable, NFData) + +type instance RuleResult AhcModuleFilepath = String + +newtype AhcModuleHash = AhcModuleHash { ahcmhFilePath :: String } + deriving newtype (Eq, Show, Binary, Hashable, NFData) + +type instance RuleResult AhcModuleHash = ByteString diff --git a/src/Main/Rules.hs b/src/Main/Rules.hs new file mode 100644 index 0000000..dedb9d0 --- /dev/null +++ b/src/Main/Rules.hs @@ -0,0 +1,121 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE TypeFamilies #-} +module Main.Rules where + +import Control.DeepSeq (deepseq) +import Control.Concurrent +import Control.Exception +import Control.Monad + +import Crypto.Hash.SHA1 as Sha1 + +import qualified Data.ByteString.Lazy as Lbs +import qualified Data.Text.Lazy.IO as Lt +import Data.ByteString (ByteString) +import qualified Data.Text as T +import Data.Foldable (for_) + +import Development.Shake.FilePath +import Development.Shake.Classes +import Development.Shake + +import Errors + +import Frontend.Lexer.Wrapper (runAlex) +import Frontend.Autogen.Parser +import Frontend.Parser.Posn +import Frontend.Syntax.Var +import Frontend.Syntax + +import GHC.Generics (Generic) + +import Main.Queries + +import Rename.Types (FqVar, ModuleId(..), RenamedMod) +import qualified Rename.Types as Rv +import Rename.Rename + +import System.IO + +import Text.Show.Pretty + +findModuleOracle :: Rules () +findModuleOracle = void $ addOracleCache find where + find :: AhcModuleFilepath -> Action String + find (AhcModuleFilepath our_fp (ModId id (Posn sl sc) pe)) = go "" id where + go fp xs | T.null xs || not (T.singleton '.' `T.isInfixOf` xs) = do + fp <- pure (fp T.unpack xs <.> "hs") + t <- doesFileExist fp + unless t . liftIO . throwIO $ + emptyError { errorMessage = "file does not exist:" + , errorFilename = our_fp + , errorBegin = Posn sl (sc + length fp) + , errorEnd = pe + } + pure fp + + go fp xs = do + (x, xs) <- pure $ T.span (/= '.') xs + ex <- doesDirectoryExist (T.unpack x) + unless ex . liftIO . throwIO $ + emptyError { errorMessage = "directory in module path does not exist:" + , errorFilename = our_fp + , errorBegin = Posn sl (sc + length fp) + , errorEnd = Posn sl (sc + length fp + T.length x) + } + go (fp T.unpack x) xs + + find _ = undefined + +parserOracle :: Rules (String -> Action (FeModule ParsedVar)) +parserOracle = fmap (. AhcParsedModule) $ addOracleCache (parse . ahcpmFilePath) where + parse :: String -> Action (FeModule ParsedVar) + parse fpath = do + need [fpath] + fileContents <- liftIO $ Lt.readFile fpath + + let mod = runAlex fpath fileContents parseMod + () <- liftIO . evaluate $ rnf mod + + case mod of + Left e -> liftIO $ throwIO e + Right x -> pure x + +renamerOracle :: Rules (String -> Action RenamedMod) +renamerOracle = fmap (. AhcRenamedModule) $ addOracleCache (rename . ahcrnFilePath) where + rename :: String -> Action RenamedMod + rename fpath = do + mod <- askOracle (AhcParsedModule fpath) :: Action (FeModule ParsedVar) + hash <- askOracle (AhcModuleHash fpath) + mvar <- liftIO $ newMVar mempty + let + modid = ModuleId { Rv.moduleName = varId $ Frontend.Syntax.moduleName mod + , moduleHash = hash + , moduleFp = fpath + } + rnctx = RenameCtx { rcOurModId = modid + , rcOurFilePath = fpath + , rcScope = mempty + , rcThisModule = mvar + , rcSpanStart = Posn 1 1 + , rcSpanEnd = Posn 1 1 + } + runRename (renameModule mod) rnctx + +hashOracle :: Rules (String -> Action ByteString) +hashOracle = fmap (. AhcModuleHash) . addOracleCache $ \(AhcModuleHash fp) -> do + need [fp] + fileContents <- liftIO $ Lbs.readFile fp + pure (Sha1.hashlazy fileContents) + +compilerRules :: [String] -> Rules () +compilerRules wanted_mods = do + findModuleOracle + parser <- parserOracle + hash <- hashOracle + rename <- renamerOracle + + action $ do + for_ wanted_mods $ \path -> do + ast <- rename path + liftIO $ pPrint ast diff --git a/src/Rename/Rename.hs b/src/Rename/Rename.hs new file mode 100644 index 0000000..120abc2 --- /dev/null +++ b/src/Rename/Rename.hs @@ -0,0 +1,252 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} +module Rename.Rename where + +import Ahc.Data.Lens + +import Control.Monad.IO.Class +import Control.Monad.Reader +import Control.Concurrent +import Control.Exception + +import qualified Data.HashMap.Strict as HashMap +import Data.HashMap.Strict (HashMap) +import Data.ByteString (ByteString) +import Data.Text (Text) + +import Debug.Trace + +import Development.Shake ( Action, askOracle ) +import Development.Shake.Classes + +import Errors + +import qualified Frontend.Syntax as Fe +import Frontend.Parser.Posn +import Frontend.Syntax.Var +import Frontend.Syntax + +import GHC.Generics + +import Main.Queries + +import Rename.Types + +import Text.Show.Pretty +import Control.Applicative + +newtype Rename a = RenameM { runRename :: RenameCtx -> Action a } + deriving (Functor, Applicative, Monad, MonadIO, MonadReader RenameCtx) + via ReaderT RenameCtx Action + +liftAct :: Action a -> Rename a +liftAct a = RenameM $ \x -> a + +data RenameCtx = + RenameCtx + { rcOurModId :: ModuleId + , rcOurFilePath :: String + , rcScope :: Scope + , rcThisModule :: MVar Scope + , rcSpanStart :: Posn + , rcSpanEnd :: Posn + } + +_rcScope :: Lens' RenameCtx Scope +_rcScope = lens rcScope (\s x -> s { rcScope = x }) + +renamerSourcePos :: Posn -> Posn -> RenameCtx -> RenameCtx +renamerSourcePos start end x = x { rcSpanStart = start, rcSpanEnd = end } + +renameModule :: FeModule ParsedVar -> Rename RenamedMod +renameModule Fe.Module{moduleExports, moduleImports, moduleItems, Fe.moduleName} = do + (imports, scopes) <- fmap unzip $ traverse importModule moduleImports + our_mod_id <- asks rcOurModId + local (extendBy (mconcat scopes)) $ do + items <- renameModuleBody moduleItems + + this_mod <- liftIO . readMVar =<< asks rcThisModule + liftIO $ print this_mod + export <- traverse (renameModVarList this_mod) moduleExports + + our_mod_name <- makeVariable Rename.Types.Module moduleName + let + scope = restrictToModVarList export this_mod + rnm_mod = Fe.Module { Fe.moduleName = our_mod_name + , moduleExports = export + , moduleImports = imports + , moduleItems = items + } + pure (RnmMod rnm_mod scope) + +restrictToModVarList :: Maybe [NamespacedItem FqVar] -> Scope -> Scope +restrictToModVarList Nothing x = x +restrictToModVarList (Just xs) sc = foldr del sc xs where + del :: NamespacedItem FqVar -> Scope -> Scope + del (IEVar n) = over scopeNames (HashMap.delete (fqVarName n)) + del (IECon n) = over scopeNames (HashMap.delete (fqVarName n)) . over scopeTypes (HashMap.delete (fqVarName n)) + del (IEModule n) = over scopeNamespaces (HashMap.delete (fqVarName n)) + +renameModVarList :: Scope -> [NamespacedItem ParsedVar] -> Rename [NamespacedItem FqVar] +renameModVarList sc (x:xs) = + case x of + IEVar pv -> + case HashMap.lookup (varId pv) (sc ^. scopeNames) of + Just [t] -> (IEVar t:) <$> renameModVarList sc xs + _ -> notInScope pv + IECon pv -> + case HashMap.lookup (varId pv) (sc ^. scopeNames) <|> HashMap.lookup (varId pv) (sc ^. scopeTypes) of + Just [t] -> (IECon t:) <$> renameModVarList sc xs + _ -> notInScope pv + IEModule pv -> + case HashMap.lookup (varId pv) (sc ^. scopeNamespaces) of + Just (t, _) -> (IEModule t:) <$> renameModVarList sc xs + _ -> notInScope pv +renameModVarList sc [] = pure [] + +renameModuleBody :: [ModuleItem ParsedVar] -> Rename [ModuleItem FqVar] +renameModuleBody = wrap where + wrap [] = pure [] + wrap xs = do + var <- asks rcThisModule + go xs + + go [] = pure [] + go (ModDecl (TySig vars tipe begin end) begin' end':items) = local (renamerSourcePos begin end) $ do + tipe <- renameTypeToplevel tipe + vars <- traverse (makeVariable_unique Variable scopeNames) vars + r <- go items + insertVariables scopeNames vars $ + pure (ModDecl (TySig (map snd vars) tipe begin end) begin' end':r) + go (x:items) = error $ "show item: " ++ ppShow x + +renameTypeToplevel :: FeType ParsedVar -> Rename (FeType FqVar) +renameTypeToplevel (Tytup xs) = Tytup <$> traverse renameTypeToplevel xs +renameTypeToplevel (SPType ty s e) = + SPType <$> local (renamerSourcePos s e) (renameTypeToplevel ty) + <*> pure s + <*> pure e +-- rename units to units +renameTypeToplevel (Tycon (BuiltinId _ (BuiltinTuple 0) _ _)) = pure (Tytup []) +renameTypeToplevel (Tycon x) = do + var <- useVariable scopeTypes x + pure (Tycon var) +renameTypeToplevel x = error $ "idk how to rename: " ++ show x + +extendBy :: Scope -> RenameCtx -> RenameCtx +extendBy s rc = rc{rcScope = s <> rcScope rc} + +importModule :: ModuleImport ParsedVar -> Rename (ModuleImport FqVar, Scope) +importModule Import{importMod, importList, importQualified, importAlias, importBegin, importEnd} = do + fp <- asks rcOurFilePath + our_mod_id <- asks rcOurModId + res_fp <- liftAct $ askOracle $ + AhcModuleFilepath { ahcfpImportingModule = fp + , ahcfpModName = importMod + } + + renamed_import <- liftAct $ askOracle $ AhcRenamedModule res_fp + let + orig = rnmModSignature renamed_import + other_mod = Fe.moduleName $ rnmModModule renamed_import + il <- traverse (renameModVarList orig) importList + orig <- pure $ restrictToModVarList il orig + + mod <- pure $ + if importQualified + then mempty { _scopeNamespaces = HashMap.singleton (varId importMod) (other_mod, orig) } + else orig + + mod <- pure $ + case importAlias of + Just x -> mod & scopeNamespaces %~ HashMap.insert (varId x) (other_mod, orig) + Nothing -> mod + + our_mod_name <- makeVariable Rename.Types.Module importMod + alias <- traverse (makeVariable Rename.Types.Module) importAlias + let + imp = Import our_mod_name il importQualified alias importBegin importEnd + + pure (imp, mod) + +makeVariable :: MonadReader RenameCtx m => Namespace -> ParsedVar -> m FqVar +makeVariable nsc x = do + context <- ask + pure $ + FqVar { fqVarName = varId x + , fqVarModule = rcOurModId context + , fqVarNamespace = nsc + , fqBegin = rcSpanStart context + , fqEnd = rcSpanEnd context + } + +makeVariable_unique :: Namespace -> Lens' Scope (HashMap Text [FqVar]) -> ParsedVar -> Rename (Text, FqVar) +makeVariable_unique this_ns ns x = do + var <- asks rcThisModule + scope <- liftIO . readMVar $ var + let + the_hm = scope ^. ns + case HashMap.lookup (varId x) the_hm of + Just (orig:_) -> redeclaration orig x + _ -> do + new_var <- makeVariable this_ns x + liftIO $ modifyMVar_ var $ \s -> pure $ over ns (insertVariable (varId x) new_var) s + pure (varId x, new_var) + +insertVariable :: Text -> FqVar -> HashMap Text [FqVar] -> HashMap Text [FqVar] +insertVariable c v = HashMap.alter go c where + go (Just xs) = Just (v:xs) + go Nothing = Just [v] + +insertVariables :: Lens' Scope (HashMap Text [FqVar]) -> [(Text, FqVar)] -> Rename a -> Rename a +insertVariables lens vars = local (over (_rcScope . lens) go) where + go x = foldr (\(x, y) -> insertVariable x y) x vars + +useVariable :: Lens' Scope (HashMap Text [FqVar]) -> ParsedVar -> Rename FqVar +useVariable final_ns var = + do scope <- asks rcScope + go scope var + where + go scope (QualVar id prfx begin end) = + case HashMap.lookup prfx (scope ^. scopeNamespaces) of + Nothing -> notInScope var + Just (_, sc) -> local (\s -> s { rcScope = sc }) $ useVariable final_ns (UnqualVar id begin end) + + go scope (UnqualVar id begin end) = + case HashMap.lookup id (scope ^. final_ns) of + Just [var] -> pure var + Just vars -> ambiguousVariables vars + Nothing -> notInScope var + + go _ x = error $ "idk how to rename yet: " ++ show x + +ambiguousVariables :: [FqVar] -> Rename a +ambiguousVariables vs = throwRenamer $ \e -> e { errorMessage = show vs } + +notInScope :: HasPosn x => x -> Rename a +notInScope x = + local (renamerSourcePos (startPosn x) (endPosn x)) $ + throwRenamer $ \e -> e { errorMessage = "variable not in scope" } + +redeclaration :: FqVar -> ParsedVar -> Rename a +redeclaration first v = do + liftIO $ pPrint first + throwRenamer $ \e -> e { errorMessage = "variables can only have one declaration." + , errorInlineDesc = Just "first declaration here" + , errorPointers = pointTo v v "redeclaration here" + , errorBegin = startPosn first + , errorEnd = endPosn first + } + +throwRenamer :: (MonadIO m, MonadReader RenameCtx m) => (AhcError -> AhcError) -> m b +throwRenamer cont = do + context <- ask + let + fname = moduleFp (rcOurModId context) + errorBegin = rcSpanStart context + errorEnd = rcSpanEnd context + liftIO . throwIO $ cont (emptyError { errorFilename = fname, errorBegin, errorEnd }) diff --git a/src/Rename/Types.hs b/src/Rename/Types.hs new file mode 100644 index 0000000..30e606d --- /dev/null +++ b/src/Rename/Types.hs @@ -0,0 +1,81 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE StandaloneDeriving #-} +module Rename.Types where + +import Ahc.Data.Lens (lens, Lens, Lens') + +import qualified Data.HashMap.Strict as HashMap +import Data.HashMap.Strict (HashMap) +import Data.ByteString (ByteString) +import Data.Text (Text) + +import Development.Shake.Classes + +import Frontend.Parser.Posn +import Frontend.Syntax + +import GHC.Generics + +data ModuleId + = ModuleId { moduleName :: Text + , moduleFp :: String + , moduleHash :: ByteString + } + deriving (Eq, Show, Generic, NFData, Binary, Hashable) + +data Namespace + = DataCon + | TypeCon + | Variable + | Module + deriving (Eq, Show, Generic, NFData, Binary, Hashable) + +data FqVar + = FqVar { fqVarName :: Text + , fqVarModule :: ModuleId + , fqVarNamespace :: Namespace + , fqBegin :: Posn + , fqEnd :: Posn + } + deriving (Eq, Show, Generic, NFData, Binary, Hashable) + +instance HasPosn FqVar where + startPosn = fqBegin + endPosn = fqEnd + span s e var = var { fqBegin = startPosn s, fqEnd = endPosn e } + +data RenamedMod + = RnmMod { rnmModModule :: FeModule FqVar + , rnmModSignature :: Scope } + deriving (Eq, Show, Generic, NFData, Binary, Hashable) + +data Scope + = Scope + { _scopeNames :: HashMap Text [FqVar] + , _scopeTypes :: HashMap Text [FqVar] + , _scopeNamespaces :: HashMap Text (FqVar, Scope) } + deriving (Eq, Show, Generic, NFData, Binary, Hashable) + +scopeNames :: Lens' Scope (HashMap Text [FqVar]) +scopeNames = lens _scopeNames (\s x -> s { _scopeNames = x}) + +scopeTypes :: Lens' Scope (HashMap Text [FqVar]) +scopeTypes = lens _scopeTypes (\s x -> s { _scopeTypes = x}) + +scopeNamespaces :: Lens' Scope (HashMap Text (FqVar, Scope)) +scopeNamespaces = lens _scopeNamespaces (\s x -> s { _scopeNamespaces = x}) + +instance Semigroup Scope where + Scope nam typ ns <> Scope nam' typ' ns' + = Scope (nam `merge` nam') (typ `merge` typ') (ns `merge'` ns') + where + merge = HashMap.unionWith (<>) + merge' = HashMap.unionWith (\(x, y) (_, z) -> (x, y <> z)) + +instance Monoid Scope where + mempty = Scope mempty mempty mempty + +instance (Eq k, Hashable k, Binary k, Binary v) => Binary (HashMap k v) where + get = HashMap.fromList <$> get + put = put . HashMap.toList