From 238511c99d46984a77ba107a662d53ab23e42783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Abigail=20Magalh=C3=A3es?= Date: Tue, 3 Aug 2021 22:50:36 -0300 Subject: [PATCH] Implement parsing for foreign import declarations --- ahc.cabal | 1 + src/Frontend/Autogen/Lexer.x | 47 +++++++----- src/Frontend/Autogen/Parser.y | 40 +++++++++- src/Frontend/Lexer/Tokens.hs | 39 +++++++++- src/Frontend/Parser/Foreign.hs | 135 +++++++++++++++++++++++++++++++++ src/Frontend/Syntax.hs | 42 +++++++++- src/Main.hs | 2 +- 7 files changed, 278 insertions(+), 28 deletions(-) create mode 100644 src/Frontend/Parser/Foreign.hs diff --git a/ahc.cabal b/ahc.cabal index 8cfeb83..3c98c64 100644 --- a/ahc.cabal +++ b/ahc.cabal @@ -30,6 +30,7 @@ executable ahc Frontend.Lexer.Tokens, Frontend.Lexer.Wrapper, Frontend.Parser.Posn, + Frontend.Parser.Foreign, Frontend.Syntax build-tool-depends: alex:alex >= 3.2.4 && < 4.0 diff --git a/src/Frontend/Autogen/Lexer.x b/src/Frontend/Autogen/Lexer.x index 7f6da51..8bede11 100644 --- a/src/Frontend/Autogen/Lexer.x +++ b/src/Frontend/Autogen/Lexer.x @@ -35,12 +35,13 @@ tokens :- $white_nol+ ; \t { \_ _ -> alexError "tab character in source code" } -<0,import_> "--" .* \n +<0,import_,foreign_> "--" .* \n { just $ pushStartCode newline } <0> \= { always TokEqual } <0> \` { always TokTick } <0> \: \: { always TokDoubleColon } + \: \: { \i l -> popStartCode *> always TokDoubleColon i l } <0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l } <0> "->" { always TokArrow } @@ -49,7 +50,7 @@ tokens :- <0> \{ { always TokOBrace } <0> \[ { always TokOSquare } -<0,import_> { +<0,import_,foreign_> { \, { always TokComma } \( { always TokOParen } \) { always TokCParen } @@ -60,9 +61,9 @@ tokens :- <0> \;+ { always TokSemi } -<0,import_> \n { just $ pushStartCode newline } +<0,import_,foreign_> \n { just $ pushStartCode newline } -<0> \" { startString } +<0,foreign_> \" { startString } { \\ \" { stringAppend (T.singleton '"') } @@ -107,17 +108,12 @@ tokens :- () { startLayout } } - { - \n { just $ pushStartCode newline } - "--" .* \n { just $ pushStartCode newline } -} - () { emptyLayout } () { emitPendingToken } -- identifiers and keywords -<0,import_> { +<0,import_,foreign_> { $lower [$alpha $digit \_ \']* { variableOrKeyword } $upper [$alpha $digit \_ \']* { yield (TokUnqual ConId) } @@ -300,7 +296,7 @@ offsideRule (Posn line col, _, _, _) _ = do maybePopImportSC :: Alex () maybePopImportSC = do startcode <- alexGetStartCode - when (startcode == import_) popStartCode + when (startcode == import_ || startcode == foreign_) popStartCode emptyLayout :: AlexInput -> Int64 -> Alex Token emptyLayout (Posn line col, _, _, _) _ = do @@ -402,18 +398,20 @@ finishVarKw l c text = do -- set the "keyword" (now changed to an identifier) as pending, so -- that it will be emitted by the next alexMonadScan. "import" -> do - pushStartCode import_ + when ((sc /= import_) && (sc /= foreign_)) $ pushStartCode import_ pure (Token TokImport l c) - "as" - | sc == import_, c > col -> pure (Token TokAs l c) - | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c - | otherwise -> pure (Token (TokUnqual VarId text) l c) + "as" -> conditionalKeyword l c import_ (c > col) TokAs + "qualified" -> conditionalKeyword l c import_ (c > col) TokQualified - "qualified" - | sc == import_, c > col -> pure (Token TokQualified l c) - | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c - | otherwise -> pure (Token (TokUnqual VarId text) l c) + "foreign" -> do + when (sc /= foreign_) $ pushStartCode foreign_ + pure (Token TokForeign l c) + + "export" -> conditionalKeyword l c foreign_ (c > col) TokForeign + "safe" -> conditionalKeyword l c foreign_ (c > col) TokSafe + "unsafe" -> conditionalKeyword l c foreign_ (c > col) TokUnsafe + "ccall" -> conditionalKeyword l c foreign_ (c > col) TokCCall -- when starting a layout context for let expressions we make sure -- that it is distinguishable from layout contexts started by @@ -456,6 +454,15 @@ finishVarKw l c text = do [] -> error "empty keyword/identifier" + +conditionalKeyword l c import_ cond kw = do + sc <- alexGetStartCode + case () of + () | sc == import_, cond -> pure (Token kw l c) + | sc == import_ -> offsideKeyword (TokUnqual VarId text) l c + | otherwise -> pure (Token (TokUnqual VarId text) l c) + where text = T.pack (show kw) + finishVar :: (IdClass -> T.Text -> TokenClass) -> (IdClass -> T.Text -> T.Text -> TokenClass) -> Int -> Int -> T.Text -> Alex Token finishVar tokunqual tokqual l c text | T.null text = undefined diff --git a/src/Frontend/Autogen/Parser.y b/src/Frontend/Autogen/Parser.y index 2d9f8d9..a408d45 100644 --- a/src/Frontend/Autogen/Parser.y +++ b/src/Frontend/Autogen/Parser.y @@ -5,11 +5,12 @@ module Frontend.Autogen.Parser where import qualified Data.Text as T import Data.Maybe +import Frontend.Parser.Foreign import Frontend.Lexer.Wrapper +import Frontend.Autogen.Lexer import Frontend.Lexer.Tokens import Frontend.Parser.Posn import Frontend.Syntax -import Frontend.Autogen.Lexer import qualified Prelude import Prelude hiding (span) @@ -83,6 +84,12 @@ import Control.Monad 'import' { Token TokImport _ _ } 'as' { Token TokAs _ _ } 'qualified' { Token TokQualified _ _ } + + 'foreign' { Token TokForeign _ _ } + 'export' { Token TokExport _ _ } + 'safe' { Token TokSafe _ _ } + 'unsafe' { Token TokUnsafe _ _ } + 'ccall' { Token TokCCall _ _ } %% @@ -175,8 +182,9 @@ NSItem :: { NamespacedItem ParsedVar } | 'module' CON { IEModule (getVar $2) } ModItem :: { Item } - : Decl { ModDecl $1 (startPosn $1) (endPosn $1) } - | Import { ModImport $1 (startPosn $1) (endPosn $1) } + : Decl { ModDecl $1 (startPosn $1) (endPosn $1) } + | Import { ModImport $1 (startPosn $1) (endPosn $1) } + | 'foreign' FfiItem { ModFfi $2 (startPosn $1) (endPosn $2) } Import :: { ModuleImport ParsedVar } : 'import' modid ImportExportList @@ -191,6 +199,32 @@ Import :: { ModuleImport ParsedVar } | 'import' 'qualified' modid ImportExportList 'as' CON { Import $3 (fst $4) True (Just (getVar $6)) (startPosn $1) (endPosn $6) } +FfiItem :: { FfiItem ParsedVar } + : 'import' CallConv Safety Entity VAR '::' Type + -- 1 2 3 4 5 6 7 + {% do { ffiDesc <- traverse parseForeignItem $4 + ; pure $ FfiImport + { fiVarName = getVar $5 + , fiType = $7 + , fiCallConv = $2 + , fiSafety = $3 + , fiItem = ffiDesc + , fiBegin = startPosn $1 + , fiEnd = endPosn $7 } + } } + +Safety :: { Maybe FfiSafety } + : {-empty-} { Nothing } + | 'safe' { Just Safe } + | 'unsafe' { Just Unsafe } + +CallConv :: { FfiCallConv } + : 'ccall' { CC_CCall } + +Entity :: { Maybe Token } + : {-empty-} { Nothing } + | STRING { Just $1 } + Opt(p) : { () } | p { () } diff --git a/src/Frontend/Lexer/Tokens.hs b/src/Frontend/Lexer/Tokens.hs index 6d9ba2d..f1e08ea 100644 --- a/src/Frontend/Lexer/Tokens.hs +++ b/src/Frontend/Lexer/Tokens.hs @@ -47,17 +47,23 @@ data TokenClass | TokCase | TokOf + | TokForeign + | TokExport + | TokSafe + | TokUnsafe + | TokCCall + | TokData | TokSemi deriving (Eq, Ord) instance Show TokenClass where - show (TokUnqual _ id) = T.unpack id + show (TokUnqual _ id) = T.unpack id show (TokQual _ ns id) = T.unpack ns ++ '.':T.unpack id - show (TokUnqualOp _ id) = T.unpack id + show (TokUnqualOp _ id) = T.unpack id show (TokQualOp _ ns id) = T.unpack ns ++ '.':T.unpack id - show (TokString text) = show text + show (TokString text) = show text show TokEof = "" show TokLambda = "\\" @@ -95,6 +101,33 @@ instance Show TokenClass where show TokOf = "of" show TokData = "data" + show TokForeign = "foreign" + show TokExport = "export" + show TokSafe = "export" + show TokUnsafe = "export" + show TokCCall = "export" + +isKeywordToken :: TokenClass -> Bool +isKeywordToken TokLet = True +isKeywordToken TokIn = True + +isKeywordToken TokModule = True +isKeywordToken TokImport = True +isKeywordToken TokQualified = True +isKeywordToken TokAs = True +isKeywordToken TokWhere = True +isKeywordToken TokCase = True +isKeywordToken TokOf = True + +isKeywordToken TokForeign = True +isKeywordToken TokExport = True +isKeywordToken TokSafe = True +isKeywordToken TokUnsafe = True +isKeywordToken TokCCall = True + +isKeywordToken TokData = True + +isKeywordToken _ = False tokSize :: TokenClass -> Int tokSize = length . show diff --git a/src/Frontend/Parser/Foreign.hs b/src/Frontend/Parser/Foreign.hs new file mode 100644 index 0000000..d60d90e --- /dev/null +++ b/src/Frontend/Parser/Foreign.hs @@ -0,0 +1,135 @@ +{-# LANGUAGE ViewPatterns #-} +module Frontend.Parser.Foreign where + +import Control.Monad + +import qualified Data.Text as T +import Data.List + + +import Frontend.Lexer.Wrapper +import Frontend.Lexer.Tokens +import Frontend.Syntax + +import Text.Show.Pretty +import Frontend.Parser.Posn +import Data.Char (isAlpha) + +parseForeignItem :: Token -> Alex FfiImpEnt +parseForeignItem token@(Token (TokString impent) line col) = go 0 impent emptyItem + where + pn = Posn line (col + 1) + + go off xs item + | T.pack " " `T.isPrefixOf` xs = + let (run, rest) = T.span (== ' ') xs + in go (off + T.length run) rest item + + go off (T.span (/= ' ') -> (x, xs)) item + | x == xs, T.null xs = pure item + | x == T.pack "static" = + if fiStatic item + then tooManyStatics pn off + else go (off + T.length x) xs item{fiStatic = True} + | T.pack ".h" `T.isSuffixOf` x = + case fiHeader item of + Nothing -> go (off + T.length x) xs item{fiHeader = Just x} + Just _ -> tooManyHeaders pn off (T.length x) + | T.singleton '&' `T.isPrefixOf` x, T.length x == 1 = + if fiIsRef item + then tooManyReferences pn off + else go (off + 1) xs item{fiIsRef = True} + | T.singleton '&' `T.isPrefixOf` x, T.length x >= 2 = + if fiIsRef item + then tooManyReferences pn off + else go (off + 1) (T.tail x <> xs) item{fiIsRef = True} + | otherwise = + case fiItemName item of + Nothing -> do + id <- parseCid pn off x + go (off + T.length x) xs item{fiItemName = id} + _ -> tooManyItemNames pn off (T.length x) +parseForeignItem _ = undefined + +tooManyHeaders :: Posn -> Int -> Int -> Alex a +tooManyHeaders (Posn l c) off len = + alexThrow $ \fname -> + ParseError + { parseErrorMessage = "this foreign entity has too many header names; only one is allowed." + , parseErrorFilename = fname + , parseErrorInlineDesc = Just "repeated header name" + , parseErrorBegin = Posn l (c + off) + , parseErrorEnd = Posn l (c + off + len) + } + +tooManyItemNames :: Posn -> Int -> Int -> Alex a +tooManyItemNames (Posn l c) off len = + alexThrow $ \fname -> + ParseError + { parseErrorMessage = "this foreign entity has too many names; only one is allowed." + , parseErrorFilename = fname + , parseErrorInlineDesc = Just "repeated name" + , parseErrorBegin = Posn l (c + off) + , parseErrorEnd = Posn l (c + off + len) + } + +tooManyReferences :: Posn -> Int -> Alex a +tooManyReferences (Posn l c) off = + alexThrow $ \fname -> + ParseError + { parseErrorMessage = "this foreign entity has too many '&'s; only one is allowed." + , parseErrorFilename = fname + , parseErrorInlineDesc = Just "repeated '&'" + , parseErrorBegin = Posn l (c + off) + , parseErrorEnd = Posn l (c + off + 1) + } + +tooManyStatics :: Posn -> Int -> Alex a +tooManyStatics (Posn l c) off = + alexThrow $ \fname -> + ParseError + { parseErrorMessage = "this foreign entity has too many 'static's; only one is allowed." + , parseErrorFilename = fname + , parseErrorInlineDesc = Just "repeated 'static'" + , parseErrorBegin = Posn l (c + off) + , parseErrorEnd = Posn l (c + off + length "static") + } + +parseCid :: Posn -> Int -> T.Text -> Alex (Maybe T.Text) +parseCid (Posn l c) off id + | T.null id = pure $ Nothing + | isValidCIdent id = pure $ Just id + | otherwise = + alexThrow $ \fname -> + ParseError + { parseErrorMessage = "names for foreign entities must be valid C identifiers." + , parseErrorFilename = fname + , parseErrorInlineDesc = Just "this is not a C identifier" + , parseErrorBegin = Posn l (c + off) + , parseErrorEnd = Posn l (c + off + T.length id) + } + +isValidCIdent :: T.Text -> Bool +isValidCIdent = go . T.unpack where + go :: String -> Bool + go [] = False + go (x:xs) + | isAlpha x = go' xs + | otherwise = False + + go' :: String -> Bool + go' [] = True + go' (x:xs) + | 'a' <= x && x <= 'z' = go' xs + | 'A' <= x && x <= 'Z' = go' xs + | '0' <= x && x <= '9' = go' xs + | x == '_' = go' xs + | otherwise = False + +emptyItem :: FfiImpEnt +emptyItem = + ForeignItem { fiItemName = Nothing + , fiHeader = Nothing + , fiStatic = False + , fiIsRef = False + } diff --git a/src/Frontend/Syntax.hs b/src/Frontend/Syntax.hs index 1ba6164..935aaa7 100644 --- a/src/Frontend/Syntax.hs +++ b/src/Frontend/Syntax.hs @@ -135,6 +135,40 @@ data NamespacedItem var data ModuleItem var = ModDecl { itemDecl :: FeDecl var, itemBegin :: Posn, itemEnd :: Posn } | ModImport { itemImport :: ModuleImport var, itemBegin :: Posn, itemEnd :: Posn } + | ModFfi { itemForeign :: FfiItem var, itemBegin :: Posn, itemEnd :: Posn } + deriving (Eq, Show, Ord) + +data FfiItem var + = FfiImport + { fiVarName :: var + , fiType :: FeType var + , fiCallConv :: FfiCallConv + , fiSafety :: Maybe FfiSafety + , fiItem :: Maybe FfiImpEnt + , fiBegin :: Posn + , fiEnd :: Posn + } + | FfiExport + { fiVarName :: var + , fiType :: FeType var + , fiCallConv :: FfiCallConv + , fiExpCid :: Maybe Text + , fiBegin :: Posn + , fiEnd :: Posn + } + deriving (Eq, Show, Ord) + +data FfiCallConv = CC_CCall deriving (Eq, Show, Ord) +data FfiSafety = Safe | Unsafe deriving (Eq, Show, Ord) +data FfiImpEnt + = ForeignItem + { fiItemName :: Maybe Text + , fiHeader :: Maybe Text + , fiStatic :: Bool + , fiIsRef :: Bool + } + | Dynamic + | Wrapper deriving (Eq, Show, Ord) data ParsedVar @@ -185,4 +219,10 @@ instance HasPosn (ModuleItem var) where startPosn = itemBegin endPosn = itemEnd - span sp ep s = s { itemBegin = startPosn sp, itemEnd = endPosn ep } \ No newline at end of file + span sp ep s = s { itemBegin = startPosn sp, itemEnd = endPosn ep } + +instance HasPosn (FfiItem var) where + startPosn = fiBegin + endPosn = fiEnd + + span sp ep s = s { fiBegin = startPosn sp, fiEnd = endPosn ep } \ No newline at end of file diff --git a/src/Main.hs b/src/Main.hs index fe0882c..0d9b667 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -28,7 +28,7 @@ main :: IO () main = do args <- getArgs for_ args $ \str -> do - Main.lex str pPrint parseMod =<< Lbs.readFile str + Main.lex str (\_ -> putStrLn $ str ++ " parsed!") parseMod =<< Lbs.readFile str testParse :: String -> IO () testParse s = Main.lex "" pPrint parseMod (Lbs.fromStrict (T.encodeUtf8 (T.pack s)))