{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} module Lexer.Support where import Data.Word import Data.List (uncons) import Data.Char (ord) import Control.Monad.State import Data.List.NonEmpty (NonEmpty ((:|))) import qualified Data.List.NonEmpty as NE import Control.Monad.Except data Token = TkIdent String -- identifiers -- Keywords | TkLet | TkIn | TkWhere -- Punctuation | TkEqual | TkOpen | TkSemi | TkClose | TkLParen | TkRParen | TkBackslash | TkArrow -- Layout punctuation | TkVOpen | TkVSemi | TkVClose | TkEOF deriving (Eq, Show) data AlexInput = Input { inpLine :: {-# UNPACK #-} !Int , inpColumn :: {-# UNPACK #-} !Int , inpLast :: {-# UNPACK #-} !Char , inpStream :: String } deriving (Eq, Show) alexPrevInputChar :: AlexInput -> Char alexPrevInputChar = inpLast alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) alexGetByte inp@Input{inpStream = str} = advance <$> uncons str where advance ('\n', rest) = ( fromIntegral (ord '\n') , Input { inpLine = inpLine inp + 1 , inpColumn = 0 , inpLast = '\n' , inpStream = rest } ) advance (c, rest) = ( fromIntegral (ord c) , Input { inpLine = inpLine inp , inpColumn = inpColumn inp + 1 , inpLast = c , inpStream = rest } ) newtype Lexer a = Lexer { _getLexer :: StateT LexerState (Either String) a } deriving (Functor, Applicative, Monad, MonadState LexerState, MonadError String) data Layout = ExplicitLayout | LayoutColumn Int deriving (Eq, Show, Ord) data LexerState = LS { lexerInput :: {-# UNPACK #-} !AlexInput , lexerStartCodes :: {-# UNPACK #-} !(NonEmpty Int) , lexerLayout :: [Layout] } deriving (Eq, Show) startCode :: Lexer Int startCode = gets (NE.head . lexerStartCodes) pushStartCode :: Int -> Lexer () pushStartCode i = modify' $ \st -> st { lexerStartCodes = NE.cons i (lexerStartCodes st) } popStartCode :: Lexer () popStartCode = modify' $ \st -> st { lexerStartCodes = case lexerStartCodes st of _ :| [] -> 0 :| [] _ :| (x:xs) -> x :| xs } layout :: Lexer (Maybe Layout) layout = gets (fmap fst . uncons . lexerLayout) pushLayout :: Layout -> Lexer () pushLayout i = modify' $ \st -> st { lexerLayout = i:lexerLayout st } popLayout :: Lexer () popLayout = modify' $ \st -> st { lexerLayout = case lexerLayout st of _:xs -> xs [] -> [] } initState :: String -> LexerState initState str = LS { lexerInput = Input 0 1 '\n' str , lexerStartCodes = 0 :| [] , lexerLayout = [] } emit :: (String -> Token) -> String -> Lexer Token emit = (pure .) token :: Token -> String -> Lexer Token token = const . pure runLexer :: Lexer a -> String -> Either String a runLexer act s = fst <$> runStateT (_getLexer act) (initState s)