|
|
- {-# LANGUAGE DeriveFunctor #-}
- {-# LANGUAGE GeneralizedNewtypeDeriving #-}
- module Lexer.Support where
-
- import Data.Word
- import Data.List (uncons)
- import Data.Char (ord)
- import Control.Monad.State
- import Data.List.NonEmpty (NonEmpty ((:|)))
- import qualified Data.List.NonEmpty as NE
- import Control.Monad.Except
-
- data Token
- = TkIdent String -- identifiers
-
- -- Keywords
- | TkLet | TkIn | TkWhere
-
- -- Punctuation
- | TkEqual | TkOpen | TkSemi | TkClose
- | TkLParen | TkRParen
- | TkBackslash | TkArrow
-
- -- Layout punctuation
- | TkVOpen | TkVSemi | TkVClose
- | TkEOF
- deriving (Eq, Show)
-
- data AlexInput
- = Input { inpLine :: {-# UNPACK #-} !Int
- , inpColumn :: {-# UNPACK #-} !Int
- , inpLast :: {-# UNPACK #-} !Char
- , inpStream :: String
- }
- deriving (Eq, Show)
-
- alexPrevInputChar :: AlexInput -> Char
- alexPrevInputChar = inpLast
-
- alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
- alexGetByte inp@Input{inpStream = str} = advance <$> uncons str where
- advance ('\n', rest) =
- ( fromIntegral (ord '\n')
- , Input { inpLine = inpLine inp + 1
- , inpColumn = 0
- , inpLast = '\n'
- , inpStream = rest }
- )
- advance (c, rest) =
- ( fromIntegral (ord c)
- , Input { inpLine = inpLine inp
- , inpColumn = inpColumn inp + 1
- , inpLast = c
- , inpStream = rest }
- )
-
- newtype Lexer a = Lexer { _getLexer :: StateT LexerState (Either String) a }
- deriving (Functor, Applicative, Monad, MonadState LexerState, MonadError String)
-
- data Layout = ExplicitLayout | LayoutColumn Int
- deriving (Eq, Show, Ord)
-
- data LexerState
- = LS { lexerInput :: {-# UNPACK #-} !AlexInput
- , lexerStartCodes :: {-# UNPACK #-} !(NonEmpty Int)
- , lexerLayout :: [Layout]
- }
- deriving (Eq, Show)
-
- startCode :: Lexer Int
- startCode = gets (NE.head . lexerStartCodes)
-
- pushStartCode :: Int -> Lexer ()
- pushStartCode i = modify' $ \st ->
- st { lexerStartCodes = NE.cons i (lexerStartCodes st)
- }
-
- popStartCode :: Lexer ()
- popStartCode = modify' $ \st ->
- st { lexerStartCodes =
- case lexerStartCodes st of
- _ :| [] -> 0 :| []
- _ :| (x:xs) -> x :| xs
- }
-
- layout :: Lexer (Maybe Layout)
- layout = gets (fmap fst . uncons . lexerLayout)
-
- pushLayout :: Layout -> Lexer ()
- pushLayout i = modify' $ \st ->
- st { lexerLayout = i:lexerLayout st }
-
- popLayout :: Lexer ()
- popLayout = modify' $ \st ->
- st { lexerLayout =
- case lexerLayout st of
- _:xs -> xs
- [] -> []
- }
-
- initState :: String -> LexerState
- initState str = LS { lexerInput = Input 0 1 '\n' str
- , lexerStartCodes = 0 :| []
- , lexerLayout = []
- }
-
- emit :: (String -> Token) -> String -> Lexer Token
- emit = (pure .)
-
- token :: Token -> String -> Lexer Token
- token = const . pure
-
- runLexer :: Lexer a -> String -> Either String a
- runLexer act s = fst <$> runStateT (_getLexer act) (initState s)
|