You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
2.9 KiB

2 years ago
  1. {-# LANGUAGE DeriveFunctor #-}
  2. {-# LANGUAGE GeneralizedNewtypeDeriving #-}
  3. module Lexer.Support where
  4. import Data.Word
  5. import Data.List (uncons)
  6. import Data.Char (ord)
  7. import Control.Monad.State
  8. import Data.List.NonEmpty (NonEmpty ((:|)))
  9. import qualified Data.List.NonEmpty as NE
  10. import Control.Monad.Except
  11. data Token
  12. = TkIdent String -- identifiers
  13. -- Keywords
  14. | TkLet | TkIn | TkWhere
  15. -- Punctuation
  16. | TkEqual | TkOpen | TkSemi | TkClose
  17. | TkLParen | TkRParen
  18. | TkBackslash | TkArrow
  19. -- Layout punctuation
  20. | TkVOpen | TkVSemi | TkVClose
  21. | TkEOF
  22. deriving (Eq, Show)
  23. data AlexInput
  24. = Input { inpLine :: {-# UNPACK #-} !Int
  25. , inpColumn :: {-# UNPACK #-} !Int
  26. , inpLast :: {-# UNPACK #-} !Char
  27. , inpStream :: String
  28. }
  29. deriving (Eq, Show)
  30. alexPrevInputChar :: AlexInput -> Char
  31. alexPrevInputChar = inpLast
  32. alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
  33. alexGetByte inp@Input{inpStream = str} = advance <$> uncons str where
  34. advance ('\n', rest) =
  35. ( fromIntegral (ord '\n')
  36. , Input { inpLine = inpLine inp + 1
  37. , inpColumn = 0
  38. , inpLast = '\n'
  39. , inpStream = rest }
  40. )
  41. advance (c, rest) =
  42. ( fromIntegral (ord c)
  43. , Input { inpLine = inpLine inp
  44. , inpColumn = inpColumn inp + 1
  45. , inpLast = c
  46. , inpStream = rest }
  47. )
  48. newtype Lexer a = Lexer { _getLexer :: StateT LexerState (Either String) a }
  49. deriving (Functor, Applicative, Monad, MonadState LexerState, MonadError String)
  50. data Layout = ExplicitLayout | LayoutColumn Int
  51. deriving (Eq, Show, Ord)
  52. data LexerState
  53. = LS { lexerInput :: {-# UNPACK #-} !AlexInput
  54. , lexerStartCodes :: {-# UNPACK #-} !(NonEmpty Int)
  55. , lexerLayout :: [Layout]
  56. }
  57. deriving (Eq, Show)
  58. startCode :: Lexer Int
  59. startCode = gets (NE.head . lexerStartCodes)
  60. pushStartCode :: Int -> Lexer ()
  61. pushStartCode i = modify' $ \st ->
  62. st { lexerStartCodes = NE.cons i (lexerStartCodes st)
  63. }
  64. popStartCode :: Lexer ()
  65. popStartCode = modify' $ \st ->
  66. st { lexerStartCodes =
  67. case lexerStartCodes st of
  68. _ :| [] -> 0 :| []
  69. _ :| (x:xs) -> x :| xs
  70. }
  71. layout :: Lexer (Maybe Layout)
  72. layout = gets (fmap fst . uncons . lexerLayout)
  73. pushLayout :: Layout -> Lexer ()
  74. pushLayout i = modify' $ \st ->
  75. st { lexerLayout = i:lexerLayout st }
  76. popLayout :: Lexer ()
  77. popLayout = modify' $ \st ->
  78. st { lexerLayout =
  79. case lexerLayout st of
  80. _:xs -> xs
  81. [] -> []
  82. }
  83. initState :: String -> LexerState
  84. initState str = LS { lexerInput = Input 0 1 '\n' str
  85. , lexerStartCodes = 0 :| []
  86. , lexerLayout = []
  87. }
  88. emit :: (String -> Token) -> String -> Lexer Token
  89. emit = (pure .)
  90. token :: Token -> String -> Lexer Token
  91. token = const . pure
  92. runLexer :: Lexer a -> String -> Either String a
  93. runLexer act s = fst <$> runStateT (_getLexer act) (initState s)