less prototype, less bad code implementation of CCHM type theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
3.6 KiB

  1. {
  2. module Presyntax.Lexer where
  3. import qualified Data.ByteString.Lazy as Lbs
  4. import qualified Data.Text.Encoding as T
  5. import qualified Data.Text as T
  6. import Presyntax.Tokens
  7. }
  8. %wrapper "monadUserState-bytestring"
  9. $alpha = [a-zA-Z]
  10. $digit = [0-9]
  11. $white_nol = $white # \n
  12. tokens :-
  13. $white_nol+ ;
  14. "--" .* \n ;
  15. <0,prtext> $alpha [$alpha $digit \_ \']* { yield tokVar }
  16. -- zero state: normal lexing
  17. <0> \= { always TokEqual }
  18. <0> \: { always TokColon }
  19. <0> \, { always TokComma }
  20. <0> \* { always TokStar }
  21. <0> ".1" { always TokPi1 }
  22. <0> ".2" { always TokPi2 }
  23. <0> \\ { always TokLambda }
  24. <0> "->" { always TokArrow }
  25. <0> \( { always TokOParen }
  26. <0> \{ { always TokOBrace }
  27. <0> \[ { always TokOSquare }
  28. <0> \) { always TokCParen }
  29. <0> \} { always TokCBrace }
  30. <0> \] { always TokCSquare }
  31. <0> \; { always TokSemi }
  32. <0> \n { just $ pushStartCode newline }
  33. <0> "&&" { always TokAnd }
  34. <0> "||" { always TokOr }
  35. <0> "{-" { just $ pushStartCode comment }
  36. <comment> {
  37. "-}" { \i l -> popStartCode *> skip i l }
  38. . ;
  39. }
  40. <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l }
  41. <prkw> "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l }
  42. <prtext> "#-}" { \i l -> popStartCode *> always TokCPragma i l }
  43. <0> ":let" { always TokReplLet }
  44. <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT }
  45. -- newline: emit a semicolon when de-denting
  46. <newline> {
  47. \n ;
  48. () { offsideRule }
  49. }
  50. {
  51. alexEOF :: Alex Token
  52. alexEOF = do
  53. (AlexPn _ l c, _, _, _) <- alexGetInput
  54. pure $ Token TokEof l c
  55. yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
  56. always :: TokenClass -> AlexInput -> Int64 -> Alex Token
  57. always k x i = yield (const k) x i
  58. data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int] }
  59. alexInitUserState = AlexUserState [1] []
  60. just :: Alex a -> AlexAction Token
  61. just k _ _ = k *> alexMonadScan
  62. getUserState :: Alex AlexUserState
  63. getUserState = Alex $ \s -> Right (s, alex_ust s)
  64. mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
  65. mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
  66. pushStartCode :: Int -> Alex ()
  67. pushStartCode c = do
  68. sc <- alexGetStartCode
  69. mapUserState $ \s -> s { startCodes = sc:startCodes s }
  70. alexSetStartCode c
  71. popStartCode :: Alex ()
  72. popStartCode = do
  73. sc <- startCodes <$> getUserState
  74. case sc of
  75. [] -> alexSetStartCode 0
  76. (x:xs) -> do
  77. mapUserState $ \s -> s { startCodes = xs }
  78. alexSetStartCode x
  79. offsideRule :: AlexInput -> Int64 -> Alex Token
  80. offsideRule (AlexPn _ line col, _, s, _) _
  81. | Lbs.null s = pure (Token TokEof line col)
  82. | otherwise = do
  83. ~(col':_) <- layoutColumns <$> getUserState
  84. case col `compare` col' of
  85. EQ -> do
  86. popStartCode
  87. pure (Token TokSemi line col)
  88. GT -> do
  89. popStartCode
  90. alexMonadScan
  91. LT -> alexError "wrong ass indentation"
  92. tokVar :: T.Text -> TokenClass
  93. tokVar text =
  94. case T.unpack text of
  95. "as" -> TokAs
  96. _ -> TokVar text
  97. }