less prototype, less bad code implementation of CCHM type theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

163 lines
4.7 KiB

  1. {
  2. module Presyntax.Lexer where
  3. import qualified Data.ByteString.Lazy as Lbs
  4. import qualified Data.Text.Encoding as T
  5. import qualified Data.Text as T
  6. import Presyntax.Tokens
  7. }
  8. %wrapper "monadUserState-bytestring"
  9. $alpha = [a-zA-Z]
  10. $digit = [0-9]
  11. $white_nol = $white # \n
  12. tokens :-
  13. $white_nol+ ;
  14. "--" .* \n ;
  15. <0,prtext> $alpha [$alpha $digit \_ \']* { variableOrKeyword }
  16. -- zero state: normal lexing
  17. <0> \= { always TokEqual }
  18. <0> \: { always TokColon }
  19. <0> \, { always TokComma }
  20. <0> \* { always TokStar }
  21. <0> ".1" { always TokPi1 }
  22. <0> ".2" { always TokPi2 }
  23. <0> \\ { always TokLambda }
  24. <0> "->" { always TokArrow }
  25. <0> \( { always TokOParen }
  26. <0> \{ { always TokOBrace }
  27. <0> \[ { always TokOSquare }
  28. <0> \) { always TokCParen }
  29. <0> \} { always TokCBrace }
  30. <0> \] { always TokCSquare }
  31. <0> \; { always TokSemi }
  32. <0> \n { just $ pushStartCode newline }
  33. <0> "&&" { always TokAnd }
  34. <0> "||" { always TokOr }
  35. <0> "{-" { just $ pushStartCode comment }
  36. <comment> {
  37. "-}" { \i l -> popStartCode *> skip i l }
  38. . ;
  39. }
  40. <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l }
  41. <prkw> "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l }
  42. <prtext> "#-}" { \i l -> popStartCode *> always TokCPragma i l }
  43. <0> ":let" { always TokReplLet }
  44. <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT }
  45. -- newline: emit a semicolon when de-denting
  46. <newline> {
  47. \n ;
  48. () { offsideRule }
  49. }
  50. -- layout: indentation of the next token is context for offside rule
  51. <layout> {
  52. \n ;
  53. () { startLayout }
  54. }
  55. <empty_layout> () { emptyLayout }
  56. {
  57. alexEOF :: Alex Token
  58. alexEOF = do
  59. (AlexPn _ l c, _, _, _) <- alexGetInput
  60. pure $ Token TokEof l c
  61. yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
  62. always :: TokenClass -> AlexInput -> Int64 -> Alex Token
  63. always k x i = yield (const k) x i
  64. data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int], leastColumn :: Int }
  65. alexInitUserState = AlexUserState [1] [] 0
  66. just :: Alex a -> AlexAction Token
  67. just k _ _ = k *> alexMonadScan
  68. getUserState :: Alex AlexUserState
  69. getUserState = Alex $ \s -> Right (s, alex_ust s)
  70. mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
  71. mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
  72. pushStartCode :: Int -> Alex ()
  73. pushStartCode c = do
  74. sc <- alexGetStartCode
  75. mapUserState $ \s -> s { startCodes = sc:startCodes s }
  76. alexSetStartCode c
  77. popStartCode :: Alex ()
  78. popStartCode = do
  79. sc <- startCodes <$> getUserState
  80. case sc of
  81. [] -> alexSetStartCode 0
  82. (x:xs) -> do
  83. mapUserState $ \s -> s { startCodes = xs }
  84. alexSetStartCode x
  85. offsideRule :: AlexInput -> Int64 -> Alex Token
  86. offsideRule (AlexPn _ line col, _, s, _) size
  87. -- | Lbs.null s = pure (Token TokEof line col)
  88. | otherwise = do
  89. ~(col':ctx) <- layoutColumns <$> getUserState
  90. case col `compare` col' of
  91. EQ -> do
  92. popStartCode
  93. pure (Token TokSemi line col)
  94. GT -> do
  95. popStartCode
  96. alexMonadScan
  97. LT -> do
  98. mapUserState $ \s -> s { layoutColumns = ctx }
  99. pure (Token TokLEnd line col)
  100. emptyLayout :: AlexInput -> Int64 -> Alex Token
  101. emptyLayout (AlexPn _ line col, _, _, _) _ = do
  102. popStartCode
  103. pushStartCode newline
  104. pure (Token TokLEnd line col)
  105. startLayout :: AlexInput -> Int64 -> Alex Token
  106. startLayout (AlexPn _ line col, _, s, _) size = do
  107. popStartCode
  108. least <- leastColumn <$> getUserState
  109. ~(col':_) <- layoutColumns <$> getUserState
  110. if (col' >= col) || col <= least
  111. then pushStartCode empty_layout
  112. else mapUserState $ \s -> s { layoutColumns = col:layoutColumns s }
  113. pure (Token TokLStart line col)
  114. variableOrKeyword :: AlexAction Token
  115. variableOrKeyword (AlexPn _ l c, _, s, _) size =
  116. let text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) in
  117. case T.unpack text of
  118. "as" -> pure (Token TokAs l c)
  119. "in" -> pure (Token TokIn l c)
  120. "postulate" -> laidOut TokPostulate l c
  121. "let" -> laidOut TokLet l c
  122. _ -> pure (Token (TokVar text) l c)
  123. laidOut x l c = do
  124. pushStartCode layout
  125. mapUserState $ \s -> s { leastColumn = c }
  126. pure (Token x l c)
  127. }