less prototype, less bad code implementation of CCHM type theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

186 lines
5.3 KiB

  1. {
  2. module Presyntax.Lexer where
  3. import qualified Data.ByteString.Lazy as Lbs
  4. import qualified Data.Text.Encoding as T
  5. import qualified Data.Text as T
  6. import Presyntax.Tokens
  7. import Debug.Trace
  8. }
  9. %wrapper "monadUserState-bytestring"
  10. $alpha = [a-zA-Z]
  11. $digit = [0-9]
  12. $white_nol = $white # \n
  13. tokens :-
  14. $white_nol+ ;
  15. "--" .* \n ;
  16. <0,prtext> $alpha [$alpha $digit \_ \']* { variableOrKeyword }
  17. -- zero state: normal lexing
  18. <0> \= { always TokEqual }
  19. <0> \: { always TokColon }
  20. <0> \, { always TokComma }
  21. <0> \* { always TokStar }
  22. <0> ".1" { always TokPi1 }
  23. <0> ".2" { always TokPi2 }
  24. <0> \\ { always TokLambda }
  25. <0> "->" { always TokArrow }
  26. <0> "_" { always TokUnder }
  27. <0> \( { always TokOParen }
  28. <0> \{ { always TokOBrace }
  29. <0> \[ { always TokOSquare }
  30. <0> \) { always TokCParen }
  31. <0> \} { closeBrace }
  32. <0> \] { always TokCSquare }
  33. <0> \; { always TokSemi }
  34. <0> \n { just $ pushStartCode newline }
  35. <0> "&&" { always TokAnd }
  36. <0> "||" { always TokOr }
  37. <0> "{-" { just $ pushStartCode comment }
  38. <comment> {
  39. "-}" { \i l -> popStartCode *> skip i l }
  40. . ;
  41. }
  42. <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l }
  43. <prkw> "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l }
  44. <prtext> "#-}" { \i l -> popStartCode *> always TokCPragma i l }
  45. <0> ":let" { always TokReplLet }
  46. <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT }
  47. -- newline: emit a semicolon when de-denting
  48. <newline> {
  49. \n ;
  50. () { offsideRule }
  51. }
  52. -- layout: indentation of the next token is context for offside rule
  53. <layout> {
  54. \n ;
  55. \{ { openBrace }
  56. () { startLayout }
  57. }
  58. <empty_layout> () { emptyLayout }
  59. {
  60. alexEOF :: Alex Token
  61. alexEOF = do
  62. (AlexPn _ l c, _, _, _) <- alexGetInput
  63. pure $ Token TokEof l c
  64. yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
  65. always :: TokenClass -> AlexInput -> Int64 -> Alex Token
  66. always k x i = yield (const k) x i
  67. data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int], leastColumn :: Int }
  68. alexInitUserState = AlexUserState [1] [] 0
  69. just :: Alex a -> AlexAction Token
  70. just k _ _ = k *> alexMonadScan
  71. getUserState :: Alex AlexUserState
  72. getUserState = Alex $ \s -> Right (s, alex_ust s)
  73. mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
  74. mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
  75. pushStartCode :: Int -> Alex ()
  76. pushStartCode c = do
  77. sc <- alexGetStartCode
  78. mapUserState $ \s -> s { startCodes = sc:startCodes s }
  79. alexSetStartCode c
  80. popStartCode :: Alex ()
  81. popStartCode = do
  82. sc <- startCodes <$> getUserState
  83. case sc of
  84. [] -> alexSetStartCode 0
  85. (x:xs) -> do
  86. mapUserState $ \s -> s { startCodes = xs }
  87. alexSetStartCode x
  88. offsideRule :: AlexInput -> Int64 -> Alex Token
  89. offsideRule (AlexPn _ line col, _, _, _) _
  90. -- | Lbs.null s = pure (Token TokEof line col)
  91. | otherwise = do
  92. ~(col':ctx) <- layoutColumns <$> getUserState
  93. case col `compare` col' of
  94. EQ -> do
  95. popStartCode
  96. pure (Token TokSemi line col)
  97. GT -> do
  98. popStartCode
  99. alexMonadScan
  100. LT -> do
  101. mapUserState $ \s -> s { layoutColumns = ctx }
  102. pure (Token TokLEnd line col)
  103. emptyLayout :: AlexInput -> Int64 -> Alex Token
  104. emptyLayout (AlexPn _ line col, _, _, _) _ = do
  105. popStartCode
  106. pushStartCode newline
  107. pure (Token TokLEnd line col)
  108. startLayout :: AlexInput -> Int64 -> Alex Token
  109. startLayout (AlexPn _ line col, _, _, _) _ = do
  110. popStartCode
  111. ~(col':_) <- layoutColumns <$> getUserState
  112. if col' >= col
  113. then pushStartCode empty_layout
  114. else mapUserState $ \s -> s { layoutColumns = col:layoutColumns s }
  115. pure (Token TokLStart line col)
  116. openBrace :: AlexInput -> Int64 -> Alex Token
  117. openBrace (AlexPn _ line col, _, _, _) _ = do
  118. popStartCode
  119. mapUserState $ \s -> s { layoutColumns = minBound:layoutColumns s }
  120. pure (Token TokOBrace line col)
  121. closeBrace :: AlexInput -> Int64 -> Alex Token
  122. closeBrace (AlexPn _ line col, _, _, _) _ = do
  123. ~(col':tail) <- layoutColumns <$> getUserState
  124. if col' < 0
  125. then mapUserState $ \s -> s { layoutColumns = tail }
  126. else pure ()
  127. pure (Token TokCBrace line col)
  128. variableOrKeyword :: AlexAction Token
  129. variableOrKeyword (AlexPn _ l c, _, s, _) size =
  130. let text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s)) in
  131. case T.unpack text of
  132. "as" -> pure (Token TokAs l c)
  133. "in" -> pure (Token TokIn l c)
  134. "data" -> pure (Token TokData l c)
  135. "postulate" -> laidOut TokPostulate l c
  136. "let" -> laidOut TokLet l c
  137. "where" -> laidOut TokWhere l c
  138. "case" -> laidOut TokCase l c
  139. _ -> pure (Token (TokVar text) l c)
  140. laidOut x l c = do
  141. pushStartCode layout
  142. mapUserState $ \s -> s { leastColumn = c }
  143. pure (Token x l c)
  144. }