less prototype, less bad code implementation of CCHM type theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

103 lines
3.0 KiB

  1. {
  2. module Presyntax.Lexer where
  3. import qualified Data.ByteString.Lazy as Lbs
  4. import qualified Data.Text.Encoding as T
  5. import Presyntax.Tokens
  6. }
  7. %wrapper "monadUserState-bytestring"
  8. $alpha = [a-zA-Z]
  9. $digit = [0-9]
  10. $white_nol = $white # \n
  11. tokens :-
  12. $white_nol+ ;
  13. <0,prtext> $alpha [$alpha $digit \_ \']* { yield TokVar }
  14. -- zero state: normal lexing
  15. <0> \= { always TokEqual }
  16. <0> \: { always TokColon }
  17. <0> \, { always TokComma }
  18. <0> \* { always TokStar }
  19. <0> ".1" { always TokPi1 }
  20. <0> ".2" { always TokPi2 }
  21. <0> \\ { always TokLambda }
  22. <0> "->" { always TokArrow }
  23. <0> \( { always TokOParen }
  24. <0> \{ { always TokOBrace }
  25. <0> \) { always TokCParen }
  26. <0> \} { always TokCBrace }
  27. <0> \; { always TokSemi }
  28. <0> \n { just $ pushStartCode newline }
  29. <0> "{-#" { \i l -> pushStartCode prkw *> always TokOPragma i l }
  30. <prkw> "PRIMITIVE" { \i l -> popStartCode *> pushStartCode prtext *> always TokPrim i l }
  31. <prtext> "#-}" { \i l -> popStartCode *> always TokCPragma i l }
  32. <0> ":let" { always TokReplLet }
  33. <0> ":t"("y"|"yp"|"ype"|()) { yield TokReplT }
  34. -- newline: emit a semicolon when de-denting
  35. <newline> {
  36. \n ;
  37. () { offsideRule }
  38. }
  39. {
  40. alexEOF :: Alex Token
  41. alexEOF = do
  42. (AlexPn _ l c, _, _, _) <- alexGetInput
  43. pure $ Token TokEof l c
  44. yield k (AlexPn _ l c, _, s, _) i = pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
  45. always :: TokenClass -> AlexInput -> Int64 -> Alex Token
  46. always k x i = yield (const k) x i
  47. data AlexUserState = AlexUserState { layoutColumns :: [Int], startCodes :: [Int] }
  48. alexInitUserState = AlexUserState [1] []
  49. just :: Alex a -> AlexAction Token
  50. just k _ _ = k *> alexMonadScan
  51. getUserState :: Alex AlexUserState
  52. getUserState = Alex $ \s -> Right (s, alex_ust s)
  53. mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
  54. mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
  55. pushStartCode :: Int -> Alex ()
  56. pushStartCode c = do
  57. sc <- alexGetStartCode
  58. mapUserState $ \s -> s { startCodes = sc:startCodes s }
  59. alexSetStartCode c
  60. popStartCode :: Alex ()
  61. popStartCode = do
  62. sc <- startCodes <$> getUserState
  63. case sc of
  64. [] -> alexSetStartCode 0
  65. (x:xs) -> do
  66. mapUserState $ \s -> s { startCodes = xs }
  67. alexSetStartCode x
  68. offsideRule :: AlexInput -> Int64 -> Alex Token
  69. offsideRule (AlexPn _ line col, _, _, _) _ = do
  70. ~(col':_) <- layoutColumns <$> getUserState
  71. case col `compare` col' of
  72. EQ -> do
  73. popStartCode
  74. pure (Token TokSemi line col)
  75. GT -> do
  76. popStartCode
  77. alexMonadScan
  78. LT -> alexError "wrong ass indentation"
  79. }