a type theory with equality based on setoids
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

171 lines
4.9 KiB

  1. {-# LANGUAGE BangPatterns #-}
  2. module Presyntax.Lexer where
  3. import Data.Text (Text)
  4. import Data.Char
  5. import qualified Data.Text as T
  6. {- HLINT ignore -}
  7. data TokenClass
  8. = Tok_var Text
  9. | Tok_lambda
  10. | Tok_type
  11. | Tok_let
  12. | Tok_in
  13. -- Operations on equality
  14. | Tok_coe
  15. | Tok_cong
  16. | Tok_refl
  17. | Tok_sym
  18. | Tok_proj1
  19. | Tok_proj2
  20. | Tok_top
  21. | Tok_oparen
  22. | Tok_cparen
  23. | Tok_obrace
  24. | Tok_cbrace
  25. | Tok_arrow
  26. | Tok_times
  27. | Tok_colon
  28. | Tok_comma
  29. | Tok_semi
  30. | Tok_equal
  31. | Tok_under
  32. | Tok_equiv
  33. deriving (Eq, Show, Ord)
  34. data Token
  35. = Token { tokLine :: {-# UNPACK #-} !Int
  36. , tokCol :: {-# UNPACK #-} !Int
  37. , tokSOff :: {-# UNPACK #-} !Int
  38. , tokOff :: {-# UNPACK #-} !Int
  39. , tokClass :: !TokenClass
  40. }
  41. deriving (Eq, Show, Ord)
  42. data LexError
  43. = LexError { leChar :: {-# UNPACK #-} !Char
  44. , leLine :: {-# UNPACK #-} !Int
  45. , leCol :: {-# UNPACK #-} !Int
  46. }
  47. | EOFInComment { leLine :: {-# UNPACK #-} !Int
  48. , leCol :: {-# UNPACK #-} !Int
  49. }
  50. deriving (Eq, Show, Ord)
  51. lexString :: String -> Either LexError [Token]
  52. lexString = go 0 0 0 where
  53. go :: Int -> Int -> Int -> String -> Either LexError [Token]
  54. go !off !line !_ ('\n':xs) =
  55. go (off + 1) (line + 1) 0 xs
  56. go !off !line !col (' ':xs) =
  57. go (off + 1) line (col + 1) xs
  58. go !off !line !_ ('-':'-':xs) =
  59. let (a, b) = span (/= '\n') xs
  60. in go (off + length a) line 0 b
  61. go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
  62. go !off !line !col ('(':cs) =
  63. Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
  64. go !off !line !col (')':cs) =
  65. Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
  66. go !off !line !col ('{':cs) =
  67. Token line col off (off + 1) Tok_obrace `yield` go (off + 1) line (col + 1) cs
  68. go !off !line !col ('}':cs) =
  69. Token line col off (off + 1) Tok_cbrace `yield` go (off + 1) line (col + 1) cs
  70. go !off !line !col (':':cs) =
  71. Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
  72. go !off !line !col (';':cs) =
  73. Token line col off (off + 1) Tok_semi `yield` go (off + 1) line (col + 1) cs
  74. go !off !line !col ('⊤':cs) =
  75. Token line col off (off + 1) Tok_top `yield` go (off + 1) line (col + 1) cs
  76. go !off !line !col ('≡':cs) =
  77. Token line col off (off + 1) Tok_equiv `yield` go (off + 1) line (col + 1) cs
  78. go !off !line !col ('=':'=':cs) =
  79. Token line col off (off + 2) Tok_equiv `yield` go (off + 2) line (col + 2) cs
  80. go !off !line !col ('=':cs) =
  81. Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
  82. go !off !line !col ('→':cs) =
  83. Token line col off (off + 1) Tok_arrow `yield` go (off + 1) line (col + 1) cs
  84. go !off !line !col (',':cs) =
  85. Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
  86. go !off !line !col ('_':cs) =
  87. Token line col off (off + 1) Tok_under `yield` go (off + 1) line (col + 1) cs
  88. go !off !line !col ('*':cs) =
  89. Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
  90. go !off !line !col ('×':cs) =
  91. Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
  92. go !off !line !col ('\\':cs) =
  93. Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
  94. go !off !line !col ('λ':cs) =
  95. Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
  96. go !off !line !col ('-':'>':cs) =
  97. Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
  98. go !off !line !col ('.':'1':cs) =
  99. Token line col off (off + 2) Tok_proj1 `yield` go (off + 2) line (col + 2) cs
  100. go !off !line !col ('.':'2':cs) =
  101. Token line col off (off + 2) Tok_proj2 `yield` go (off + 2) line (col + 2) cs
  102. go !off !line !col (c:cs)
  103. | isAlpha c = goIdent off off line col (T.singleton c) cs
  104. go !_ !line !col (c:_) = Left (LexError c line col)
  105. go _ _ _ [] = pure []
  106. goIdent !soff !off !line !col !acc [] =
  107. pure [Token line col soff off (finishIdent acc)]
  108. goIdent !soff !off !line !col !acc (c:cs)
  109. | isAlphaNum c || c == '\''
  110. = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
  111. | otherwise
  112. = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
  113. skipComment off line col level ('-':'}':cs)
  114. | level == 1 = go off line col cs
  115. | otherwise = skipComment off line col (level - 1) cs
  116. skipComment off line col level ('{':'-':cs) =
  117. skipComment off line col (level + 1) cs
  118. skipComment _ line col _ [] = Left (EOFInComment line col)
  119. yield c = fmap (c:)
  120. finishIdent c
  121. | T.pack "let" == c = Tok_let
  122. | T.pack "Type" == c = Tok_type
  123. | T.pack "in" == c = Tok_in
  124. | T.pack "refl" == c = Tok_refl
  125. | T.pack "coe" == c = Tok_coe
  126. | T.pack "cong" == c = Tok_cong
  127. | T.pack "sym" == c = Tok_sym
  128. | otherwise = Tok_var c