Prototype, extremely bad code implementation of CCHM Cubical Type Theory
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

191 lines
5.5 KiB

3 years ago
  1. {-# LANGUAGE BangPatterns #-}
  2. module Presyntax.Lexer where
  3. import Data.Text (Text)
  4. import Data.Char
  5. import qualified Data.Text as T
  6. {- HLINT ignore -}
  7. data TokenClass
  8. = Tok_var Text
  9. | Tok_lambda
  10. | Tok_type
  11. | Tok_path
  12. | Tok_phi
  13. | Tok_sub
  14. | Tok_comp
  15. | Tok_tr
  16. | Tok_I
  17. | Tok_I0
  18. | Tok_I1
  19. | Tok_oparen
  20. | Tok_cparen
  21. | Tok_osquare
  22. | Tok_csquare
  23. | Tok_colon
  24. | Tok_arrow
  25. | Tok_let
  26. | Tok_equal
  27. | Tok_in
  28. | Tok_and
  29. | Tok_not
  30. | Tok_or
  31. | Tok_fand
  32. | Tok_for
  33. | Tok_assume
  34. | Tok_p1
  35. | Tok_p2
  36. | Tok_comma
  37. | Tok_times
  38. deriving (Eq, Show, Ord)
  39. data Token
  40. = Token { tokLine :: {-# UNPACK #-} !Int
  41. , tokCol :: {-# UNPACK #-} !Int
  42. , tokSOff :: {-# UNPACK #-} !Int
  43. , tokOff :: {-# UNPACK #-} !Int
  44. , tokClass :: !TokenClass
  45. }
  46. deriving (Eq, Show, Ord)
  47. data LexError
  48. = LexError { leChar :: {-# UNPACK #-} !Char
  49. , leLine :: {-# UNPACK #-} !Int
  50. , leCol :: {-# UNPACK #-} !Int
  51. }
  52. | EOFInComment { leLine :: {-# UNPACK #-} !Int
  53. , leCol :: {-# UNPACK #-} !Int
  54. }
  55. deriving (Eq, Show, Ord)
  56. lexString :: String -> Either LexError [Token]
  57. lexString = go 0 0 0 where
  58. go :: Int -> Int -> Int -> String -> Either LexError [Token]
  59. go !off !line !_ ('\n':xs) =
  60. go (off + 1) (line + 1) 0 xs
  61. go !off !line !col (' ':xs) =
  62. go (off + 1) line (col + 1) xs
  63. go !off !line !_ ('-':'-':xs) =
  64. let (a, b) = span (/= '\n') xs
  65. in go (off + length a) line 0 b
  66. go !off !line !col ('{':'-':xs) = skipComment off line col 1 xs
  67. go !off !line !col ('~':cs) =
  68. Token line col off (off + 1) Tok_not `yield` go (off + 1) line (col + 1) cs
  69. go !off !line !col ('(':cs) =
  70. Token line col off (off + 1) Tok_oparen `yield` go (off + 1) line (col + 1) cs
  71. go !off !line !col (')':cs) =
  72. Token line col off (off + 1) Tok_cparen `yield` go (off + 1) line (col + 1) cs
  73. go !off !line !col ('[':cs) =
  74. Token line col off (off + 1) Tok_osquare `yield` go (off + 1) line (col + 1) cs
  75. go !off !line !col (']':cs) =
  76. Token line col off (off + 1) Tok_csquare `yield` go (off + 1) line (col + 1) cs
  77. go !off !line !col (':':cs) =
  78. Token line col off (off + 1) Tok_colon `yield` go (off + 1) line (col + 1) cs
  79. go !off !line !col (',':cs) =
  80. Token line col off (off + 1) Tok_comma `yield` go (off + 1) line (col + 1) cs
  81. go !off !line !col ('=':cs) =
  82. Token line col off (off + 1) Tok_equal `yield` go (off + 1) line (col + 1) cs
  83. go !off !line !col ('\\':'/':cs) =
  84. Token line col off (off + 2) Tok_for `yield` go (off + 2) line (col + 2) cs
  85. go !off !line !col ('\\':cs) =
  86. Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
  87. go !off !line !col ('*':cs) =
  88. Token line col off (off + 1) Tok_times `yield` go (off + 1) line (col + 1) cs
  89. go !off !line !col ('λ':cs) =
  90. Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
  91. go !off !line !col ('→':cs) =
  92. Token line col off (off + 1) Tok_lambda `yield` go (off + 1) line (col + 1) cs
  93. go !off !line !col ('-':'>':cs) =
  94. Token line col off (off + 2) Tok_arrow `yield` go (off + 2) line (col + 2) cs
  95. go !off !line !col ('&':'&':cs) =
  96. Token line col off (off + 2) Tok_and `yield` go (off + 2) line (col + 2) cs
  97. go !off !line !col ('&':cs) =
  98. Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
  99. go !off !line !col ('/':'\\':cs) =
  100. Token line col off (off + 1) Tok_fand `yield` go (off + 1) line (col + 1) cs
  101. go !off !line !col ('|':'|':cs) =
  102. Token line col off (off + 2) Tok_or `yield` go (off + 2) line (col + 2) cs
  103. go !off !line !col ('|':cs) =
  104. Token line col off (off + 1) Tok_for `yield` go (off + 1) line (col + 1) cs
  105. go !off !line !col ('.':'1':cs) =
  106. Token line col off (off + 2) Tok_p1 `yield` go (off + 2) line (col + 2) cs
  107. go !off !line !col ('.':'2':cs) =
  108. Token line col off (off + 2) Tok_p2 `yield` go (off + 2) line (col + 2) cs
  109. go !off !line !col (c:cs)
  110. | isAlpha c = goIdent off off line col (T.singleton c) cs
  111. go !_ !line !col (c:_) = Left (LexError c line col)
  112. go _ _ _ [] = pure []
  113. goIdent !soff !off !line !col !acc [] =
  114. pure [Token line col soff off (finishIdent acc)]
  115. goIdent !soff !off !line !col !acc (c:cs)
  116. | isAlphaNum c || c == '\''
  117. = goIdent soff (off + 1) line (col + 1) (T.snoc acc c) cs
  118. | otherwise
  119. = Token line col soff off (finishIdent acc) `yield` go (off + 1) line (col + 1) (c:cs)
  120. skipComment off line col level ('-':'}':cs)
  121. | level == 1 = go (off + 2) line (col + 2) cs
  122. | otherwise = skipComment (off + 2) line (col + 2) (level - 1) cs
  123. skipComment off line col level ('{':'-':cs) =
  124. skipComment (off + 2) line (col + 2) (level + 1) cs
  125. skipComment off line col level ('\n':cs) =
  126. skipComment (off + 1) (line + 1) 0 level cs
  127. skipComment off line col level (c:cs) =
  128. skipComment (off + 1) line (col + 1) level cs
  129. skipComment _ line col _ [] = Left (EOFInComment line col)
  130. yield c = fmap (c:)
  131. finishIdent c
  132. | T.pack "Type" == c = Tok_type
  133. | T.pack "Path" == c = Tok_path
  134. | T.pack "Partial" == c = Tok_phi
  135. | T.pack "Sub" == c = Tok_sub
  136. | T.pack "comp" == c = Tok_comp
  137. | T.pack "tr" == c = Tok_tr
  138. | T.pack "I" == c = Tok_I
  139. | T.pack "i0" == c = Tok_I0
  140. | T.pack "i1" == c = Tok_I1
  141. | T.pack "let" == c = Tok_let
  142. | T.pack "in" == c = Tok_in
  143. | T.pack "assume" == c = Tok_assume
  144. | otherwise = Tok_var c