Abbie's Haskell compiler
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

373 lines
10 KiB

  1. {
  2. module Frontend.Autogen.Lexer where
  3. import qualified Data.ByteString.Lazy as Lbs
  4. import qualified Data.Text.Encoding as T
  5. import qualified Data.Text as T
  6. import Frontend.Lexer.Tokens
  7. import Control.Monad
  8. import Debug.Trace
  9. }
  10. %wrapper "monadUserState-bytestring"
  11. $alpha = [a-zA-Z]
  12. $digit = [0-9]
  13. $white_nol = $white # \n
  14. tokens :-
  15. $white_nol+ ;
  16. <0,module_header> "--" .* \n
  17. { just $ pushStartCode newline }
  18. <0,module_header,import_>
  19. $alpha [$alpha $digit \_ \']* { variableOrKeyword }
  20. <0> \= { always TokEqual }
  21. <0> \: \: { always TokDoubleColon }
  22. <0> \\ { \i l -> setPendingLC *> yield' (const TokLambda) i l }
  23. <0> "->" { always TokArrow }
  24. <0> "_" { always TokUnder }
  25. <0> \{ { always TokOBrace }
  26. <0> \[ { always TokOSquare }
  27. <0,module_header,import_> {
  28. \, { always TokComma }
  29. \( { always TokOParen }
  30. \) { always TokCParen }
  31. }
  32. <0> \} { closeBrace }
  33. <0> \] { always TokCSquare }
  34. <0> \;+ { always TokSemi }
  35. <0> \n { just $ pushStartCode newline }
  36. <0> \" { just startString }
  37. <string> {
  38. \\ \" { stringSeg (T.singleton '"') }
  39. \\ \\ { stringSeg (T.singleton '\\') }
  40. \\ a { stringSeg (T.singleton '\a') }
  41. \\ b { stringSeg (T.singleton '\b') }
  42. \\ f { stringSeg (T.singleton '\f') }
  43. \\ n { stringSeg (T.singleton '\n') }
  44. \\ \n { stringSeg (T.singleton '\n') }
  45. \\ r { stringSeg (T.singleton '\r') }
  46. \\ v { stringSeg (T.singleton '\v') }
  47. \\ t { stringSeg (T.singleton '\t') }
  48. \" { endString }
  49. [^\\\"]+ { stringChar }
  50. }
  51. <0,newline,comment,import_,module_header>
  52. "{-" { just $ pushStartCode comment }
  53. <comment> {
  54. "-}" { \i l -> popStartCode *> skip i l }
  55. . ;
  56. }
  57. -- newline: emit a semicolon when de-denting
  58. <newline> {
  59. \n ;
  60. "--" .* \n ;
  61. () { offsideRule }
  62. }
  63. -- layout: indentation of the next token is context for offside rule
  64. <layout> {
  65. \n ;
  66. "--" .* \n ;
  67. \{ { openBrace }
  68. () { startLayout }
  69. }
  70. <import_> {
  71. \n { just $ pushStartCode newline }
  72. "--" .* \n { just $ pushStartCode newline }
  73. }
  74. <empty_layout> () { emptyLayout }
  75. <pending> () { emitPendingToken }
  76. <module_header> {
  77. \n ;
  78. }
  79. {
  80. alexEOF :: Alex Token
  81. alexEOF = do
  82. (AlexPn _ l c, _, _, _) <- alexGetInput
  83. maybePopImportSC
  84. state <- getUserState
  85. unless (T.null (stringBuffer state)) $ do
  86. alexError $ "Unterminated string literal at line " ++ show l ++ ", column " ++ show c
  87. case layoutColumns state of
  88. -- EOF is allowed to close as many layout contexts as there are
  89. -- pending (number of pending layout contexts is the length of the
  90. -- list minus one, since there's the one initial layout context.)
  91. _:tail -> do
  92. mapUserState $ \s ->
  93. s { pendingTokens = (Token TokLEnd l c <$ tail) ++ [Token TokEof l c]
  94. , layoutColumns = []
  95. }
  96. pushStartCode pending
  97. pure (Token TokLEnd l c)
  98. _ -> pure $ Token TokEof l c
  99. yield k inp i = clearPendingLC *> yield' k inp i
  100. yield' k (AlexPn _ l c, _, s, _) i = do
  101. pure (Token (k $! (T.decodeUtf8 (Lbs.toStrict (Lbs.take i s)))) l c)
  102. setPendingLC = mapUserState $ \s -> s { pendingLambdaCase = True }
  103. clearPendingLC = mapUserState $ \s -> s { pendingLambdaCase = False }
  104. always :: TokenClass -> AlexInput -> Int64 -> Alex Token
  105. always k x i = yield (const k) x i
  106. startString = do
  107. mapUserState $ \s -> s { stringBuffer = T.empty }
  108. pushStartCode string
  109. endString (AlexPn _ l c, _, _, _) _i = do
  110. text <- stringBuffer <$> getUserState
  111. mapUserState $ \s -> s { stringBuffer = T.empty }
  112. popStartCode
  113. pure (Token (TokString text) l c)
  114. stringChar input@(AlexPn _ _ _, _, buf, _) i = do
  115. mapUserState $ \s -> s { stringBuffer = stringBuffer s <> T.decodeUtf8 (Lbs.toStrict (Lbs.take i buf)) }
  116. alexMonadScan
  117. stringSeg text _ _ = do
  118. mapUserState $ \s -> s { stringBuffer = stringBuffer s <> text }
  119. alexMonadScan
  120. data LayoutState
  121. = LetLayout { layoutCol :: Int }
  122. | Layout { layoutCol :: Int }
  123. | ModLayout { layoutCol :: Int }
  124. deriving (Show)
  125. data AlexUserState =
  126. AlexUserState { layoutColumns :: [LayoutState]
  127. , startCodes :: [Int]
  128. , leastColumn :: Int
  129. , pendingLayoutKw :: Maybe (Int -> LayoutState)
  130. , pendingTokens :: [Token]
  131. , pendingLambdaCase :: Bool
  132. , haveModuleHeader :: Bool
  133. , stringBuffer :: T.Text
  134. }
  135. alexInitUserState = AlexUserState [] [] 0 Nothing [] False False T.empty
  136. emitPendingToken :: AlexAction Token
  137. emitPendingToken _ _ = do
  138. t <- getUserState
  139. case pendingTokens t of
  140. [] -> do
  141. popStartCode
  142. alexMonadScan
  143. (x:xs) -> do
  144. mapUserState $ \s -> s { pendingTokens = xs }
  145. pure x
  146. delayToken :: Token -> Alex ()
  147. delayToken t = do
  148. mapUserState $ \s -> s { pendingTokens = t:pendingTokens s }
  149. pushStartCode pending
  150. just :: Alex a -> AlexAction Token
  151. just k _ _ = k *> alexMonadScan
  152. getUserState :: Alex AlexUserState
  153. getUserState = Alex $ \s -> Right (s, alex_ust s)
  154. mapUserState :: (AlexUserState -> AlexUserState) -> Alex ()
  155. mapUserState k = Alex $ \s -> Right (s { alex_ust = k $! alex_ust s }, ())
  156. pushStartCode :: Int -> Alex ()
  157. pushStartCode c = do
  158. sc <- alexGetStartCode
  159. mapUserState $ \s -> s { startCodes = sc:startCodes s }
  160. alexSetStartCode c
  161. popStartCode :: Alex ()
  162. popStartCode = do
  163. sc <- startCodes <$> getUserState
  164. case sc of
  165. [] -> alexSetStartCode 0
  166. (x:xs) -> do
  167. mapUserState $ \s -> s { startCodes = xs }
  168. alexSetStartCode x
  169. offsideRule :: AlexInput -> Int64 -> Alex Token
  170. offsideRule (AlexPn _ line col, _, s, _) _ = do
  171. ~(col':ctx) <- layoutColumns <$> getUserState
  172. case col `compare` layoutCol col' of
  173. EQ -> do
  174. popStartCode
  175. maybePopImportSC
  176. pure (Token TokSemi line col)
  177. GT -> do
  178. popStartCode
  179. alexMonadScan
  180. LT -> do
  181. mapUserState $ \s -> s { layoutColumns = ctx }
  182. pure (Token TokLEnd line col)
  183. maybePopImportSC :: Alex ()
  184. maybePopImportSC = do
  185. startcode <- alexGetStartCode
  186. when (startcode == import_) popStartCode
  187. emptyLayout :: AlexInput -> Int64 -> Alex Token
  188. emptyLayout (AlexPn _ line col, _, _, _) _ = do
  189. popStartCode
  190. pushStartCode newline
  191. pure (Token TokLEnd line col)
  192. startLayout :: AlexInput -> Int64 -> Alex Token
  193. startLayout (AlexPn _ line col, _, _, _) _ = do
  194. state <- getUserState
  195. popStartCode
  196. let
  197. col' =
  198. case layoutColumns state of
  199. [] -> 0
  200. (x:_) -> layoutCol x
  201. layoutKind = case pendingLayoutKw state of
  202. Just s -> s
  203. Nothing -> Layout
  204. if col < col'
  205. then pushStartCode empty_layout
  206. else mapUserState $ \s -> s { layoutColumns = layoutKind col:layoutColumns s }
  207. pure (Token TokLStart line col)
  208. getLayout :: Alex LayoutState
  209. getLayout = do
  210. t <- getUserState
  211. case layoutColumns t of
  212. (x:_) -> pure x
  213. _ -> error "No layout?"
  214. openBrace :: AlexInput -> Int64 -> Alex Token
  215. openBrace (AlexPn _ line col, _, _, _) _ = do
  216. popStartCode
  217. mapUserState $ \s -> s { layoutColumns = Layout minBound:layoutColumns s }
  218. pure (Token TokOBrace line col)
  219. popLayoutContext :: Alex ()
  220. popLayoutContext = mapUserState $ \s -> s { layoutColumns = tail (layoutColumns s) }
  221. closeBrace :: AlexInput -> Int64 -> Alex Token
  222. closeBrace (AlexPn _ line col, _, _, _) _ = do
  223. ~(col':_) <- layoutColumns <$> getUserState
  224. if layoutCol col' < 0
  225. then popLayoutContext
  226. else pure ()
  227. pure (Token TokCBrace line col)
  228. variableOrKeyword :: AlexAction Token
  229. variableOrKeyword (AlexPn _ l c, _, s, _) size = do
  230. sc <- alexGetStartCode
  231. state <- getUserState
  232. clearPendingLC
  233. let
  234. text = T.decodeUtf8 (Lbs.toStrict (Lbs.take size s))
  235. col = layoutCol (head (layoutColumns state))
  236. case T.unpack text of
  237. "as"
  238. | sc == import_, c > col -> pure (Token TokAs l c)
  239. | sc == import_ -> offsideKeyword (TokVar text) l c
  240. | otherwise -> pure (Token (TokVar text) l c)
  241. "qualified"
  242. | sc == import_, c > col -> pure (Token TokQualified l c)
  243. | sc == import_ -> offsideKeyword (TokVar text) l c
  244. | otherwise -> pure (Token (TokVar text) l c)
  245. "let" -> laidOut' (Just LetLayout) TokLet l c
  246. "in" -> do
  247. laidout <- getLayout
  248. case laidout of
  249. -- let .. in critical pair:
  250. -- 'in' is allowed to close a layout context before the offside rule would apply.
  251. LetLayout _ -> earlyEnd TokIn l c
  252. _ -> pure (Token TokIn l c)
  253. "data" -> pure (Token TokData l c)
  254. "where" -> do
  255. -- if this is the where in the module_header, then
  256. -- pop the start code so that the offside rule applies again
  257. when (sc == module_header) popStartCode
  258. laidOut' (if sc == module_header then Just ModLayout else Nothing) TokWhere l c
  259. "case"
  260. -- "case" is a layout token if it's immediately following a \\
  261. | pendingLambdaCase state -> laidOut TokCase l c
  262. | otherwise -> pure (Token TokCase l c)
  263. "import" -> do
  264. pushStartCode import_
  265. pure (Token TokImport l c)
  266. "of" -> laidOut TokOf l c
  267. "module" -> do
  268. unless (haveModuleHeader state) $ do
  269. mapUserState $ \s -> s { haveModuleHeader = True }
  270. pushStartCode module_header
  271. pure (Token TokModule l c)
  272. (x:_)
  273. | Data.Char.isUpper x -> pure (Token (TokCon text) l c)
  274. | otherwise -> pure (Token (TokVar text) l c)
  275. [] -> error "empty keyword/identifier"
  276. earlyEnd tok l c = do
  277. popLayoutContext
  278. delayToken (Token tok l c)
  279. pure (Token TokLEnd l c)
  280. offsideKeyword tok l c = do
  281. popLayoutContext
  282. delayToken (Token tok l c)
  283. pure (Token TokSemi l c)
  284. laidOut' n x l c = do
  285. pushStartCode layout
  286. mapUserState $ \s -> s { leastColumn = c, pendingLayoutKw = n }
  287. pure (Token x l c)
  288. laidOut = laidOut' Nothing
  289. }