70 lines
2.1 KiB
Haskell
70 lines
2.1 KiB
Haskell
-- vim: set ts=2 sw=2 et tw=80:
|
|
|
|
module Parser where
|
|
|
|
import Data.List
|
|
import Text.Regex.TDFA
|
|
|
|
-- Types of tokens (sequences of characters with a meaning, e.g. '(')
|
|
data TokenType = Exp | Plus | Minus | Star | Slash
|
|
| Percent | OpenParen | CloseParen | EOF
|
|
|
|
-- A token has a type its string contents
|
|
data Token = Symbol TokenType | Literal String | Identifier String
|
|
|
|
-- Represents a newly scanned token and the remaining string to parse
|
|
type ScanState = (Token, String)
|
|
|
|
-- Returns the string representation of the token type
|
|
tokenText :: TokenType -> String
|
|
tokenText Exp = "%"
|
|
tokenText Plus = "+"
|
|
tokenText Minus = "-"
|
|
tokenText Star = "*"
|
|
tokenText Slash = "/"
|
|
tokenText Percent = "%"
|
|
tokenText OpenParen = "("
|
|
tokenText CloseParen = ")"
|
|
|
|
-- If the token is found at the start of the string, returns Just of the token
|
|
-- and the remaining string, otherwise returns Nothing
|
|
findTokenText :: TokenType -> String -> Maybe ScanState
|
|
findTokenText EOF [] = Just (Symbol EOF, [])
|
|
findTokenText _ [] = Nothing
|
|
findTokenText tt s = if matches then Just (Symbol tt, rest) else Nothing
|
|
where text = tokenText tt
|
|
matches = text `isPrefixOf` s
|
|
rest = drop (length text) s
|
|
|
|
matchesRegex :: (String -> Token) -> String -> String -> Maybe ScanState
|
|
matchesRegex tokenConstr r s = case match of
|
|
[] -> Nothing
|
|
s -> Just (tokenConstr match, rest)
|
|
where match = s =~ r :: String
|
|
rest = drop (length match) s
|
|
|
|
matchesIdentifier :: String -> Maybe ScanState
|
|
matchesIdentifier = matchesRegex Identifier "[a-zA-Z][0-9a-zA-Z]+"
|
|
|
|
matchesLiteral :: String -> Maybe ScanState
|
|
matchesLiteral = matchesRegex Literal "[0-9]+"
|
|
|
|
tokenFactories :: [String -> Maybe ScanState]
|
|
tokenFactories = [
|
|
matchesIdentifier,
|
|
matchesLiteral,
|
|
findTokenText Plus,
|
|
findTokenText Exp,
|
|
findTokenText Minus,
|
|
findTokenText Star,
|
|
findTokenText Slash,
|
|
findTokenText Percent,
|
|
findTokenText OpenParen,
|
|
findTokenText CloseParen]
|
|
|
|
scanToken :: String -> [Maybe ScanState]
|
|
scanToken s = fmap ($s) tokenFactories
|
|
|
|
-- TODO: make ScanState a monoid and implement <> as picking the token
|
|
-- with minimum length
|