This repository has been archived on 2020-07-05. You can view files and clone it, but cannot push or open issues or pull requests.
zurihac2020/hw09/src/Parser.hs

70 lines
2.1 KiB
Haskell

-- vim: set ts=2 sw=2 et tw=80:
module Parser where
import Data.List
import Text.Regex.TDFA
-- Types of tokens (sequences of characters with a meaning, e.g. '(')
data TokenType = Exp | Plus | Minus | Star | Slash
| Percent | OpenParen | CloseParen | EOF
-- A token has a type its string contents
data Token = Symbol TokenType | Literal String | Identifier String
-- Represents a newly scanned token and the remaining string to parse
type ScanState = (Token, String)
-- Returns the string representation of the token type
tokenText :: TokenType -> String
tokenText Exp = "%"
tokenText Plus = "+"
tokenText Minus = "-"
tokenText Star = "*"
tokenText Slash = "/"
tokenText Percent = "%"
tokenText OpenParen = "("
tokenText CloseParen = ")"
-- If the token is found at the start of the string, returns Just of the token
-- and the remaining string, otherwise returns Nothing
findTokenText :: TokenType -> String -> Maybe ScanState
findTokenText EOF [] = Just (Symbol EOF, [])
findTokenText _ [] = Nothing
findTokenText tt s = if matches then Just (Symbol tt, rest) else Nothing
where text = tokenText tt
matches = text `isPrefixOf` s
rest = drop (length text) s
matchesRegex :: (String -> Token) -> String -> String -> Maybe ScanState
matchesRegex tokenConstr r s = case match of
[] -> Nothing
s -> Just (tokenConstr match, rest)
where match = s =~ r :: String
rest = drop (length match) s
matchesIdentifier :: String -> Maybe ScanState
matchesIdentifier = matchesRegex Identifier "[a-zA-Z][0-9a-zA-Z]+"
matchesLiteral :: String -> Maybe ScanState
matchesLiteral = matchesRegex Literal "[0-9]+"
tokenFactories :: [String -> Maybe ScanState]
tokenFactories = [
matchesIdentifier,
matchesLiteral,
findTokenText Plus,
findTokenText Exp,
findTokenText Minus,
findTokenText Star,
findTokenText Slash,
findTokenText Percent,
findTokenText OpenParen,
findTokenText CloseParen]
scanToken :: String -> [Maybe ScanState]
scanToken s = fmap ($s) tokenFactories
-- TODO: make ScanState a monoid and implement <> as picking the token
-- with minimum length