zurihac2020/hw09/src/Parser.hs

-- vim: set ts=2 sw=2 et tw=80:

module Parser where

import Data.List
import Text.Regex.TDFA

-- Types of tokens (sequences of characters with a meaning, e.g. '(')
data TokenType = Exp | Plus | Minus | Star | Slash
  | Percent | OpenParen | CloseParen | EOF

-- A token has a type its string contents
data Token = Symbol TokenType | Literal String | Identifier String

-- Represents a newly scanned token and the remaining string to parse
type ScanState = (Token, String)

-- Returns the string representation of the token type
tokenText :: TokenType -> String
tokenText Exp = "%"
tokenText Plus = "+"
tokenText Minus = "-"
tokenText Star = "*"
tokenText Slash = "/"
tokenText Percent = "%"
tokenText OpenParen = "("
tokenText CloseParen = ")"

-- If the token is found at the start of the string, returns Just of the token
-- and the remaining string, otherwise returns Nothing
findTokenText :: TokenType -> String -> Maybe ScanState
findTokenText EOF [] = Just (Symbol EOF, [])
findTokenText _ [] = Nothing
findTokenText tt s = if matches then Just (Symbol tt, rest) else Nothing
  where text = tokenText tt
        matches = text `isPrefixOf` s
        rest = drop (length text) s

matchesRegex :: (String -> Token) -> String -> String -> Maybe ScanState
matchesRegex tokenConstr r s = case match of
                                 [] -> Nothing
                                 s -> Just (tokenConstr match, rest)
  where match = s =~ r :: String
        rest = drop (length match) s

matchesIdentifier :: String -> Maybe ScanState
matchesIdentifier = matchesRegex Identifier "[a-zA-Z][0-9a-zA-Z]+"

matchesLiteral :: String -> Maybe ScanState
matchesLiteral = matchesRegex Literal "[0-9]+"

tokenFactories :: [String -> Maybe ScanState]
tokenFactories = [
  matchesIdentifier,
  matchesLiteral,
  findTokenText Plus,
  findTokenText Exp,
  findTokenText Minus,
  findTokenText Star,
  findTokenText Slash,
  findTokenText Percent,
  findTokenText OpenParen,
  findTokenText CloseParen]

scanToken :: String -> [Maybe ScanState]
scanToken s = fmap ($s) tokenFactories

-- TODO: make ScanState a monoid and implement <> as picking the token
-- with minimum length