I'm trying to handle token errors in the calculator programme I made with haskell, my code is as follows:
import Data.Char
import Control.Applicative
data MayError a = Value a | Error String
instance (Show a) => Show (MayError a) where
show (Value x) = show x
show (Error s) = "error: " ++ s
instance Functor MayError where
fmap f (Value x) = Value (f x)
fmap f (Error s) = Error s
instance Applicative MayError where
pure x = Value x
(Value f) <*> (Value x) = Value (f x)
(Value f) <*> (Error s) = Error s
(Error s) <*> _ = Error s
instance Monad MayError where
return x = Value x
(Value x) >>= f = f x
(Error s) >>= f = Error s
{- tokenizer -}
data Token = Num Int | Add | Sub | Mul | Div | Exp | LPar | RPar deriving (Eq, Show)
tokens :: String -> MayError [Token]
tokens [] = []
tokens ('+':cs) = Add:(tokens cs)
tokens ('-':cs) = Sub:(tokens cs)
tokens ('*':cs) = Mul:(tokens cs)
tokens ('/':cs) = Div:(tokens cs)
tokens ('(':cs) = LPar:(tokens cs)
tokens (')':cs) = RPar:(tokens cs)
tokens ('^':cs) = Exp:(tokens cs)
tokens (c:cs) | isDigit c = let (ds,rs) = span isDigit (c:cs)
in Num(read ds):(tokens rs)
| isSpace c = tokens cs
| otherwise = Error "unknown token"
{- parser -}
data ParseTree = Number Int |
Plus ParseTree ParseTree |
Minus ParseTree ParseTree |
Times ParseTree ParseTree |
Divide ParseTree ParseTree |
Power ParseTree ParseTree
deriving Show
type Parser = [Token] -> MayError(ParseTree, [Token])
parseFactor::Parser
parseFactor (Num x:l) = return (Number x, l)
parseFactor (Add:l) = parseFactor l
parseFactor (Sub:l) = let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
parseFactor (LPar:l) = let (p1, RPar:l1) = parseExpr l in (p1, l1)
parseFactor _ = Error "parse error"
parseExponent::Parser
parseExponent l = nextExp $ parseFactor l
where nextExp(p1, Exp:l1) = let (p2, l2) = parseFactor l1
in nextExp(Power p1 p2, l2)
nextExp x = x
parseTerm::Parser
parseTerm l = nextFactor $ parseExponent l
where nextFactor(p1, Mul:l1) = let (p2,l2) = parseExponent l1
in nextFactor(Times p1 p2, l2)
nextFactor(p1, Div:l1) = let (p2,l2) = parseExponent l1
in nextFactor(Divide p1 p2, l2)
nextFactor x = x
parseExpr::Parser
parseExpr l = nextTerm $ parseTerm l
where nextTerm(p1, Add:l1) = let (p2,l2) = parseTerm l1
in nextTerm(Plus p1 p2, l2)
nextTerm(p1, Sub:l1) = let (p2,l2) = parseTerm l1
in nextTerm(Minus p1 p2, l2)
nextTerm x = x
{- evaluator -}
eval::ParseTree -> MayError Int
eval (Number x) = Value x
eval (Plus p1 p2) = do x <- eval p1
y <- eval p2
return (x+y)
eval (Minus p1 p2) = do x <- eval p1
y <- eval p2
return (x-y)
eval (Times p1 p2) = do x <- eval p1
y <- eval p2
return (x*y)
eval (Divide p1 p2) = do x <- eval p1
y <- eval p2
if y == 0 then Error "division by 0"
else return (x `div` y)
eval (Power p1 p2) = do x <- eval p1
y <- eval p2
if y < 0 then Error "cannot process negative exponents"
else return (x^y)
parse :: [Token] -> MayError ParseTree
parse ts = do (pt, rs) <- parseExpr ts
if null rs then return pt else Error "extra token"
{- main -}
main = do cs <- getContents
putStr $ unlines $ map show $
map (\s -> tokens s >>= parse >>= eval) $ lines cs
Everything works fine when I wasn't trying to parse errors, but now the error is as shown below:
calc_final2.hs:29:13: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[a1]’
• In the expression: []
In an equation for ‘tokens’: tokens [] = []
|
29 | tokens [] = []
| ^^
calc_final2.hs:30:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Add : (tokens cs)
In an equation for ‘tokens’: tokens ('+' : cs) = Add : (tokens cs)
|
30 | tokens ('+':cs) = Add:(tokens cs)
| ^^^^^^^^^^^^^^^
calc_final2.hs:30:24: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: Add : (tokens cs)
In an equation for ‘tokens’: tokens ('+' : cs) = Add : (tokens cs)
|
30 | tokens ('+':cs) = Add:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:31:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Sub : (tokens cs)
In an equation for ‘tokens’: tokens ('-' : cs) = Sub : (tokens cs)
|
31 | tokens ('-':cs) = Sub:(tokens cs)
| ^^^^^^^^^^^^^^^
calc_final2.hs:31:24: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: Sub : (tokens cs)
In an equation for ‘tokens’: tokens ('-' : cs) = Sub : (tokens cs)
|
31 | tokens ('-':cs) = Sub:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:32:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Mul : (tokens cs)
In an equation for ‘tokens’: tokens ('*' : cs) = Mul : (tokens cs)
|
32 | tokens ('*':cs) = Mul:(tokens cs)
| ^^^^^^^^^^^^^^^
calc_final2.hs:32:24: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: Mul : (tokens cs)
In an equation for ‘tokens’: tokens ('*' : cs) = Mul : (tokens cs)
|
32 | tokens ('*':cs) = Mul:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:33:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Div : (tokens cs)
In an equation for ‘tokens’: tokens ('/' : cs) = Div : (tokens cs)
|
33 | tokens ('/':cs) = Div:(tokens cs)
| ^^^^^^^^^^^^^^^
calc_final2.hs:33:24: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: Div : (tokens cs)
In an equation for ‘tokens’: tokens ('/' : cs) = Div : (tokens cs)
|
33 | tokens ('/':cs) = Div:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:34:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: LPar : (tokens cs)
In an equation for ‘tokens’: tokens ('(' : cs) = LPar : (tokens cs)
|
34 | tokens ('(':cs) = LPar:(tokens cs)
| ^^^^^^^^^^^^^^^^
calc_final2.hs:34:25: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: LPar : (tokens cs)
In an equation for ‘tokens’: tokens ('(' : cs) = LPar : (tokens cs)
|
34 | tokens ('(':cs) = LPar:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:35:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: RPar : (tokens cs)
In an equation for ‘tokens’: tokens (')' : cs) = RPar : (tokens cs)
|
35 | tokens (')':cs) = RPar:(tokens cs)
| ^^^^^^^^^^^^^^^^
calc_final2.hs:35:25: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: RPar : (tokens cs)
In an equation for ‘tokens’: tokens (')' : cs) = RPar : (tokens cs)
|
35 | tokens (')':cs) = RPar:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:36:19: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Exp : (tokens cs)
In an equation for ‘tokens’: tokens ('^' : cs) = Exp : (tokens cs)
|
36 | tokens ('^':cs) = Exp:(tokens cs)
| ^^^^^^^^^^^^^^^
calc_final2.hs:36:24: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens cs)’
In the expression: Exp : (tokens cs)
In an equation for ‘tokens’: tokens ('^' : cs) = Exp : (tokens cs)
|
36 | tokens ('^':cs) = Exp:(tokens cs)
| ^^^^^^^^^
calc_final2.hs:38:32: error:
• Couldn't match expected type ‘MayError [Token]’
with actual type ‘[Token]’
• In the expression: Num (read ds) : (tokens rs)
In the expression:
let (ds, rs) = span isDigit (c : cs) in Num (read ds) : (tokens rs)
In an equation for ‘tokens’:
tokens (c : cs)
| isDigit c
= let (ds, rs) = span isDigit (c : cs)
in Num (read ds) : (tokens rs)
| isSpace c = tokens cs
| otherwise = Error "unknown token"
|
38 | in Num(read ds):(tokens rs)
| ^^^^^^^^^^^^^^^^^^^^^^^^
calc_final2.hs:38:46: error:
• Couldn't match expected type ‘[Token]’
with actual type ‘MayError [Token]’
• In the second argument of ‘(:)’, namely ‘(tokens rs)’
In the expression: Num (read ds) : (tokens rs)
In the expression:
let (ds, rs) = span isDigit (c : cs) in Num (read ds) : (tokens rs)
|
38 | in Num(read ds):(tokens rs)
| ^^^^^^^^^
calc_final2.hs:56:38: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseFactor l
In a pattern binding: (p1, l1) = parseFactor l
In the expression:
let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
• Relevant bindings include
p1 :: a (bound at calc_final2.hs:56:28)
l1 :: b (bound at calc_final2.hs:56:32)
|
56 | parseFactor (Sub:l) = let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
| ^^^^^^^^^^^^^
calc_final2.hs:56:55: error:
• Couldn't match expected type ‘MayError (ParseTree, [Token])’
with actual type ‘(ParseTree, b0)’
• In the expression: (Minus (Number 0) p1, l1)
In the expression:
let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
In an equation for ‘parseFactor’:
parseFactor (Sub : l)
= let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
|
56 | parseFactor (Sub:l) = let (p1, l1) = parseFactor l in (Minus (Number 0) p1, l1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^
calc_final2.hs:57:44: error:
• Couldn't match expected type ‘(a, [Token])’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseExpr l
In a pattern binding: (p1, RPar : l1) = parseExpr l
In the expression: let (p1, RPar : l1) = parseExpr l in (p1, l1)
• Relevant bindings include p1 :: a (bound at calc_final2.hs:57:29)
|
57 | parseFactor (LPar:l) = let (p1, RPar:l1) = parseExpr l in (p1, l1)
| ^^^^^^^^^^^
calc_final2.hs:57:59: error:
• Couldn't match expected type ‘MayError (ParseTree, [Token])’
with actual type ‘(a0, [Token])’
• In the expression: (p1, l1)
In the expression: let (p1, RPar : l1) = parseExpr l in (p1, l1)
In an equation for ‘parseFactor’:
parseFactor (LPar : l)
= let (p1, RPar : l1) = parseExpr l in (p1, l1)
|
57 | parseFactor (LPar:l) = let (p1, RPar:l1) = parseExpr l in (p1, l1)
| ^^^^^^^^
calc_final2.hs:61:19: error:
• Couldn't match expected type ‘MayError (ParseTree, [Token])’
with actual type ‘(ParseTree, [Token])’
• In the expression: nextExp $ parseFactor l
In an equation for ‘parseExponent’:
parseExponent l
= nextExp $ parseFactor l
where
nextExp (p1, Exp : l1) = let ... in nextExp (Power p1 p2, l2)
nextExp x = x
|
61 | parseExponent l = nextExp $ parseFactor l
| ^^^^^^^^^^^^^^^^^^^^^^^
calc_final2.hs:61:29: error:
• Couldn't match expected type ‘(ParseTree, [Token])’
with actual type ‘MayError (ParseTree, [Token])’
• In the second argument of ‘($)’, namely ‘parseFactor l’
In the expression: nextExp $ parseFactor l
In an equation for ‘parseExponent’:
parseExponent l
= nextExp $ parseFactor l
where
nextExp (p1, Exp : l1) = let ... in nextExp (Power p1 p2, l2)
nextExp x = x
|
61 | parseExponent l = nextExp $ parseFactor l
| ^^^^^^^^^^^^^
calc_final2.hs:62:46: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseFactor l1
In a pattern binding: (p2, l2) = parseFactor l1
In the expression:
let (p2, l2) = parseFactor l1 in nextExp (Power p1 p2, l2)
• Relevant bindings include
p2 :: a (bound at calc_final2.hs:62:36)
l2 :: b (bound at calc_final2.hs:62:40)
|
62 | where nextExp(p1, Exp:l1) = let (p2, l2) = parseFactor l1
| ^^^^^^^^^^^^^^
calc_final2.hs:67:15: error:
• Couldn't match expected type ‘MayError (ParseTree, [Token])’
with actual type ‘(ParseTree, [Token])’
• In the expression: nextFactor $ parseExponent l
In an equation for ‘parseTerm’:
parseTerm l
= nextFactor $ parseExponent l
where
nextFactor (p1, Mul : l1) = let ... in nextFactor (Times p1 p2, l2)
nextFactor (p1, Div : l1)
= let ... in nextFactor (Divide p1 p2, l2)
nextFactor x = x
|
67 | parseTerm l = nextFactor $ parseExponent l
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
calc_final2.hs:67:28: error:
• Couldn't match expected type ‘(ParseTree, [Token])’
with actual type ‘MayError (ParseTree, [Token])’
• In the second argument of ‘($)’, namely ‘parseExponent l’
In the expression: nextFactor $ parseExponent l
In an equation for ‘parseTerm’:
parseTerm l
= nextFactor $ parseExponent l
where
nextFactor (p1, Mul : l1) = let ... in nextFactor (Times p1 p2, l2)
nextFactor (p1, Div : l1)
= let ... in nextFactor (Divide p1 p2, l2)
nextFactor x = x
|
67 | parseTerm l = nextFactor $ parseExponent l
| ^^^^^^^^^^^^^^^
calc_final2.hs:68:48: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseExponent l1
In a pattern binding: (p2, l2) = parseExponent l1
In the expression:
let (p2, l2) = parseExponent l1 in nextFactor (Times p1 p2, l2)
• Relevant bindings include
p2 :: a (bound at calc_final2.hs:68:39)
l2 :: b (bound at calc_final2.hs:68:42)
|
68 | where nextFactor(p1, Mul:l1) = let (p2,l2) = parseExponent l1
| ^^^^^^^^^^^^^^^^
calc_final2.hs:70:48: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseExponent l1
In a pattern binding: (p2, l2) = parseExponent l1
In the expression:
let (p2, l2) = parseExponent l1 in nextFactor (Divide p1 p2, l2)
• Relevant bindings include
p2 :: a (bound at calc_final2.hs:70:39)
l2 :: b (bound at calc_final2.hs:70:42)
|
70 | nextFactor(p1, Div:l1) = let (p2,l2) = parseExponent l1
| ^^^^^^^^^^^^^^^^
calc_final2.hs:75:15: error:
• Couldn't match expected type ‘MayError (ParseTree, [Token])’
with actual type ‘(ParseTree, [Token])’
• In the expression: nextTerm $ parseTerm l
In an equation for ‘parseExpr’:
parseExpr l
= nextTerm $ parseTerm l
where
nextTerm (p1, Add : l1) = let ... in nextTerm (Plus p1 p2, l2)
nextTerm (p1, Sub : l1) = let ... in nextTerm (Minus p1 p2, l2)
nextTerm x = x
|
75 | parseExpr l = nextTerm $ parseTerm l
| ^^^^^^^^^^^^^^^^^^^^^^
calc_final2.hs:75:26: error:
• Couldn't match expected type ‘(ParseTree, [Token])’
with actual type ‘MayError (ParseTree, [Token])’
• In the second argument of ‘($)’, namely ‘parseTerm l’
In the expression: nextTerm $ parseTerm l
In an equation for ‘parseExpr’:
parseExpr l
= nextTerm $ parseTerm l
where
nextTerm (p1, Add : l1) = let ... in nextTerm (Plus p1 p2, l2)
nextTerm (p1, Sub : l1) = let ... in nextTerm (Minus p1 p2, l2)
nextTerm x = x
|
75 | parseExpr l = nextTerm $ parseTerm l
| ^^^^^^^^^^^
calc_final2.hs:76:46: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseTerm l1
In a pattern binding: (p2, l2) = parseTerm l1
In the expression:
let (p2, l2) = parseTerm l1 in nextTerm (Plus p1 p2, l2)
• Relevant bindings include
p2 :: a (bound at calc_final2.hs:76:37)
l2 :: b (bound at calc_final2.hs:76:40)
|
76 | where nextTerm(p1, Add:l1) = let (p2,l2) = parseTerm l1
| ^^^^^^^^^^^^
calc_final2.hs:78:46: error:
• Couldn't match expected type ‘(a, b)’
with actual type ‘MayError (ParseTree, [Token])’
• In the expression: parseTerm l1
In a pattern binding: (p2, l2) = parseTerm l1
In the expression:
let (p2, l2) = parseTerm l1 in nextTerm (Minus p1 p2, l2)
• Relevant bindings include
p2 :: a (bound at calc_final2.hs:78:37)
l2 :: b (bound at calc_final2.hs:78:40)
|
78 | nextTerm(p1, Sub:l1) = let (p2,l2) = parseTerm l1
| ^^^^^^^^^^^^
What I'm trying to do exactly is to have my programme show "unknown token" when some invalid token is inputted.
Since the return type of tokens is MayError [Token], you need to wrap the items in a Value, for items where we recurse, we can perform an fmap to prepend to the list wrapped in the Value:
tokens :: String -> MayError [Token]
tokens [] = Value []
tokens ('+':cs) = (Add:) <$> tokens cs
tokens ('-':cs) = (Sub:) <$> tokens cs
tokens ('*':cs) = (Mul:) <$> tokens cs
tokens ('/':cs) = (Div:) <$> tokens cs
tokens ('(':cs) = (LPar:) <$> tokens cs
tokens (')':cs) = (RPar:) <$> tokens cs
tokens ('^':cs) = (Exp:) <$> tokens cs
tokens (c:cs) | isDigit c = let (ds,rs) = span isDigit (c:cs)
in (Num (read ds):) <$> tokens rs
| isSpace c = tokens cs
| otherwise = Error "unknown token"
Other functions have the same problem: you should wrap values in a Value data constructor, or unwrap these if you are processing a MayError value.
Related
I'm trying to write an evaluator for my language that takes a parsed if-block, evaluates all conditions and filters out the False branches and then picks one branch to take at random. I'm at the stage wherein I can filter out the branches and be left with my possible paths but only within GHCI. When I attempt to pass an if-block into my program through the command line, what occurs is if the first branch is True, that's the only branch that is taken, otherwise an empty list is returned regardless if there are any sub-branches or not. My evaluator for binary expressions works so I know that my error is with my logic in my if evaluator or parser.
Here is the behaviour through the command line:
~/Desktop/Olivia > ./Main "if (1 > 3)-> 1 + 1 [] (1 < 2)-> 2 + 2"
[]
~/Desktop/Olivia > ./Main "if (1 < 3)-> 1 + 1 [] (1 < 2)-> 2 + 2"
[2]
Here is the behaviour through GHCI:
λ > b = SubIf (Expr (HInteger 2) Less (HInteger 1)) [(Expr (HInteger 45) Add (HInteger 45)), (Expr (HInteger 6) Add (HInteger 6))]
λ > c = SubIf (Expr (HInteger 2) Less (HInteger 3)) [(Expr (HInteger 5) Add (HInteger 5)), (Expr (HInteger 6) Add (HInteger 6))]
λ > a = If (Expr (HInteger 3) Less (HInteger 2)) [(Expr (HInteger 1) Add (HInteger 100)), (Expr (HInteger 2) Add (HInteger 2))] [b, c]
λ > eval b -- All False branches return a value of 1
1
λ > eval c
[10 12]
λ > eval a
[[10 12]]
This is my expected result. In the case of n number of sub if expressions, I will pick one list of the overall list as my branch to take before I go back to evaluate the loop guard and begin the next iteration of the program.
My data type:
data HVal
= HInteger Integer
| HBool Bool
| HString String
| HList [HVal]
| Expr HVal Op HVal
| EqExpr HVal Op HVal
| Neg HVal
| Assign HVal HVal
| Do HVal [HVal]
| If HVal [HVal] [HVal]
| SubIf HVal [HVal]
| Load String
deriving (Eq, Read)
My Parsers:
parseIf :: Parser HVal
parseIf = do
_ <- string "if"
spaces
_ <- string "("
cond <- (parseExpr <|> parseEqExpr <|> parseBool)
_ <- string ")->"
expr <- spaces *> many (parseExpression <* spaces)
expr' <- spaces *> many (parseExpression <* spaces)
return $ If cond expr expr'
parseSubIf :: Parser HVal
parseSubIf = do
_ <- string "[]"
spaces
_ <- string "("
cond <- (parseExpr <|> parseEqExpr <|> parseBool)
_ <- string ")->"
expr <- spaces *> many (parseExpression <* spaces)
return $ SubIf cond expr
My Evaluator:
eval :: HVal -> HVal
---------- EVALUATING PRIMITIVES ----------
eval val#(HString _) = val
eval val#(HInteger _) = val
eval val#(HBool _) = val
eval val#(HList _) = val
eval (Expr x op y) = evalExpr x op y
eval (If cond expr expr') = evalIf cond expr expr'
eval (SubIf cond expr) = evalSubIf cond expr
evalIf :: HVal -> [HVal] -> [HVal] -> HVal
evalIf cond expr expr' = if ((eval cond) == (HBool True))
then HList $ map eval expr
else HList $ (filter (/= (HInteger 1)) (map eval expr'))
evalSubIf :: HVal -> [HVal] -> HVal
evalSubIf cond expr = if ((eval cond) == (HBool True))
then HList $ map eval expr
else (HInteger 1)
I think that the error may be with my parser for the if-block. My thinking behind it was that the if block contains the conditional for the first branch and what it evaluates to and then a list of sub expressions wherein each element of the list contains the branch conditional and what it evaluates to.
Currently, I am working on a problem of parsing and showing expressions in Haskell.
type Name = String
data Expr = Val Integer
| Var Name
| Expr :+: Expr
| Expr :-: Expr
| Expr :*: Expr
| Expr :/: Expr
| Expr :%: Expr
This is the code of my data type Expr and this is how i define show function:
instance Show Expr where
show (Val x) = show x
show (Var y) = y
show (p :+: q) = par (show p ++ "+" ++ show q)
show (p :-: q) = par (show p ++ "-" ++ show q)
show (p :/: q) = par (show p ++ "/" ++ show q)
show (p :*: q) = par (show p ++ "*" ++ show q)
show (p :%: q) = par (show p ++ "%" ++ show q)
par :: String -> String
par s = "(" ++ s ++ ")"
Later i tried to transform string input into the expression but i encounter the following problem: I don't understand how parentheses in the second case are implemented in Haskell.
*Main> Val 2 :*:Val 2 :+: Val 3
((2*2)+3)
*Main> Val 2 :*:(Val 2 :+: Val 3)
(2*(2+3))
Because of that, i am a bit confused regarding how should i transform parentheses from my string into the expression. Currently i am using the following function for parsing, but for now, it just ignores parentheses which is not intended behavior:
toExpr :: String -> Expr
toExpr str = f (lexer str) (Val 0)
where
f [] expr = expr
f (c:cs) expr
|isAlpha (head c) = f cs (Var c)
|isDigit (head c) = f cs (Val (read c))
|c == "+" = (expr :+: f cs (Val 0))
|c == "-" = (expr :-: f cs (Val 0))
|c == "/" = (expr :/: f cs (Val 0))
|c == "*" = (expr :*: f cs (Val 0))
|c == "%" = (expr :%: f cs (Val 0))
|otherwise = f cs expr
Edit: few grammar mistakes
I don't understand how parentheses in the second case are implemented in Haskell.
The brackets just give precedence to a certain part of the expression to parse. The problem is not with the parenthesis you render. I think the problem is that you did not assign precedence to your operators. This thus means that, unless you specify brackets, Haskell will consider all operators to have the same precedence, and parse these left-to-right. This thus means that x ⊕ y ⊗ z is parsed as (x ⊕ y) ⊗ z.
You can define the precedence of your :+:, :*, etc. operators with infixl:
infixl 7 :*:, :/:, :%:
infixl 5 :+:, :-:
type Name = String
data Expr = Val Integer
| Var Name
| Expr :+: Expr
| Expr :-: Expr
| Expr :*: Expr
| Expr :/: Expr
| Expr :%: Expr
As for your parser (the toExpr), you will need a parsing mechanism like a LALR parser [wiki] that stores results on a stack, and thus makes proper operations.
This was my final parser which gave me the result I needed. To get the result i wanted proper grammar was added and i wrote a parses according to he grammar.
Thanks, everyone for the help.
{-
parser for the following grammar:
E -> T E'
E' -> + T E' | - T E' | <empty string>
T -> F T'
T' -> * F T' | / F T' | % F T' | <empty string>
F -> (E) | <integer> | <identifier>
-}
parseExpr :: String -> (Expr,[String])
parseExpr tokens = parseE (lexer tokens)
parseE :: [String] -> (Expr,[String])
parseE tokens = parseE' acc rest where (acc,rest) = parseT tokens
parseE' :: Expr -> [String] -> (Expr,[String])
parseE' accepted ("+":tokens) = let (acc,rest) = parseT tokens in parseE' (accepted :+: acc) rest
parseE' accepted ("-":tokens) = let (acc,rest) = parseT tokens in parseE' (accepted :-: acc) rest
parseE' accepted tokens = (accepted,tokens)
parseT :: [String] -> (Expr,[String])
parseT tokens = let (acc,rest) = parseF tokens in parseT' acc rest
parseT' :: Expr -> [String] -> (Expr,[String])
parseT' accepted ("*":tokens) = let (acc,rest) = parseF tokens in parseT' (accepted :*: acc) rest
parseT' accepted ("/":tokens) = let (acc,rest) = parseF tokens in parseT' (accepted :/: acc) rest
parseT' accepted ("%":tokens) = let (acc,rest) = parseF tokens in parseT' (accepted :%: acc) rest
parseT' accepted tokens = (accepted,tokens)
parseF :: [String] -> (Expr,[String])
parseF ("(":tokens) = (e, tail rest) where (e,rest) = parseE tokens
parseF (t:tokens)
| isAlpha (head t) = (Var t,tokens)
| isDigit (head t) = (Val (read t),tokens)
| otherwise = error ""
parseF [] = error ""
lexer :: String -> [String]
lexer [] = []
lexer (c:cs)
| elem c " \t\n" = lexer cs
| elem c "=+-*/%()" = [c]:(lexer cs)
| isAlpha c = (c:takeWhile isAlpha cs):lexer(dropWhile isAlpha cs)
| isDigit c = (c:takeWhile isDigit cs):lexer(dropWhile isDigit cs)
| otherwise = error ""
It is true that Parsec has chainl and chainr to parse chains of either left-associative or right-associative operations (i.e. a -> a -> a). So I could quite easily parse something like x + y + z in a ((a + y) + z) or (a + (y + z)) manner.
However,
there is no standard way to parse a -> b -> c functions and specific case when a = b: a -> a -> c, for example a = b = c thought as a comparison function (a -> a -> Bool);
there is no standard way to implement "importance" of an operation: for example a + b = b + a should be parsed as ((a + b) = (b + a)) and not (((a + b) = b) + a)).
I am kind of new to parsing problems, so it would be great to get answers for both questions.
Okay, here's a long answer that might help. First, these are the imports I'm using, if you want to follow along:
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wall #-}
import Control.Applicative (some)
import Text.Parsec
import Text.Parsec.Expr
import Text.Parsec.String
Why a -> a -> a isn't so bad...
The operator type signature a -> a -> a is less restrictive and makes more sense than you might at first think. One key point is that usually when we're parsing expressions, we don't write a parser to evaluate them directly but rather parse them into some intermediate abstract syntax tree (AST) that is later evaluated. For example, consider a simple untyped AST with addition, subtraction, equality, and boolean connectives:
data Expr
= IntE Int -- integer literals
| FalseE | TrueE -- boolean literals (F, T)
| AddE Expr Expr -- x + y
| SubE Expr Expr -- x - y
| EqE Expr Expr -- x = y
| OrE Expr Expr -- x | y
| AndE Expr Expr -- x & y
deriving (Show)
If we want to write a parser to treat all these operators as left associative at the same precedence level, we can write a chainl-based parser like so. (For simplicity, this parser doesn't permit whitespace.)
expr :: Parser Expr
expr = chainl1 term op
where op = AddE <$ char '+'
<|> SubE <$ char '-'
<|> EqE <$ char '='
<|> OrE <$ char '|'
<|> AndE <$ char '&'
term :: Parser Expr
term = IntE . read <$> some digit
<|> FalseE <$ char 'F' <|> TrueE <$ char 'T'
<|> parens expr
parens :: Parser a -> Parser a
parens = between (char '(') (char ')')
and we get:
> parseTest expr "1+2+3"
AddE (AddE (IntE 1) (IntE 2)) (IntE 3)
> parseTest expr "1=2=F"
EqE (EqE (IntE 1) (IntE 2)) FalseE
>
We'd then leave it up to the interpreter to deal with the types (i.e., to type check the program):
data Value = BoolV Bool | IntV Int deriving (Eq, Show)
eval :: Expr -> Value
eval (IntE x) = IntV x
eval FalseE = BoolV False
eval TrueE = BoolV True
eval (AddE e1 e2)
= let IntV v1 = eval e1 -- pattern match ensures right type
IntV v2 = eval e2
in IntV (v1 + v2)
eval (SubE e1 e2)
= let IntV v1 = eval e1
IntV v2 = eval e2
in IntV (v1 - v2)
eval (EqE e1 e2) = BoolV (eval e1 == eval e2) -- equal if same type and value
eval (OrE e1 e2)
= let BoolV v1 = eval e1
BoolV v2 = eval e2
in BoolV (v1 || v2)
eval (AndE e1 e2)
= let BoolV v1 = eval e1
BoolV v2 = eval e2
in BoolV (v1 && v2)
evalExpr :: String -> Value
evalExpr str = let Right e = parse expr "<evalExpr>" str in eval e
giving:
> evalExpr "1+2+3"
IntV 6
> evalExpr "1=2=F"
BoolV True
>
Note that even though the type of the "=" operator is something like Eq a => a -> a -> Bool (or actually a -> b -> Bool, as we allow comparison of unequal types), it's represented in the AST as the constructor EqE of type Expr -> Expr -> Expr, so the a -> a -> a type makes sense.
Even if we were to combine the parser and evaluator above into a single function, we'd probably find it easiest to use a dynamic Value type, so all operators would be of type Value -> Value -> Value which fits into the a -> a -> a pattern:
expr' :: Parser Value
expr' = chainl1 term' op
where op = add <$ char '+'
<|> sub <$ char '-'
<|> eq <$ char '='
<|> or <$ char '|'
<|> and <$ char '&'
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
eq v1 v2 = BoolV $ v1 == v2
or (BoolV x) (BoolV y) = BoolV $ x || y
and (BoolV x) (BoolV y) = BoolV $ x && y
term' :: Parser Value
term' = IntV . read <$> some digit
<|> BoolV False <$ char 'F' <|> BoolV True <$ char 'T'
<|> parens expr'
This works too, with the parser directly evaluating the expression
> parseTest expr' "1+2+3"
IntV 6
> parseTest expr' "1=2=F"
BoolV True
>
You may find this use of dynamic typing during parsing and evaluation a little unsatifactory, but see below.
Operator Precedence
The standard way of adding operator precedence is to define multiple expression "levels" that work with a subset of the operators. If we want a precedence ordering from highest to lowest of addition/subtraction, then equality, then boolean "and", then boolean "or", we could replace expr' with the following. Note that each chainl1 call uses as "terms" the next (higher-precedence) expression level:
expr0 :: Parser Value
expr0 = chainl1 expr1 op
where op = or <$ char '|'
or (BoolV x) (BoolV y) = BoolV $ x || y
expr1 :: Parser Value
expr1 = chainl1 expr2 op
where op = and <$ char '&'
and (BoolV x) (BoolV y) = BoolV $ x && y
expr2 :: Parser Value
expr2 = chainl1 expr3 op
where op = eq <$ char '='
eq v1 v2 = BoolV $ v1 == v2
expr3 :: Parser Value
expr3 = chainl1 term'' op
where op = add <$ char '+' -- two operators at same precedence
<|> sub <$ char '-'
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
term'' :: Parser Value
term'' = IntV . read <$> some digit
<|> BoolV False <$ char 'F' <|> BoolV True <$ char 'T'
<|> parens expr0
After which:
> parseTest expr0 "(1+5-6=2-3+1&2+2=4)=(T|F)"
BoolV True
>
As this can be tedious, Parsec provides a Text.Parsec.Expr that makes this easier. The following replaces expr0 through expr3 above:
expr0' :: Parser Value
expr0' = buildExpressionParser table term''
where table = [ [binary '+' add, binary '-' sub]
, [binary '=' eq]
, [binary '&' and]
, [binary '|' or]
]
binary c op = Infix (op <$ char c) AssocLeft
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
eq v1 v2 = BoolV $ v1 == v2
and (BoolV x) (BoolV y) = BoolV $ x && y
or (BoolV x) (BoolV y) = BoolV $ x || y
Typed Parsing
You may find it strange above that we use an untyped AST (i.e., everything's an Expr) and dynamically typed Value instead of using Haskell's type system in the parsing. It is possible to design a parser where the operators actually have expected Haskell types. In the language above, equality causes a bit of an issue, but if we permit integer equality only, it's possible to write a typed parser/evaluator as follows. Here bexpr and iexpr are for boolean-valued and integer-values expressions respectively.
bexpr0 :: Parser Bool
bexpr0 = chainl1 bexpr1 op
where op = (||) <$ char '|'
bexpr1 :: Parser Bool
bexpr1 = chainl1 bexpr2 op
where op = (&&) <$ char '&'
bexpr2 :: Parser Bool
bexpr2 = False <$ char 'F' <|> True <$ char 'T'
<|> try eqexpr
<|> parens bexpr0
where eqexpr = (==) <$> iexpr3 <* char '=' <*> iexpr3 -- this can't chain now
iexpr3 :: Parser Int
iexpr3 = chainl1 iterm op
where op = (+) <$ char '+'
<|> (-) <$ char '-'
iterm :: Parser Int
iterm = read <$> some digit
<|> parens iexpr3
Note that we're still able to use chainl1, but there's a boundary between the integer and boolean types enforced by precedence, so we only ever chain Int -> Int -> Int or Bool -> Bool -> Bool operators, and we don't let the Int -> Int -> Bool integer equality operator chain.
This also means we need to use a different parser to parse a boolean versus an integer expression:
> parseTest bexpr0 "1+2=3"
True
> parseTest iexpr3 "1+2-3" -- iexpr3 is top-most integer expression parser
0
>
Note here that if you wanted integer equality to chain as a set of equalities so that 1+1=2=3-1 would check that all three terms are equal, you could do this with chainl1 using some trickery with lists and singleton values, but it's easier to use sepBy1 and replace eqexpr above with the definition:
eqexpr' = do
x:xs <- sepBy1 iexpr3 (char '=')
return $ all (==x) xs
giving:
> parseTest bexpr0 "1+1=2=3-1"
True
The whole program
To summarize, here's all the code:
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wall #-}
import Control.Applicative (some)
import Text.Parsec
import Text.Parsec.Expr
import Text.Parsec.String
-- * Untyped parser to AST
data Expr
= IntE Int -- integer literals
| FalseE | TrueE -- boolean literals (F, T)
| AddE Expr Expr -- x + y
| SubE Expr Expr -- x - y
| EqE Expr Expr -- x = y
| OrE Expr Expr -- x | y
| AndE Expr Expr -- x & y
deriving (Show)
expr :: Parser Expr
expr = chainl1 term op
where op = AddE <$ char '+'
<|> SubE <$ char '-'
<|> EqE <$ char '='
<|> OrE <$ char '|'
<|> AndE <$ char '&'
term :: Parser Expr
term = IntE . read <$> some digit
<|> FalseE <$ char 'F' <|> TrueE <$ char 'T'
<|> parens expr
parens :: Parser a -> Parser a
parens = between (char '(') (char ')')
-- * Interpreter
data Value = BoolV Bool | IntV Int deriving (Eq, Show)
eval :: Expr -> Value
eval (IntE x) = IntV x
eval FalseE = BoolV False
eval TrueE = BoolV True
eval (AddE e1 e2)
= let IntV v1 = eval e1 -- pattern match ensures right type
IntV v2 = eval e2
in IntV (v1 + v2)
eval (SubE e1 e2)
= let IntV v1 = eval e1
IntV v2 = eval e2
in IntV (v1 - v2)
eval (EqE e1 e2) = BoolV (eval e1 == eval e2) -- equal if same type and value
eval (OrE e1 e2)
= let BoolV v1 = eval e1
BoolV v2 = eval e2
in BoolV (v1 || v2)
eval (AndE e1 e2)
= let BoolV v1 = eval e1
BoolV v2 = eval e2
in BoolV (v1 && v2)
-- * Combined parser/interpreter with no intermediate AST
expr' :: Parser Value
expr' = chainl1 term' op
where op = add <$ char '+'
<|> sub <$ char '-'
<|> eq <$ char '='
<|> or <$ char '|'
<|> and <$ char '&'
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
eq v1 v2 = BoolV $ v1 == v2
or (BoolV x) (BoolV y) = BoolV $ x || y
and (BoolV x) (BoolV y) = BoolV $ x && y
term' :: Parser Value
term' = IntV . read <$> some digit
<|> BoolV False <$ char 'F' <|> BoolV True <$ char 'T'
<|> parens expr'
-- * Parser/interpreter with operator precendence
expr0 :: Parser Value
expr0 = chainl1 expr1 op
where op = or <$ char '|'
or (BoolV x) (BoolV y) = BoolV $ x || y
expr1 :: Parser Value
expr1 = chainl1 expr2 op
where op = and <$ char '&'
and (BoolV x) (BoolV y) = BoolV $ x && y
expr2 :: Parser Value
expr2 = chainl1 expr3 op
where op = eq <$ char '='
eq v1 v2 = BoolV $ v1 == v2
expr3 :: Parser Value
expr3 = chainl1 term'' op
where op = add <$ char '+' -- two operators at same precedence
<|> sub <$ char '-'
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
term'' :: Parser Value
term'' = IntV . read <$> some digit
<|> BoolV False <$ char 'F' <|> BoolV True <$ char 'T'
<|> parens expr0
-- * Alternate implementation using buildExpressionParser
expr0' :: Parser Value
expr0' = buildExpressionParser table term''
where table = [ [binary '+' add, binary '-' sub]
, [binary '=' eq]
, [binary '&' and]
, [binary '|' or]
]
binary c op = Infix (op <$ char c) AssocLeft
add (IntV x) (IntV y) = IntV $ x + y
sub (IntV x) (IntV y) = IntV $ x - y
eq v1 v2 = BoolV $ v1 == v2
and (BoolV x) (BoolV y) = BoolV $ x && y
or (BoolV x) (BoolV y) = BoolV $ x || y
-- * Typed parser/interpreter with separate boolean and integer expressions
bexpr0 :: Parser Bool
bexpr0 = chainl1 bexpr1 op
where op = (||) <$ char '|'
bexpr1 :: Parser Bool
bexpr1 = chainl1 bexpr2 op
where op = (&&) <$ char '&'
bexpr2 :: Parser Bool
bexpr2 = False <$ char 'F' <|> True <$ char 'T'
<|> try eqexpr
<|> parens bexpr0
where eqexpr = (==) <$> iexpr3 <* char '=' <*> iexpr3 -- this can't chain now
iexpr3 :: Parser Int
iexpr3 = chainl1 iterm op
where op = (+) <$ char '+'
<|> (-) <$ char '-'
iterm :: Parser Int
iterm = read <$> some digit
<|> parens iexpr3
-- * Alternate definition of eqexpr to allow 4=2+2=1+3
eqexpr' = do
x:xs <- sepBy1 iexpr3 (char '=')
return $ all (==x) xs
I have the following grammar for expressions that I'm trying to represent as a Haskell ADT:
Expr = SimpleExpr [OPrelation SimpleExpr]
SimpleExpr = [OPunary] Term {OPadd Term}
Term = Factor {OPmult Factor}
where:
{} means 0 or more
[] means 0 or 1
OPmult, OPadd, OPrelation, OPunary are classes of operators
Note that this grammar does get precedence right.
Here's something I tried:
data Expr = Expr SimpleExpr (Maybe OPrelation) (Maybe SimpleExpr)
data SimpleExpr = SimpleExpr (Maybe OPunary) Term [OPadd] [Term]
data Term = Term Factor [OPmult] [Factor]
which in hindsight I think is awful, especially the [OPadd] [Term] and [OPmult] [Factor] parts. Because, for example, in the parse tree for 1+2+3 it would put [+, +] in one branch and [2, 3] in another, meaning they're decoupled.
What would be a good representation that'll play nice later in the next stages of compilation?
Decomposing { } and [ ] into more data types seems like an overkill
Using lists seems not quite right as it would no longer be a tree (Just a node that's a list)
Maybe for { }. A good idea ?
And finally, I'm assuming after parsing I'll have to pass over the Parse Tree and reduce it to an AST? or should the whole grammar be modified to be less complex? or maybe it's abstract enough?
The AST does not need to be that close to the grammar. The grammar is structured into multiple levels to encode precedence and uses repetition to avoid left-recursion while still being able to correctly handle left-associative operators. The AST does not need to worry about such things.
Instead I'd define the AST like this:
data Expr = BinaryOperation BinaryOperator Expr Expr
| UnaryOperation UnaryOperator Expr
| Literal LiteralValue
| Variable Id
data BinaryOperator = Add | Sub | Mul | Div
data UnaryOperator = Not | Negate
Here's an additional answer that might help you. I don't want to spoil your fun, so here's a very simple example grammar:
-- Expr = Term ['+' Term]
-- Term = Factor ['*' Factor]
-- Factor = number | '(' Expr ')'
-- number = one or more digits
Using a CST
As one approach, we can represent this grammar as a concrete syntax tree (CST):
data Expr = TermE Term | PlusE Term Term deriving (Show)
data Term = FactorT Factor | TimesT Factor Factor deriving (Show)
data Factor = NumberF Int | ParenF Expr deriving (Show)
A Parsec-based parser to turn the concrete syntax into a CST might look like this:
expr :: Parser Expr
expr = do
t1 <- term
(PlusE t1 <$ symbol "+" <*> term)
<|> pure (TermE t1)
term :: Parser Term
term = do
f1 <- factor
(TimesT f1 <$ symbol "*" <*> factor)
<|> pure (FactorT f1)
factor :: Parser Factor
factor = NumberF . read <$> lexeme (many1 (satisfy isDigit))
<|> ParenF <$> between (symbol "(") (symbol ")") expr
with helper functions for whitespace processing:
lexeme :: Parser a -> Parser a
lexeme p = p <* spaces
symbol :: String -> Parser String
symbol = lexeme . string
and main entry point:
parseExpr :: String -> Expr
parseExpr pgm = case parse (spaces *> expr) "(string)" pgm of
Right e -> e
Left err -> error $ show err
after which we can run:
> parseExpr "1+1*(3+4)"
PlusE (FactorT (Number 1)) (TimesT (Number 1) (ParenF (PlusE
(FactorT (Number 3)) (FactorT (Number 4)))))
>
To convert this into the following AST:
data AExpr -- Abstract Expression
= NumberA Int
| PlusA AExpr AExpr
| TimesA AExpr AExpr
we could write:
aexpr :: Expr -> AExpr
aexpr (TermE t) = aterm t
aexpr (PlusE t1 t2) = PlusA (aterm t1) (aterm t2)
aterm :: Term -> AExpr
aterm (FactorT f) = afactor f
aterm (TimesT f1 f2) = TimesA (afactor f1) (afactor f2)
afactor :: Factor -> AExpr
afactor (NumberF n) = NumberA n
afactor (ParenF e) = aexpr e
To interpret the AST, we could use:
interp :: AExpr -> Int
interp (NumberA n) = n
interp (PlusA e1 e2) = interp e1 + interp e2
interp (TimesA e1 e2) = interp e1 * interp e2
and then write:
calc :: String -> Int
calc = interp . aexpr . parseExpr
after which we have a crude little calculator:
> calc "1 + 2 * (6 + 3)"
19
>
Skipping the CST
As an alternative approach, we could replace the parser with one that parses directly into an AST of type AExpr:
expr :: Parser AExpr
expr = do
t1 <- term
(PlusA t1 <$ symbol "+" <*> term)
<|> pure t1
term :: Parser AExpr
term = do
f1 <- factor
(TimesA f1 <$ symbol "*" <*> factor)
<|> pure f1
factor :: Parser AExpr
factor = NumberA . read <$> lexeme (many1 (satisfy isDigit))
<|> between (symbol "(") (symbol ")") expr
You can see how little the structure of these parsers changes. All that's disappeared is the distinction between expressions, terms, and factors at the type level, and constructors like TermE, FactorT, and ParenF whose only purpose is to allow embedding of these types within each other.
In more complex scenarios, the CST and AST parsers might exhibit bigger differences. (For example, in a grammar that allowed 1 + 2 + 3, this might be represented as a single constructor data Expr = ... | PlusE [Term] | ... in the CST but with a nested series of binary PlusA constructors in the same AExpr AST type as above.)
After redefining parseExpr to return an AExpr and dropping the aexpr step from calc, everything else stays the same, and we still have:
> calc "1 + 2 * (6 + 3)"
19
>
Programs for Reference
Here's the full program using an intermediate CST:
-- Calc1.hs, using a CST
{-# OPTIONS_GHC -Wall #-}
module Calc1 where
import Data.Char
import Text.Parsec
import Text.Parsec.String
data Expr = TermE Term | PlusE Term Term deriving (Show)
data Term = FactorT Factor | TimesT Factor Factor deriving (Show)
data Factor = NumberF Int | ParenF Expr deriving (Show)
lexeme :: Parser a -> Parser a
lexeme p = p <* spaces
symbol :: String -> Parser String
symbol = lexeme . string
expr :: Parser Expr
expr = do
t1 <- term
(PlusE t1 <$ symbol "+" <*> term)
<|> pure (TermE t1)
term :: Parser Term
term = do
f1 <- factor
(TimesT f1 <$ symbol "*" <*> factor)
<|> pure (FactorT f1)
factor :: Parser Factor
factor = NumberF . read <$> lexeme (many1 (satisfy isDigit))
<|> ParenF <$> between (symbol "(") (symbol ")") expr
parseExpr :: String -> Expr
parseExpr pgm = case parse (spaces *> expr) "(string)" pgm of
Right e -> e
Left err -> error $ show err
data AExpr -- Abstract Expression
= NumberA Int
| PlusA AExpr AExpr
| TimesA AExpr AExpr
aexpr :: Expr -> AExpr
aexpr (TermE t) = aterm t
aexpr (PlusE t1 t2) = PlusA (aterm t1) (aterm t2)
aterm :: Term -> AExpr
aterm (FactorT f) = afactor f
aterm (TimesT f1 f2) = TimesA (afactor f1) (afactor f2)
afactor :: Factor -> AExpr
afactor (NumberF n) = NumberA n
afactor (ParenF e) = aexpr e
interp :: AExpr -> Int
interp (NumberA n) = n
interp (PlusA e1 e2) = interp e1 + interp e2
interp (TimesA e1 e2) = interp e1 * interp e2
calc :: String -> Int
calc = interp . aexpr . parseExpr
and here's the full program for the more traditional solution that skips an explicit CST representation:
-- Calc2.hs, with direct parsing to AST
{-# OPTIONS_GHC -Wall #-}
module Calc where
import Data.Char
import Text.Parsec
import Text.Parsec.String
lexeme :: Parser a -> Parser a
lexeme p = p <* spaces
symbol :: String -> Parser String
symbol = lexeme . string
expr :: Parser AExpr
expr = do
t1 <- term
(PlusA t1 <$ symbol "+" <*> term)
<|> pure t1
term :: Parser AExpr
term = do
f1 <- factor
(TimesA f1 <$ symbol "*" <*> factor)
<|> pure f1
factor :: Parser AExpr
factor = NumberA . read <$> lexeme (many1 (satisfy isDigit))
<|> between (symbol "(") (symbol ")") expr
parseExpr :: String -> AExpr
parseExpr pgm = case parse (spaces *> expr) "(string)" pgm of
Right e -> e
Left err -> error $ show err
data AExpr -- Abstract Expression
= NumberA Int
| PlusA AExpr AExpr
| TimesA AExpr AExpr
interp :: AExpr -> Int
interp (NumberA n) = n
interp (PlusA e1 e2) = interp e1 + interp e2
interp (TimesA e1 e2) = interp e1 * interp e2
calc :: String -> Int
calc = interp . parseExpr
Okay so Buhr's answer is quite nice. Here's how I did though (no CST) inspired by sepp2k's response:
The AST:
data OP = OPplus | OPminus | OPstar | OPdiv
| OPidiv | OPmod | OPand | OPeq | OPneq
| OPless | OPgreater | OPle | OPge
| OPin | OPor
data Expr =
Relation Expr OP Expr -- > < == >= etc..
| Unary OP Expr -- + -
| Mult Expr OP Expr -- * / div mod and
| Add Expr OP Expr -- + - or
| FactorInt Int | FactorReal Double
| FactorStr String
| FactorTrue | FactorFalse
| FactorNil
| FactorDesig Designator -- identifiers
| FactorNot Expr
| FactorFuncCall FuncCall deriving (Show)
The parsers:
parseExpr :: Parser Expr
parseExpr = (try $ Relation <$>
parseSimpleExpr <*> parseOPrelation <*> parseSimpleExpr)
<|> parseSimpleExpr
parseSimpleExpr :: Parser Expr
parseSimpleExpr = (try simpleAdd)
<|> (try $ Unary <$> parseOPunary <*> simpleAdd)
<|> (try $ Unary <$> parseOPunary <*> parseSimpleExpr)
<|> parseTerm
where simpleAdd = Add <$> parseTerm <*> parseOPadd <*> parseSimpleExpr
parseTerm :: Parser Expr
parseTerm = (try $ Mult <$>
parseFactor <*> parseOPmult <*> parseTerm)
<|> parseFactor
parseFactor :: Parser Expr
parseFactor =
(parseKWnot >> FactorNot <$> parseFactor)
<|> (exactTok "true" >> return FactorTrue)
<|> (exactTok "false" >> return FactorFalse)
<|> (parseNumber)
<|> (FactorStr <$> parseString)
<|> (betweenCharTok '(' ')' parseExpr)
<|> (FactorDesig <$> parseDesignator)
<|> (FactorFuncCall <$> parseFuncCall)
I didn't include basic parsers like parseOPadd as those are what you'd expect and are easy to build.
I still parsed according to the grammar but tweaked it slightly to match my AST.
You could check out the full source which is a compiler for Pascal here.
I've defined an expression tree structure in F# as follows:
type Num = int
type Name = string
type Expr =
| Con of Num
| Var of Name
| Add of Expr * Expr
| Sub of Expr * Expr
| Mult of Expr * Expr
| Div of Expr * Expr
| Pow of Expr * Expr
| Neg of Expr
I wanted to be able to pretty-print the expression tree so I did the following:
let (|Unary|Binary|Terminal|) expr =
match expr with
| Add(x, y) -> Binary(x, y)
| Sub(x, y) -> Binary(x, y)
| Mult(x, y) -> Binary(x, y)
| Div(x, y) -> Binary(x, y)
| Pow(x, y) -> Binary(x, y)
| Neg(x) -> Unary(x)
| Con(x) -> Terminal(box x)
| Var(x) -> Terminal(box x)
let operator expr =
match expr with
| Add(_) -> "+"
| Sub(_) | Neg(_) -> "-"
| Mult(_) -> "*"
| Div(_) -> "/"
| Pow(_) -> "**"
| _ -> failwith "There is no operator for the given expression."
let rec format expr =
match expr with
| Unary(x) -> sprintf "%s(%s)" (operator expr) (format x)
| Binary(x, y) -> sprintf "(%s %s %s)" (format x) (operator expr) (format y)
| Terminal(x) -> string x
However, I don't really like the failwith approach for the operator function since it's not compile-time safe. So I rewrote it as an active pattern:
let (|Operator|_|) expr =
match expr with
| Add(_) -> Some "+"
| Sub(_) | Neg(_) -> Some "-"
| Mult(_) -> Some "*"
| Div(_) -> Some "/"
| Pow(_) -> Some "**"
| _ -> None
Now I can rewrite my format function beautifully as follows:
let rec format expr =
match expr with
| Unary(x) & Operator(op) -> sprintf "%s(%s)" op (format x)
| Binary(x, y) & Operator(op) -> sprintf "(%s %s %s)" (format x) op (format y)
| Terminal(x) -> string x
I assumed, since F# is magic, that this would just work. Unfortunately, the compiler then warns me about incomplete pattern matches, because it can't see that anything that matches Unary(x) will also match Operator(op) and anything that matches Binary(x, y) will also match Operator(op). And I consider warnings like that to be as bad as compiler errors.
So my questions are: Is there a specific reason why this doesn't work (like have I left some magical annotation off somewhere or is there something that I'm just not seeing)? Is there a simple workaround I could use to get the type of safety I want? And is there an inherent problem with this type of compile-time checking, or is it something that F# might add in some future release?
If you code the destinction between ground terms and complex terms into the type system, you can avoid the runtime check and make them be complete pattern matches.
type Num = int
type Name = string
type GroundTerm =
| Con of Num
| Var of Name
type ComplexTerm =
| Add of Term * Term
| Sub of Term * Term
| Mult of Term * Term
| Div of Term * Term
| Pow of Term * Term
| Neg of Term
and Term =
| GroundTerm of GroundTerm
| ComplexTerm of ComplexTerm
let (|Operator|) ct =
match ct with
| Add(_) -> "+"
| Sub(_) | Neg(_) -> "-"
| Mult(_) -> "*"
| Div(_) -> "/"
| Pow(_) -> "**"
let (|Unary|Binary|) ct =
match ct with
| Add(x, y) -> Binary(x, y)
| Sub(x, y) -> Binary(x, y)
| Mult(x, y) -> Binary(x, y)
| Div(x, y) -> Binary(x, y)
| Pow(x, y) -> Binary(x, y)
| Neg(x) -> Unary(x)
let (|Terminal|) gt =
match gt with
| Con x -> Terminal(string x)
| Var x -> Terminal(string x)
let rec format expr =
match expr with
| ComplexTerm ct ->
match ct with
| Unary(x) & Operator(op) -> sprintf "%s(%s)" op (format x)
| Binary(x, y) & Operator(op) -> sprintf "(%s %s %s)" (format x) op (format y)
| GroundTerm gt ->
match gt with
| Terminal(x) -> x
also, imo, you should avoid boxing if you want to be type-safe. If you really want both cases, make two pattern. Or, as done here, just make a projection to the type you need later on. This way you avoid the boxing and instead you return what you need for printing.
I think you can make operator a normal function rather than an active pattern. Because operator is just a function which gives you an operator string for an expr, where as unary, binary and terminal are expression types and hence it make sense to pattern match on them.
let operator expr =
match expr with
| Add(_) -> "+"
| Sub(_) | Neg(_) -> "-"
| Mult(_) -> "*"
| Div(_) -> "/"
| Pow(_) -> "**"
| Var(_) | Con(_) -> ""
let rec format expr =
match expr with
| Unary(x) -> sprintf "%s(%s)" (operator expr) (format x)
| Binary(x, y) -> sprintf "(%s %s %s)" (format x) (operator expr) (format y)
| Terminal(x) -> string x
I find the best solution is to restructure your original type defintion:
type UnOp = Neg
type BinOp = Add | Sub | Mul | Div | Pow
type Expr =
| Int of int
| UnOp of UnOp * Expr
| BinOp of BinOp * Expr * Expr
All sorts of functions can then be written over the UnOp and BinOp types including selecting operators. You may even want to split BinOp into arithmetic and comparison operators in the future.
For example, I used this approach in the (non-free) article "Language-oriented programming: The Term-level Interpreter
" (2008) in the F# Journal.