I'm trying to get familiar with the Happy parser generator for Haskell. Currently, I have an example from the documentation, but when I compile the program, I get an error.
This is the code:
{
module Main where
import Data.Char
}
%name calc
%tokentype { Token }
%error { parseError }
%token
let { TokenLet }
in { TokenIn }
int { TokenInt $$ }
var { TokenVar $$ }
'=' { TokenEq }
'+' { TokenPlus }
'-' { TokenMinus }
'*' { TokenTimes }
'/' { TokenDiv }
'(' { TokenOB }
')' { TokenCB }
%%
Exp : let var '=' Exp in Exp { \p -> $6 (($2,$4 p):p) }
| Exp1 { $1 }
Exp1 : Exp1 '+' Term { \p -> $1 p + $3 p }
| Exp1 '-' Term { \p -> $1 p - $3 p }
| Term { $1 }
Term : Term '*' Factor { \p -> $1 p * $3 p }
| Term '/' Factor { \p -> $1 p `div` $3 p }
| Factor { $1 }
Factor
: int { \p -> $1 }
| var { \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
| '(' Exp ')' { $2 }
{
parseError :: [Token] -> a
parseError _ = error "Parse error"
data Token
= TokenLet
| TokenIn
| TokenInt Int
| TokenVar String
| TokenEq
| TokenPlus
| TokenMinus
| TokenTimes
| TokenDiv
| TokenOB
| TokenCB
deriving Show
lexer :: String -> [Token]
lexer [] = []
lexer (c:cs)
| isSpace c = lexer cs
| isAlpha c = lexVar (c:cs)
| isDigit c = lexNum (c:cs)
lexer ('=':cs) = TokenEq : lexer cs
lexer ('+':cs) = TokenPlus : lexer cs
lexer ('-':cs) = TokenMinus : lexer cs
lexer ('*':cs) = TokenTimes : lexer cs
lexer ('/':cs) = TokenDiv : lexer cs
lexer ('(':cs) = TokenOB : lexer cs
lexer (')':cs) = TokenCB : lexer cs
lexNum cs = TokenInt (read num) : lexer rest
where (num,rest) = span isDigit cs
lexVar cs =
case span isAlpha cs of
("let",rest) -> TokenLet : lexer rest
("in",rest) -> TokenIn : lexer rest
(var,rest) -> TokenVar var : lexer rest
main = getContents >>= print . calc . lexer
}
I'm getting this error:
[1 of 1] Compiling Main ( gr.hs, gr.o )
gr.hs:310:24:
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
Possible fix:
add an instance declaration for (Show ([(String, Int)] -> Int))
In the first argument of `(.)', namely `print'
In the second argument of `(>>=)', namely `print . calc . lexer'
In the expression: getContents >>= print . calc . lexer
Do you know why this happens and how I can solve it?
If you examine the error message
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
it's clear that the problem is that you are trying to print a function. And indeed, the value produced by the parser function calc is supposed to be a function which takes a lookup table of variable bindings and gives back a result. See for example the rule for variables:
{ \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
So in main, we need to pass in a list for the p argument, for example an empty list. (Or you could add some pre-defined global variables if you wanted). I've expanded the point-free code to a do block so it's easier to see what's going on:
main = do
input <- getContents
let fn = calc $ lexer input
print $ fn [] -- or e.g. [("foo", 42)] if you wanted it pre-defined
Now it works:
$ happy Calc.y
$ runghc Calc.hs <<< "let x = 1337 in x * 2"
2674
Related
What I would like to do
I would like to correctly parse minus floating-point numbers.
How should I fix my code?
What is not working
When I try to interpret - 5 as -5.000000, it shows me this error.
Fatal error: exception Stdlib.Parsing.Parse_error
1c1
< error: parse error at char=0, near token '-'
---
> - 5 = -5.000000
My source code
calc_ast.ml
(* Abstract syntax tree of the calculator: float literals combined by
   four binary arithmetic operators. *)
type expr =
  | Num of float
  | Plus of expr * expr
  | Times of expr * expr
  | Div of expr * expr
  | Minus of expr * expr
;;
calc_lex.mll
{
open Calc_parse
;;
}
rule lex = parse
| [' ' '\t' '\n' ] { lex lexbuf }
| '-'? ['0' - '9']+ as s { NUM(float_of_string s) }
| '-'? ['0' - '9']+ ('.' digit*)? as s { NUM(float_of_string s) }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIV }
| '(' { LPAREN }
| ')' { RPAREN }
| eof { EOF }
calc_parse.mly
%{
%}
%token <float> NUM
%token PLUS TIMES EOF MINUS DIV LPAREN RPAREN
%start program
%type <Calc_ast.expr> program
%%
program :
| compound_expr EOF { $1 }
compound_expr :
| expr { $1 }
| LPAREN expr RPAREN { $2 }
expr :
| mul { $1 }
| expr PLUS mul { Calc_ast.Plus($1, $3) }
| expr MINUS mul { Calc_ast.Minus($1, $3) }
mul :
| NUM { Calc_ast.Num $1 }
| mul TIMES NUM { Calc_ast.Times($1, Calc_ast.Num $3) }
| mul DIV NUM { Calc_ast.Div($1, Calc_ast.Num $3) }
%%
calc.ml
open Calc_parse
(* Render a parser token as text; NUM is shown together with its float
   payload, every other token by its constructor name. *)
let string_of_token = function
  | NUM value -> Printf.sprintf "NUM(%f)" value
  | PLUS -> "PLUS"
  | TIMES -> "TIMES"
  | MINUS -> "MINUS"
  | DIV -> "DIV"
  | LPAREN -> "LPAREN"
  | RPAREN -> "RPAREN"
  | EOF -> "EOF"
;;
(* Write the textual form of token [t] on its own line of stdout, then
   return [t] unchanged so the call can be used inside an expression. *)
let print_token t =
  let () = Printf.printf "%s\n" (string_of_token t) in
  t
;;
(* Run the lexer over string [s], printing each token as it is produced;
   stops once EOF has been printed. *)
let lex_string s =
  let lexbuf = Lexing.from_string s in
  let rec drain () =
    match print_token (Calc_lex.lex lexbuf) with
    | EOF -> ()
    | _ -> drain ()
  in
  drain ()
;;
(* Parse string [s] with the generated parser and return the AST.
   On Parsing.Parse_error, print the start offset and text of the most
   recently lexed token on stdout, then re-raise the same exception. *)
let parse_string s =
  let lexbuf = Lexing.from_string s in
  try program Calc_lex.lex lexbuf
  with Parsing.Parse_error as exn ->
    (* lexeme_start / lexeme_end delimit the last token the lexer matched *)
    let start = Lexing.lexeme_start lexbuf in
    let stop = Lexing.lexeme_end lexbuf in
    let lexeme = String.sub s start (stop - start) in
    Printf.printf "error: parse error at char=%d, near token '%s'\n"
      start lexeme;
    raise exn
;;
(* Evaluate an expression tree to a float by structural recursion:
   a literal is its own value, a binary node combines the values of its
   two children with the matching float operator. *)
let rec eval_expr = function
  | Calc_ast.Num value -> value
  | Calc_ast.Plus (lhs, rhs) -> eval_expr lhs +. eval_expr rhs
  | Calc_ast.Minus (lhs, rhs) -> eval_expr lhs -. eval_expr rhs
  | Calc_ast.Times (lhs, rhs) -> eval_expr lhs *. eval_expr rhs
  | Calc_ast.Div (lhs, rhs) -> eval_expr lhs /. eval_expr rhs
;;
(* Parse string [s] and evaluate the resulting expression tree. *)
let eval_string s = s |> parse_string |> eval_expr
;;
(* Evaluate string [s] and print a line of the form: input = value. *)
let eval_print_string s =
  Printf.printf "%s = %f\n" s (eval_string s)
;;
(* Read one line from stdin, strip surrounding whitespace, then evaluate
   and print it. *)
let eval_print_stdin () =
  stdin |> input_line |> String.trim |> eval_print_string
;;
(* Program entry point; the argument vector is accepted but not used. *)
let main _argv = eval_print_stdin ()
;;
(* Sys.interactive is true inside the interactive toplevel; only run the
   program when that is not the case. *)
if not !Sys.interactive then main Sys.argv
;;
As indicated in the comments, it's almost never a good idea for the lexical analyser to try to recognise the - as part of a numeric literal:
Since the lexical token must be a contiguous string, - 5 will not match. Instead, you'll get two tokens. So you need to handle that in the parser anyway.
On the other hand, if you don't put a space after the -, then 3-4 will be analysed as the two tokens 3 and -4, which is also going to lead to a syntax error.
A simple solution is to add a term nonterminal that recognises the unary negation operator:
/* Multiplicative level: every operand is a term, and a term is already a
   Calc_ast.expr, so it is passed through without re-wrapping in Num. */
mul :
  | term { $1 }
  | mul TIMES term { Calc_ast.Times($1, $3) }
  | mul DIV term { Calc_ast.Div($1, $3) }
/* Atomic level: every alternative yields a Calc_ast.expr. */
term :
  | NUM { Calc_ast.Num $1 }
  /* unary minus is encoded as 0 - operand; the AST has no negation node */
  | MINUS term { Calc_ast.Minus(Calc_ast.Num 0., $2) }
  | LPAREN expr RPAREN { $2 }
In the above, I also moved the handling of parentheses from the bottom to the top of the hierarchy, in order to make 4*(5+3) possible. With that change, you will no longer require compound_expr.
Hello!
I'm trying to code a lexer and parser for a computer algebra system.
When I compile the code with my makefile, I have problems with the types of some functions.
Here is the code of function.ml :
(****************************************
* Type definitions
****************************************)
(* Operators that can label an interior node of an expression tree. *)
type operator = Plus | Minus | Times | Div | Power;;
type var = string;; (* e.g. "x", "y", etc. *)
type power = var * float;; (* var, raised_to, e.g. ("x", 3.) -> x^3. *)
type monomial = float * power list;; (* coefficient, list of power terms, e.g. (2., [("x", 1.); ("y", 3.)]) -> 2xy^3 *)
type polynomial = monomial list;; (* sum of monomials, e.g. [(2., [("x", 2.); ("y", 3.)]); (-1., [])] -> 2x^2y^3 - 1 *)
type frac = polynomial * polynomial;; (* numerator, denominator *)
type exp = frac * frac;; (* base, exponent *)
(* A leaf value: a polynomial, a fraction of polynomials, or an
   exponential given as (base, exponent). *)
type term = Poly of polynomial
| Frac of frac
| Exp of exp;;
(* Expression tree: either a leaf term or an operator together with the
   list of its operand sub-expressions. *)
type expr = Leaf of term
| Node of operator * expr list;;
(* A pair of expressions; presumably (left-hand side, right-hand side) of
   an equation -- confirm against the parser action that builds it. *)
type eqn = expr * expr;;
(****************************************
* Lexer/Parser definitions
****************************************)
(* Tokens returned by the lexer: operator and punctuation tokens, a float
   literal, a variable name, and end of input.
   NOTE(review): parser.mly also declares these names with %token, so the
   generated parser has a token type of its own; confirm which of the two
   types the lexer is supposed to return. *)
type token = PLUS | MINUS | TIMES | DIV | POWER | LPAREN | RPAREN | EQUALS
| FLOAT of float
| VAR of var
| EOF;;
Here is the code of lexer.mll :
{
open Function
}
let numeric = ['0' - '9']
let letter = ['a' - 'z' 'A' - 'Z']
rule main = parse
| [' ' '\t' '\n'] { main lexbuf } (* skip over whitespace *)
| "+" { PLUS }
| "-" { MINUS }
| "*" { TIMES }
| "/" { DIV }
| "^" { POWER }
| "(" { LPAREN }
| ")" { RPAREN }
| "=" { EQUALS }
| ((numeric*) '.' (numeric+)) as n
{ FLOAT (float_of_string n) }
| (numeric+) as n
{ FLOAT (float_of_string n) }
| (letter numeric*) as n
{ VAR n }
| eof { EOF }
{
(* Exported name for the generated lexing rule [main]. *)
let lexer_main lexbuf = main lexbuf;;
(* Build a thunk over string [s]: each call returns the next token read
   from one shared lexing buffer. *)
let token_iterator_of_string s =
  let lbuf = Lexing.from_string s in
  fun () -> lexer_main lbuf;;
(* Lex the whole of string [s] and return its tokens in source order.
   The terminating EOF token is not included in the result.
   Tokens are accumulated in reverse and flipped once at the end, so stack
   use does not grow with the number of tokens. *)
let token_list_of_string s =
  let lbuf = Lexing.from_string s in
  let rec collect acc =
    match lexer_main lbuf with
    | EOF -> List.rev acc
    | token -> collect (token :: acc)
  in
  collect [];;
}
Here is the code of parser.mly :
%{
open Function
%}
%token PLUS MINUS TIMES DIV POWER LPAREN RPAREN EQUALS EOF
%token <float> FLOAT
%token <Function.var> VAR
%start yacc_eqn
%start yacc_expr
%type <Function.eqn> yacc_eqn
%type <Function.expr> yacc_expr
%%
yacc_eqn:
exp EQUALS exp EOF { ($1, $3) }
yacc_expr:
exp EOF { $1 }
exp:
op1 { $1 }
op1:
op2 { $1 }
| op1 PLUS op2 { Node(Plus, [$1; $3]) }
| op1 MINUS op2 { Node(Minus, [$1; $3]) }
op2:
op3 { $1 }
| op2 TIMES op3 { Node(Times, [$1; $3]) }
| op2 DIV op3 { Node(Div, [$1; $3]) }
op3:
op4 { $1 }
| op3 op4 { Node(Times, [$1; $2]) }
op4:
leaf { $1 }
| op4 POWER leaf { Node(Power, [$1; $3]) }
leaf:
atom { $1 }
| LPAREN exp RPAREN { $2 }
atom:
VAR { Leaf(Poly(poly_of_var $1)) }
| FLOAT { Leaf(Poly(poly_of_float $1)) }
%%
(* Parse string [s] with the yacc_eqn entry point. *)
let eqn_of_string s =
  let lexbuf = Lexing.from_string s in
  yacc_eqn Lexer.lexer_main lexbuf;;
(* Parse string [s] with the yacc_expr entry point. *)
let expr_of_string s =
  let lexbuf = Lexing.from_string s in
  yacc_expr Lexer.lexer_main lexbuf;;
(* Alternative names for the two functions above. *)
let parse_eqn s = eqn_of_string s;;
let parse_expr s = expr_of_string s;;
The problem is that in the parser.mli generated by the makefile, the signatures of yacc_eqn and yacc_expr are:
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
val yacc_expr :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.expr
And I have the following error :
The implementation parser.ml does not match the interface parser.cmi:
Values do not match:
val yacc_eqn :
(Lexing.lexbuf -> Function.token) -> Lexing.lexbuf -> Function.eqn
is not included in
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
I think the solution might be something like a cast, but I have strictly no idea how to do that... Can anyone help?
Thanks in advance !
I want to handle some ambiguities in dypgen. I found something in the manual and would like to know how I can use it.
In the manual point 5.2 "Pattern matching on Symbols" there is an example:
expr:
| expr OP<"+"> expr { $1 + $2 }
| expr OP<"*"> expr { $1 * $2 }
OP is matched with "+" or "*", as I understand. I also find there:
The patterns can be any Caml patterns (but without the keyword when).
For instance this is possible:
expr: expr<(Function([arg1;arg2],f_body)) as f> expr
{ some action }
So I tried putting some other expressions there, but I don't understand what happens. If I put printf there, it outputs the value of the matched string. But if I put (fun x -> printf x) there, which seems to me the same as printf, dypgen complains about a syntax error and points to the end of the expression. If I put Printf.printf there, it complains about Syntax error: operator expected. And if I put (fun x -> Printf.printf x) there, it says: Lexing failed with message: lexing: empty token
What do these different error-messages mean?
In the end I would like to look up something in a hashtable, if the value is in there, but I don't know, if it is possible this way. Is it or isn't it possible?
EDIT: A minimal example derived from the forest-example from the dypgen-demos.
The grammarfile forest_parser.dyp contains:
{
open Parse_tree
let dyp_merge = Dyp.keep_all
}
%start main
%layout [' ' '\t']
%%
main : np "." "\n" { $1 }
np:
| sg {Noun($1)}
| pl {Noun($1)}
sg: word <Word("sheep"|"fish")> {Sg($1)}
sg: word <Word("cat"|"dog")> {Sg($1)}
pl: word <Word("sheep"|"fish")> {Pl($1)}
pl: word <Word("cats"|"dogs")> {Pl($1)}
/* OR try:
sg: word <printf> {Sg($1)}
pl: word <printf> {Pl($1)}
*/
word:
| (['A'-'Z' 'a'-'z']+) {Word($1)}
The forest.ml has the following print_forest-function now:
(* Print every tree of [forest] on its own line of stdout, followed by one
   extra blank line. A Word prints as its text; Noun, Sg and Pl print a
   label and an opening bracket, then the subtree, then a closing bracket. *)
let print_forest forest =
  let rec show = function
    | Word text -> print_string text
    | Noun sub -> bracketed "N [" sub
    | Sg sub -> bracketed "Sg [" sub
    | Pl sub -> bracketed "Pl [" sub
  (* shared layout for the three wrapping constructors *)
  and bracketed opening sub =
    print_string opening;
    show sub;
    print_string " ]"
  in
  List.iter
    (fun tree ->
       show tree;
       print_newline ())
    forest;
  print_newline ()
And the parser_tree.mli contains:
(* Parse tree of the toy grammar: Word holds the matched text, while Noun,
   Sg and Pl each wrap a single subtree. *)
type tree =
| Word of string
| Noun of tree
| Sg of tree
| Pl of tree
And then you can determine which grammatical number (singular or plural) fish, sheep, cat(s), etc. have.
sheep or fish can be singular and plural. cats and dogs cannot.
fish.
N [Sg [fish ] ]
N [Pl [fish ] ]
I know nothing about Dypgen so I tried to figure it out.
Let's see what I found out.
In the parser.dyp file you can define the lexer and the parser or you can use an external lexer. Here's what I did :
My ast looks like this :
parse_prog.mli
(* One parsed call. Print carries a single string. Function carries a list
   of module names, a function name and a string argument, in the order in
   which the grammar action builds them. *)
type f =
| Print of string
| Function of string list * string * string
(* A program is the list of its calls. *)
type program = f list
prog_parser.dyp
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pr
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] }
| function_call ";" function_calls
{ $1 :: $3 }
function_call:
| printf STRING
{ Print $2 } pr
| "(" ANONYMFUNCTION lident "->" printf lident ")" STRING
{ Print $6 } pf
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) } pf
| FUNCTION STRING
{ Function ([], $1, $2) } pf
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) } pf
printf:
| FUNCTION<"printf">
{ () }
| MODULE<"Printf"> "." FUNCTION<"printf">
{ () }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
This file is the most important. As you can see, if I have a function call printf "Test", my grammar is ambiguous: it can be reduced to either Print "Test" or Function ([], "printf", "Test"). But, as I realized, I can give priorities to my rules, so if one has a higher priority it will be the one chosen for the first parse. (Try uncommenting let dyp_merge = Dyp.keep_all and you'll see all the possible combinations.)
And in my main :
main.ml
open Parse_prog
(* Print each string of [sl] on formatter [fmt], every one followed by a
   dot; an empty list prints nothing. Usable with the %a directive. *)
let print_stlist fmt sl =
  List.iter (fun name -> Format.fprintf fmt "%s." name) sl
(* Report every parse result in [tl], the list of pairs returned by the
   parser entry point. Only the first component of each pair (a list of
   calls) is used. Each call is described on stdout, and a dashed separator
   is written to stderr after each result. *)
let print_program tl =
  let report = function
    | Function (ml, f, p) ->
      (* "@." ends the line and flushes the formatter *)
      Format.printf "I can't do anything with %a%s(\"%s\")@." print_stlist ml f p
    | Print s -> Format.printf "You want to print : %s@." s
  in
  (* Visit each result exactly once. *)
  List.iter
    (fun (calls, _) ->
       List.iter report calls;
       Format.eprintf "------------@.")
    tl
(* Path of the file to parse: the first command-line argument. *)
let input_file = Sys.argv.(1)
(* The grammar file is prog_parser.dyp, so the generated module is
   Prog_parser; both pp and the entry point main are taken from it. *)
let lexbuf = Dyp.from_channel (Prog_parser.pp ()) (open_in input_file)
let result = Prog_parser.main lexbuf
(* Print what was parsed. *)
let () = print_program result
And, for example, for the following file :
test
printf "first print";
Printf.printf "nested print";
Format.eprintf "nothing possible";
(fun x -> printf x) "Anonymous print";
If I execute ./myexec test I will get the following output
You want to print : first print
You want to print : nested print
I can't do anything with Format.eprintf("nothing possible")
You want to print : x
------------
So, TL;DR, the manual example was just here to show you that you can play with your defined tokens (I never defined the token PRINT, just FUNCTION) and match on them to get new rules.
I hope it's clear, I learned a lot with your question ;-)
[EDIT] So, I changed the parser to match what you wanted to watch :
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pp
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] } pf
| function_call <Function((["Printf"] | []), "printf", st)> ";" function_calls
{ (Print st) :: $3 } pp
| function_call ";" function_calls
{ $1 :: $3 } pf
function_call:
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) }
| FUNCTION STRING
{ Function ([], $1, $2) }
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
Here, as you can see, I don't handle the fact that my function is print when I parse it but when I put it in my functions list. So, I match on the algebraic type that was built by my parser. I hope this example is ok for you ;-) (but be warned, this is extremely ambiguous ! :-D)
I'm working on a project that uses the Happy parser generator. This is what I have done so far:
Exp : Exp1 { $1 }
Exp1 : Exp1 '+' Term { \p -> $1 p + $3 p }
| Exp1 '-' Term { \p -> $1 p - $3 p }
| Term { $1 }
Term : Term '*' Factor { \p -> $1 p * $3 p }
| Term '/' Factor { \p -> $1 p / $3 p }
| sqrt Factor { \p -> sqrt $2 p }
| Factor { $1 }
Factor
: double { \p -> $1 }
| '(' Exp ')' { $2 }
The problem is that I get the following error:
Parser.hs:158:38:
No instance for (Floating ([a0] -> Double))
arising from a use of `happyReduction_7'
Possible fix:
add an instance declaration for (Floating ([a0] -> Double))
In the second argument of `happySpecReduce_2', namely
`happyReduction_7'
In the expression: happySpecReduce_2 6 happyReduction_7
In an equation for `happyReduce_7':
happyReduce_7 = happySpecReduce_2 6 happyReduction_7
Do you know how can I solve this?
Update: I solved it but now it works only if I write "sqrt2"(no space between sqrt and 2); if I write "sqrt 2" I get "parse error".
This is what I have in the Alex(lex) file:
-- Token rules: each action receives the matched text and returns a token.
tokens :-
$white+ ;
"--".* ;
-- keywords are listed before the general identifier rule
"sqrt" { \s -> TokenSqrt}
"sin" { \s -> TokenSin}
"log" { \s -> TokenLog}
-- @doubleNumber and @var are regular-expression macros, presumably
-- defined in the macro section that precedes these rules
@doubleNumber { \s -> TokenDouble (read s) }
@var { \s -> TokenVar s }
"+" { \s -> TokenPlus }
"-" { \s -> TokenMinus }
"*" { \s -> TokenMul }
"/" { \s -> TokenDiv }
"(" { \s -> TokenOB }
")" { \s -> TokenCB }
"=" { \s -> TokenEq }
sqrt $2 p
This calls sqrt with the function $2 as its argument and then applies the resulting function to the argument p. This would only make sense if sqrt could take a function and produce a function as a result, which would be the case if and only if there was a Floating instance for functions, which there is not. Thus the error message.
What you doubtlessly intended to do was to apply the function $2 to the argument p and then apply sqrt to the result, for which you'd write:
sqrt ($2 p)
Calculating the value of the expression on the fly in the production rules in Happy doesn't work when I use lambda expressions.
For example this code
Exp : let var '=' Exp in Exp { \p -> $6 (($2,$4 p):p) }
| Exp1 { $1 }
Exp1 : Exp1 '+' Term { \p -> $1 p + $3 p }
| Exp1 '-' Term { \p -> $1 p - $3 p }
| Term { $1 }
Term : Term '*' Factor { \p -> $1 p * $3 p }
| Term '/' Factor { \p -> $1 p `div` $3 p }
| Factor { $1 }
Factor
: int { \p -> $1 }
| var { \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
| '(' Exp ')' { $2 }
from http://www.haskell.org/happy/doc/html/sec-using.html doesn't work.
Or, more precisely, I get an error message:
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
Possible fix:
add an instance declaration for (Show ([(String, Int)] -> Int))
In a stmt of an interactive GHCi command: print it
It would be nice if you could explain to me what I have to change.
It must have something to do with the lambda expression and the environment variable p.
When I'm using data types everything is fine.
The thing to note here is that the result of this parser is a function which takes an environment of variable bindings. The error message is basically GHCi telling you that it can't print functions, presumably because you forgot to pass an environment
> eval "1 + 1"
when you should have either passed an empty environment
> eval "1 + 1" []
or one with some pre-defined variables
> eval "x + x" [("x", 1)]