ocaml parser "values do not match" error - parsing

Hello!
I'm trying to write a lexer and parser for a computer algebra system.
When I compile the code with my makefile, I run into a problem with the types of some functions.
Here is the code of function.ml :
(****************************************
* Type definitions
****************************************)
type operator = Plus | Minus | Times | Div | Power;;
type var = string;; (* e.g. "x", "y", etc. *)
type power = var * float;; (* var, raised_to, e.g. ("x", 3.) -> x^3. *)
type monomial = float * power list;; (* coefficient, list of power terms, e.g. (2., [("x", 1.); ("y", 3.)]) -> 2xy^3 *)
type polynomial = monomial list;; (* sum of monomials, e.g. [(2., [("x", 2.); ("y", 3.)]); (-1., [])] -> 2x^2y^3 - 1 *)
type frac = polynomial * polynomial;; (* numerator, denominator *)
type exp = frac * frac;; (* base, exponent *)
type term = Poly of polynomial
| Frac of frac
| Exp of exp;;
type expr = Leaf of term
| Node of operator * expr list;;
type eqn = expr * expr;;
(****************************************
* Lexer/Parser definitions
****************************************)
type token = PLUS | MINUS | TIMES | DIV | POWER | LPAREN | RPAREN | EQUALS
| FLOAT of float
| VAR of var
| EOF;;
Here is the code of lexer.mll :
{
open Function
}
let numeric = ['0' - '9']
let letter = ['a' - 'z' 'A' - 'Z']
rule main = parse
| [' ' '\t' '\n'] { main lexbuf } (* skip over whitespace *)
| "+" { PLUS }
| "-" { MINUS }
| "*" { TIMES }
| "/" { DIV }
| "^" { POWER }
| "(" { LPAREN }
| ")" { RPAREN }
| "=" { EQUALS }
| ((numeric*) '.' (numeric+)) as n
{ FLOAT (float_of_string n) }
| (numeric+) as n
{ FLOAT (float_of_string n) }
| (letter numeric*) as n
{ VAR n }
| eof { EOF }
{
let lexer_main = main;;
(* [token_iterator_of_string s] builds one lexing buffer over [s] and returns
   a thunk; each call to the thunk yields the next token from that buffer. *)
let token_iterator_of_string s =
  let lbuf = Lexing.from_string s in
  fun () -> lexer_main lbuf;;
(* [token_list_of_string s] lexes all of [s] and returns its tokens in source
   order. The terminating [EOF] token is not included in the result. *)
let token_list_of_string s =
  let lbuf = Lexing.from_string s in
  (* Tokens are accumulated in reverse so that the loop is tail-recursive and
     does not grow the stack on long inputs; the list is reversed once at
     the end. *)
  let rec collect acc =
    match lexer_main lbuf with
    | EOF -> List.rev acc
    | token -> collect (token :: acc)
  in
  collect [];;
}
Here is the code of parser.mly :
%{
open Function
%}
%token PLUS MINUS TIMES DIV POWER LPAREN RPAREN EQUALS EOF
%token <float> FLOAT
%token <Function.var> VAR
%start yacc_eqn
%start yacc_expr
%type <Function.eqn> yacc_eqn
%type <Function.expr> yacc_expr
%%
yacc_eqn:
exp EQUALS exp EOF { ($1, $3) }
yacc_expr:
exp EOF { $1 }
exp:
op1 { $1 }
op1:
op2 { $1 }
| op1 PLUS op2 { Node(Plus, [$1; $3]) }
| op1 MINUS op2 { Node(Minus, [$1; $3]) }
op2:
op3 { $1 }
| op2 TIMES op3 { Node(Times, [$1; $3]) }
| op2 DIV op3 { Node(Div, [$1; $3]) }
op3:
op4 { $1 }
| op3 op4 { Node(Times, [$1; $2]) }
op4:
leaf { $1 }
| op4 POWER leaf { Node(Power, [$1; $3]) }
leaf:
atom { $1 }
| LPAREN exp RPAREN { $2 }
atom:
VAR { Leaf(Poly(poly_of_var $1)) }
| FLOAT { Leaf(Poly(poly_of_float $1)) }
%%
let eqn_of_string s = yacc_eqn Lexer.lexer_main (Lexing.from_string s);;
let expr_of_string s = yacc_expr Lexer.lexer_main (Lexing.from_string s);;
let parse_eqn = eqn_of_string;;
let parse_expr = expr_of_string;;
The problem is that in the parser.mli that is generated when I run the makefile, the signatures of yacc_eqn and yacc_expr are:
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
val yacc_expr :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.expr
And I have the following error :
The implementation parser.ml does not match the interface parser.cmi:
Values do not match:
val yacc_eqn :
(Lexing.lexbuf -> Function.token) -> Lexing.lexbuf -> Function.eqn
is not included in
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
I think the solution might be something like a cast, but I have strictly no idea how to do that... Can anyone help?
Thanks in advance !

Related

OCaml: parse minus floating-point number as a calculator

What I would like to do
I would like to correctly parse minus floating-point numbers.
How should I fix my code?
What is not working
When I try to interpret - 5 as -5.000000, it shows me this error.
Fatal error: exception Stdlib.Parsing.Parse_error
1c1
< error: parse error at char=0, near token '-'
---
> - 5 = -5.000000
My source code
calc_ast.ml
(* abstract syntax tree *)
type expr =
Num of float
| Plus of expr * expr
| Times of expr * expr
| Div of expr * expr
| Minus of expr * expr
;;
calc_lex.mll
{
open Calc_parse
;;
}
(* Named regexp for a decimal digit; the NUM rule below refers to it. *)
let digit = ['0' - '9']

(* Tokenizer for the calculator: skips blanks and returns Calc_parse tokens. *)
rule lex = parse
| [' ' '\t' '\n' ] { lex lexbuf }
(* The optional leading '-' is kept exactly as posted in the question; it is
   the construct discussed in the answer below the code. *)
| '-'? digit+ as s { NUM(float_of_string s) }
| '-'? digit+ ('.' digit*)? as s { NUM(float_of_string s) }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIV }
| '(' { LPAREN }
| ')' { RPAREN }
| eof { EOF }
calc_parse.mly
%{
%}
%token <float> NUM
%token PLUS TIMES EOF MINUS DIV LPAREN RPAREN
%start program
%type <Calc_ast.expr> program
%%
program :
| compound_expr EOF { $1 }
compound_expr :
| expr { $1 }
| LPAREN expr RPAREN { $2 }
expr :
| mul { $1 }
| expr PLUS mul { Calc_ast.Plus($1, $3) }
| expr MINUS mul { Calc_ast.Minus($1, $3) }
mul :
| NUM { Calc_ast.Num $1 }
| mul TIMES NUM { Calc_ast.Times($1, Calc_ast.Num $3) }
| mul DIV NUM { Calc_ast.Div($1, Calc_ast.Num $3) }
%%
calc.ml
open Calc_parse
(* Render a parser token as a short printable name; a NUM token also shows
   its float payload. *)
let string_of_token = function
  | NUM value -> Printf.sprintf "NUM(%f)" value
  | PLUS -> "PLUS"
  | TIMES -> "TIMES"
  | MINUS -> "MINUS"
  | DIV -> "DIV"
  | LPAREN -> "LPAREN"
  | RPAREN -> "RPAREN"
  | EOF -> "EOF"
;;
(* Write the printable name of [t] on its own line on stdout, then return [t]
   unchanged so the call can be inserted in a token pipeline. *)
let print_token t =
  t |> string_of_token |> Printf.printf "%s\n";
  t
;;
(* Run the lexer over [s], printing every token (including the final EOF)
   as it is produced. *)
let lex_string s =
  let lexbuf = Lexing.from_string s in
  let rec drain () =
    match print_token (Calc_lex.lex lexbuf) with
    | EOF -> ()
    | _ -> drain ()
  in
  drain ()
;;
(* Parse [s] into an AST.
   On [Parsing.Parse_error], report on stdout the offset and text of the
   lexeme the lexer was on, then re-raise the same exception. *)
let parse_string s =
  let lexbuf = Lexing.from_string s in
  match program Calc_lex.lex lexbuf with
  | ast -> ast
  | exception (Parsing.Parse_error as exn) ->
    (* Offsets of the most recently read lexeme inside [s]. *)
    let start_pos = Lexing.lexeme_start lexbuf in
    let end_pos = Lexing.lexeme_end lexbuf in
    Printf.printf
      "error: parse error at char=%d, near token '%s'\n"
      start_pos (String.sub s start_pos (end_pos - start_pos));
    raise exn
;;
(* Evaluate an expression tree to a float by structural recursion. *)
let rec eval_expr e =
  (* Evaluate both operands and combine them with the float operator [op]. *)
  let binary op lhs rhs = op (eval_expr lhs) (eval_expr rhs) in
  match e with
  | Calc_ast.Num c -> c
  | Calc_ast.Plus (lhs, rhs) -> binary ( +. ) lhs rhs
  | Calc_ast.Minus (lhs, rhs) -> binary ( -. ) lhs rhs
  | Calc_ast.Times (lhs, rhs) -> binary ( *. ) lhs rhs
  | Calc_ast.Div (lhs, rhs) -> binary ( /. ) lhs rhs
;;
(* Parse [s] and evaluate the resulting expression tree. *)
let eval_string s = s |> parse_string |> eval_expr
;;
(* Evaluate [s] and print "<input> = <value>" on stdout. *)
let eval_print_string s =
  Printf.printf "%s = %f\n" s (eval_string s)
;;
(* Read one line from stdin, trim surrounding whitespace, then evaluate and
   print it. *)
let eval_print_stdin () =
  stdin |> input_line |> String.trim |> eval_print_string
;;
(* Program entry point: evaluates a single line read from stdin.
   [argv] is accepted but not used. *)
let main argv =
eval_print_stdin ()
;;
(* Run [main] only when the file is not loaded in the interactive toplevel. *)
if not !Sys.interactive then
main Sys.argv
;;
As indicated in the comments, it's almost never a good idea for the lexical analyser to try to recognise the - as part of a numeric literal:
Since the lexical token must be a contiguous string, - 5 will not match. Instead, you'll get two tokens. So you need to handle that in the parser anyway.
On the other hand, if you don't put a space after the -, then 3-4 will be analysed as the two tokens 3 and -4, which is also going to lead to a syntax error.
A simple solution is to add a term nonterminal that recognises the unary negation operator:
/* mul: left-associative products and quotients of terms.
   term already yields a Calc_ast.expr, so it is passed through unwrapped. */
mul :
| term { $1 }
| mul TIMES term { Calc_ast.Times($1, $3) }
| mul DIV term { Calc_ast.Div($1, $3) }
/* term: a number, a negated term, or a parenthesised expression.
   Every alternative produces a Calc_ast.expr so the rule is well typed. */
term :
| NUM { Calc_ast.Num $1 }
/* The AST has no negation node, so unary minus is encoded as 0 - x. */
| MINUS term { Calc_ast.Minus(Calc_ast.Num 0., $2) }
| LPAREN expr RPAREN { $2 }
In the above, I also moved the handling of parentheses from the bottom to the top of the hierarchy, in order to make 4*(5+3) possible. With that change, you will no longer require compound_expr.

Pattern Matching in dypgen

I want to handle some ambiguities in dypgen. I found something in the manual and would like to know how I can use it.
In the manual point 5.2 "Pattern matching on Symbols" there is an example:
expr:
| expr OP<"+"> expr { $1 + $2 }
| expr OP<"*"> expr { $1 * $2 }
OP is matched with "+" or "*", as I understand. I also find there:
The patterns can be any Caml patterns (but without the keyword when).
For instance this is possible:
expr: expr<(Function([arg1;arg2],f_body)) as f> expr
{ some action }
So I tried to put some other expressions in there, but I don't understand what happens. If I put printf in there, it outputs the value of the matched string. But if I put (fun x -> printf x) in there, which seems to me to be the same as printf, dypgen complains about a syntax error and points to the end of the expression. If I put Printf.printf in there, it complains with Syntax error: operator expected. And if I put (fun x -> Printf.printf x) in there, it says: Lexing failed with message: lexing: empty token
What do these different error-messages mean?
In the end I would like to look up something in a hashtable, if the value is in there, but I don't know, if it is possible this way. Is it or isn't it possible?
EDIT: A minimal example derived from the forest-example from the dypgen-demos.
The grammarfile forest_parser.dyp contains:
{
open Parse_tree
let dyp_merge = Dyp.keep_all
}
%start main
%layout [' ' '\t']
%%
main : np "." "\n" { $1 }
np:
| sg {Noun($1)}
| pl {Noun($1)}
sg: word <Word("sheep"|"fish")> {Sg($1)}
sg: word <Word("cat"|"dog")> {Sg($1)}
pl: word <Word("sheep"|"fish")> {Pl($1)}
pl: word <Word("cats"|"dogs")> {Pl($1)}
/* OR try:
sg: word <printf> {Sg($1)}
pl: word <printf> {Pl($1)}
*/
word:
| (['A'-'Z' 'a'-'z']+) {Word($1)}
The forest.ml has the following print_forest-function now:
(* Print every tree of [forest] on its own line in bracketed form
   (e.g. "N [Sg [fish ] ]"), followed by one extra blank line. *)
let print_forest forest =
  let rec show = function
    | Word w -> print_string w
    | Noun sub -> bracket "N [" sub
    | Sg sub -> bracket "Sg [" sub
    | Pl sub -> bracket "Pl [" sub
  (* Print [opening], the subtree, then the closing " ]". *)
  and bracket opening sub =
    print_string opening;
    show sub;
    print_string " ]"
  in
  List.iter (fun tree -> show tree; print_newline ()) forest;
  print_newline ()
And the parser_tree.mli contains:
type tree =
| Word of string
| Noun of tree
| Sg of tree
| Pl of tree
And then you can determine which grammatical number (singular or plural) fish, sheep, cat(s) etc. have.
sheep or fish can be singular and plural. cats and dogs cannot.
fish.
N [Sg [fish ] ]
N [Pl [fish ] ]
I know nothing about Dypgen so I tried to figure it out.
Let's see what I found out.
In the parser.dyp file you can define the lexer and the parser or you can use an external lexer. Here's what I did :
My ast looks like this :
parse_prog.mli
type f =
| Print of string
| Function of string list * string * string
type program = f list
prog_parser.dyp
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pr
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] }
| function_call ";" function_calls
{ $1 :: $3 }
function_call:
| printf STRING
{ Print $2 } pr
| "(" ANONYMFUNCTION lident "->" printf lident ")" STRING
{ Print $6 } pf
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) } pf
| FUNCTION STRING
{ Function ([], $1, $2) } pf
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) } pf
printf:
| FUNCTION<"printf">
{ () }
| MODULE<"Printf"> "." FUNCTION<"printf">
{ () }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
This file is the most important. As you can see, if I have a function call printf "Test", my grammar is ambiguous: it can be reduced to either Print "Test" or Function ([], "printf", "Test"). But, as I realized, I can give priorities to my rules, so the rule that has the higher priority will be the one chosen for the first parse. (Try uncommenting let dyp_merge = Dyp.keep_all and you'll see all the possible combinations.)
And in my main :
main.ml
open Parse_prog
(* Formatter-style printer (usable with "%a"): writes each string of [sl]
   followed by a dot, and writes nothing for the empty list. *)
let print_stlist fmt sl =
  List.iter (fun s -> Format.fprintf fmt "%s." s) sl
(* Print every parse result contained in [tl].
   [tl] is a list of pairs whose first component is a list of parsed calls;
   the second component is ignored (presumably the dypgen priority of the
   parse -- confirm against the generated parser's interface).
   Each call is reported on stdout; after each parse result a separator line
   is written to stderr. *)
let print_program tl =
  let print_call = function
    | Function (ml, f, p) ->
      (* "@." ends the line and flushes the formatter. *)
      Format.printf "I can't do anything with %a%s(\"%s\")@." print_stlist ml f p
    | Print s -> Format.printf "You want to print : %s@." s
  in
  (* Handle exactly one parse result, so that n results are printed n times
     rather than n * n times. *)
  let print_parse (calls, _) =
    List.iter print_call calls;
    Format.eprintf "------------@."
  in
  List.iter print_parse tl
(* Path of the file to parse, taken from the first command-line argument. *)
let input_file = Sys.argv.(1)
(* The grammar is in prog_parser.dyp, so the generated module is Prog_parser. *)
let lexbuf = Dyp.from_channel (Prog_parser.pp ()) (open_in input_file)
(* All parse results produced by the grammar's [main] entry point. *)
let result = Prog_parser.main lexbuf
let () = print_program result
And, for example, for the following file :
test
printf "first print";
Printf.printf "nested print";
Format.eprintf "nothing possible";
(fun x -> printf x) "Anonymous print";
If I execute ./myexec test I will get the following prompt
You want to print : first print
You want to print : nested print
I can't do anything with Format.eprintf("nothing possible")
You want to print : x
------------
So, TL;DR, the manual example was just here to show you that you can play with your defined tokens (I never defined the token PRINT, just FUNCTION) and match on them to get new rules.
I hope it's clear, I learned a lot with your question ;-)
[EDIT] So, I changed the parser to match what you wanted to watch :
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pp
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] } pf
| function_call <Function((["Printf"] | []), "printf", st)> ";" function_calls
{ (Print st) :: $3 } pp
| function_call ";" function_calls
{ $1 :: $3 } pf
function_call:
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) }
| FUNCTION STRING
{ Function ([], $1, $2) }
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
Here, as you can see, I don't handle the fact that my function is print when I parse it but when I put it in my functions list. So, I match on the algebraic type that was built by my parser. I hope this example is ok for you ;-) (but be warned, this is extremely ambiguous ! :-D)

unclear how to add extra productions to bison grammar to create error messages

This is not homework, but it is from a book.
I'm given a following bison spec file:
%{
#include <stdio.h>
#include <ctype.h>
int yylex();
int yyerror();
%}
%token NUMBER
%%
command : exp { printf("%d\n", $1); }
; /* allows printing of the result */
exp : exp '+' term { $$ = $1 + $3; }
| exp '-' term { $$ = $1 - $3; }
| term { $$ = $1; }
;
term : term '*' factor { $$ = $1 * $3; }
| factor { $$ = $1; }
;
factor : NUMBER { $$ = $1; }
| '(' exp ')' { $$ = $2; }
;
%%
int main() {
return yyparse();
}
/*
 * Hand-written scanner called by yyparse().
 * Reads from stdin and returns:
 *   - NUMBER, with the integer value stored in yylval, when a digit is seen;
 *   - 0 on a newline, which makes the parser treat the input as finished;
 *   - otherwise the character itself, used directly as the token code.
 */
int yylex() {
int c;
/* eliminate blanks*/
while((c = getchar()) == ' ');
if (isdigit(c)) {
/* push the digit back so scanf can read the whole number */
ungetc(c, stdin);
scanf("%d", &yylval);
return (NUMBER);
}
/* makes the parse stop */
if (c == '\n') return 0;
return (c);
}
int yyerror(char * s) {
fprintf(stderr, "%s\n", s);
return 0;
} /* allows for printing of an error message */
The task is to do the following:
Rewrite the spec to add the following useful error messages:
"missing right parenthesis," generated by the string (2+3
"missing left parenthesis," generated by the string 2+3)
"missing operator," generated by the string 2 3
"missing operand," generated by the string (2+)
The simplest solution that I was able to come up with is to do the following:
half_exp : exp '+' { $$ = $1; }
| exp '-' { $$ = $1; }
| exp '*' { $$ = $1; }
;
factor : NUMBER { $$ = $1; }
| '(' exp '\n' { yyerror("missing right parenthesis"); }
| exp ')' { yyerror("missing left parenthesis"); }
| '(' exp '\n' { yyerror("missing left parenthesis"); }
| '(' exp ')' { $$ = $2; }
| '(' half_exp ')' { yyerror("missing operand"); exit(0); }
;
exp : exp '+' term { $$ = $1 + $3; }
| exp '-' term { $$ = $1 - $3; }
| term { $$ = $1; }
| exp exp { yyerror("missing operator"); }
;
These changes work, however they lead to a lot of conflicts.
Here is my question.
Is there a way to rewrite this grammar in such a way so that it wouldn't generate conflicts?
Any help is appreciated.
Yes it is possible:
command : exp { printf("%d\n", $1); }
; /* allows printing of the result */
exp: exp '+' exp {
// code
}
| exp '-' exp {
// code
}
| exp '*' exp {
// code
}
| exp '/' exp {
// code
}
|'(' exp ')' {
// code
}
Bison allows Ambiguous grammars.
I don't see how you can rewrite the grammar to avoid conflicts. You just missed the point of terms, factors, etc. You use these when you want a left-recursive context-free grammar.
From this grammar:
E -> E+T
|T
T -> T*F
|F
F -> (E)
|num
Once you free it from left recursion you would go to:
E -> TE' { num , ( }
E' -> +TE' { + }
| eps { ) , EOI }
T -> FT' { ( , num }
T' -> *FT' { * }
|eps { + , ) , EOI }
F -> (E) { ( }
|num { num }
The sets shown next to the rules indicate which input token must come next for that rule to be applied. Of course, this is just an example for simple arithmetic expressions such as 2*(3+4)*5+(3*3*3+4+5*6).
If you want to learn more about this topic, I suggest you read about "left recursion context free grammar". There are some great books covering this topic and also covering how to compute these input sets.
But as I said above, all of this can be avoided because Bison allows Ambiguous grammars.

Haskell - Happy - "No instance ..." error

I'm trying to get familiar with Happy parser generator for Haskell. Currently, I have an example from the documentation but when I compile the program, I get an error.
This is the code:
{
module Main where
import Data.Char
}
%name calc
%tokentype { Token }
%error { parseError }
%token
let { TokenLet }
in { TokenIn }
int { TokenInt $$ }
var { TokenVar $$ }
'=' { TokenEq }
'+' { TokenPlus }
'-' { TokenMinus }
'*' { TokenTimes }
'/' { TokenDiv }
'(' { TokenOB }
')' { TokenCB }
%%
Exp : let var '=' Exp in Exp { \p -> $6 (($2,$4 p):p) }
| Exp1 { $1 }
Exp1 : Exp1 '+' Term { \p -> $1 p + $3 p }
| Exp1 '-' Term { \p -> $1 p - $3 p }
| Term { $1 }
Term : Term '*' Factor { \p -> $1 p * $3 p }
| Term '/' Factor { \p -> $1 p `div` $3 p }
| Factor { $1 }
Factor
: int { \p -> $1 }
| var { \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
| '(' Exp ')' { $2 }
{
parseError :: [Token] -> a
parseError _ = error "Parse error"
data Token
= TokenLet
| TokenIn
| TokenInt Int
| TokenVar String
| TokenEq
| TokenPlus
| TokenMinus
| TokenTimes
| TokenDiv
| TokenOB
| TokenCB
deriving Show
lexer :: String -> [Token]
lexer [] = []
lexer (c:cs)
| isSpace c = lexer cs
| isAlpha c = lexVar (c:cs)
| isDigit c = lexNum (c:cs)
lexer ('=':cs) = TokenEq : lexer cs
lexer ('+':cs) = TokenPlus : lexer cs
lexer ('-':cs) = TokenMinus : lexer cs
lexer ('*':cs) = TokenTimes : lexer cs
lexer ('/':cs) = TokenDiv : lexer cs
lexer ('(':cs) = TokenOB : lexer cs
lexer (')':cs) = TokenCB : lexer cs
lexNum cs = TokenInt (read num) : lexer rest
where (num,rest) = span isDigit cs
lexVar cs =
case span isAlpha cs of
("let",rest) -> TokenLet : lexer rest
("in",rest) -> TokenIn : lexer rest
(var,rest) -> TokenVar var : lexer rest
main = getContents >>= print . calc . lexer
}
I'm getting this error:
[1 of 1] Compiling Main ( gr.hs, gr.o )
gr.hs:310:24:
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
Possible fix:
add an instance declaration for (Show ([(String, Int)] -> Int))
In the first argument of `(.)', namely `print'
In the second argument of `(>>=)', namely `print . calc . lexer'
In the expression: getContents >>= print . calc . lexer
Do you know why and how can I solve it?
If you examine the error message
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
it's clear that the problem is that you are trying to print a function. And indeed, the value produced by the parser function calc is supposed to be a function which takes a lookup table of variable bindings and gives back a result. See for example the rule for variables:
{ \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
So in main, we need to pass in a list for the p argument, for example an empty list. (Or you could add some pre-defined global variables if you wanted). I've expanded the point-free code to a do block so it's easier to see what's going on:
main = do
input <- getContents
let fn = calc $ lexer input
print $ fn [] -- or e.g. [("foo", 42)] if you wanted it pre-defined
Now it works:
$ happy Calc.y
$ runghc Calc.hs <<< "let x = 1337 in x * 2"
2674

Yacc and Lex error in parsing expressions which use binary operators

I am new to Lex and Yacc and I am trying to create a parser for a simple language which allows for basic arithmetic and equality expressions. Though I have some of it working, I am encountering errors when trying to parse expressions involving binary operations. Here is my .y file:
%{
#include <stdlib.h>
#include <stdio.h>
%}
%token NUMBER
%token HOME
%token PU
%token PD
%token FD
%token BK
%token RT
%token LT
%left '+' '-'
%left '=' '<' '>'
%nonassoc UMINUS
%%
S : statement S { printf("S -> stmt S\n"); }
| { printf("S -> \n"); }
;
statement : HOME { printf("stmt -> HOME\n"); }
| PD { printf("stmt -> PD\n"); }
| PU { printf("stmt -> PU\n"); }
| FD expression { printf("stmt -> FD expr\n"); }
| BK expression { printf("stmt -> BK expr\n"); }
| RT expression { printf("stmt -> RT expr\n"); }
| LT expression { printf("stmt -> LT expr\n"); }
;
expression : expression '+' expression { printf("expr -> expr + expr\n"); }
| expression '-' expression { printf("expr -> expr - expr\n"); }
| expression '>' expression { printf("expr -> expr > expr\n"); }
| expression '<' expression { printf("expr -> expr < expr\n"); }
| expression '=' expression { printf("expr -> expr = expr\n"); }
| '(' expression ')' { printf("expr -> (expr)\n"); }
| '-' expression %prec UMINUS { printf("expr -> -expr\n"); }
| NUMBER { printf("expr -> number\n"); }
;
%%
int yyerror(char *s)
{
fprintf (stderr, "%s\n", s);
return 0;
}
int main()
{
yyparse();
}
And here is my .l file for Lex:
%{
#include "testYacc.h"
%}
number [0-9]+
%%
[ ] { /* skip blanks */ }
{number} { sscanf(yytext, "%d", &yylval); return NUMBER; }
home { return HOME; }
pu { return PU; }
pd { return PD; }
fd { return FD; }
bk { return BK; }
rt { return RT; }
lt { return LT; }
%%
When I try to enter an arithmetic expression on the command-line for evaluation, it results in the following error:
home
stmt -> HOME
pu
stmt -> PU
fd 10
expr -> number
fd 10
stmt -> FD expr
expr -> number
fd (10 + 10)
stmt -> FD expr
(expr -> number
+stmt -> FD expr
S ->
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
syntax error
Your lexer lacks rules to match and return tokens such as '+' and '*', so if there are any in your input, it will just echo them and discard them. This is what happens when you enter fd (10 + 10) -- the lexer returns the tokens FD NUMBER NUMBER while + and ( get echoed to stdout. The parser then gives a syntax error.
You want to add a rule to return these single character tokens. The easiest is to just add a single rule to your .l file at the end:
. { return *yytext; }
which matches any single character.
Note that this does NOT match a \n (newline), so newlines in your input will still be echoed and ignored. You might want to add them (and tabs and carriage returns) to your skip blanks rule:
[ \t\r\n] { /* skip blanks */ }

Resources