YACC Parser getting an error at line 1 even if test program is empty? - parsing

I have been trying to do my homework, which is language design with Lex and YACC. My assignment is to build a simple parser with YACC. My problem is that the test file I send to my parser always produces a syntax error on the first line. As far as I can tell, the parser does not even reach the first statement of the example. The other problem is that my program returns an error message even if the test program is empty. Here are my Lex and YACC code and the example programs I am using.
My Lex Code:
%{
#include <stdio.h>
#include "y.tab.h"
void yyerror(char *);
%}
lowerLetter [a-z]
letter [a-zA-Z]
digit [0-9]
signs [+-]
integer {signs}?{digit}+
double {signs}?{digit}*(\.)?{digit}+
word {lowerLetter}+({letter}*{digit}*)*
string \"[^\"]*\"
day ("Monday"|"Tuesday"|"Wednesday"|"Thursday"|"Friday"|"Saturday"|"Sunday")
month ("January"|"February"|"March"|"April"|"May"|"June"|"July"|"August"|"September"|"October"|"November"|"December")
time {day}(\,)(\ )[0-3][1-9](\ ){month}(\ ){digit}*(\ )[0-2][0-9](\:)[0-5][0-9](\:)[0-5][0-9]((\ )("GMT")(((\+)|(\-)){integer}(\:){integer})?)?
sensor (\$)("s"){digit}*
switch (\$)("sw"){digit}
url ("http")("s")?("://")("www.")?[a-zA-Z0-9]*(\.)(.)*
type ("integer"|"double"|"string"|"sensor"|"switch"|"url"|"boolean"|"time"|"letter")
boolean ("true")|("false")|([a-zA-Z0-9]+((\ )*)?(((\=|\<|\>|\!)(\=))|(\>|\<))+((\ )*)?[a-zA-Z0-9]+)
identifier ({letter}*|{digit}*|(\_)*)*
%%
while return WHILE;
for return FOR;
return return RETURN;
sysin return SYSIN;
sysout return SYSOUT;
if return IF;
main return MAIN;
end return END;
else return ELSE;
fun return FUNCTION_IDENTIFIER;
cons return CONS;
isURL return IS_URL;
connect return CONNECT;
send return SEND;
receive return RECEIVE;
\\\n return NL;
\. return DOT;
\, return COMMA;
\: return COLON;
\; return SEMICOLON;
\+ return PLUS_OP;
\- return MINUS_OP;
\* return MULTIPLY_OP;
\/ return DIVIDE_OP;
\% return MOD_OP;
\# return HASHTAG;
\$ return SENSOR_IDENTIFIER;
\^ return POWER_OP;
\_ return UNDER_SCORE;
\? return QUESTION;
\! return NOT;
\( return LP;
\) return RP;
\{ return LCB;
\} return RCB;
\[ return LSB;
\] return RSB;
\= return ASSN_OP;
\> return GT;
\< return LT;
\=\= return EQ;
\>\= return GEQ;
\<\= return LEQ;
\!\= return NE;
\&\& return AND;
\|\| return OR;
\/\/ return DS;
{day} return DAY;
{month} return MONTH;
{time} return TIME;
{type} return TYPE;
{lowerLetter} return LOWER_LETTER;
{letter} return LETTER;
{integer} return INTEGER;
{string} return STRING;
{sensor} return SENSOR;
{switch} return SWITCH;
{double} return DOUBLE;
{boolean} return BOOLEAN;
{url} return URL;
{word} return WORD;
{identifier} return IDENTIFIER;
[ \t] ;
%%
int yywrap(void)
{
return 1;
}
My YACC code:
%{
#include "stdio.h"
#include <stdlib.h>
void yyerror(char *);
extern int yylineno;
#include "y.tab.h"
int yylex(void);
%}
%token MAIN
%token DOT COMMA COLON SEMICOLON UNDER_SCORE QUESTION LP RP LCB RCB LSB RSB DS NL
%token WHILE FOR RETURN SYSIN SYSOUT IF END ELSE FUNCTION_IDENTIFIER NOT CONS PLUS_OP MINUS_OP MULTIPLY_OP DIVIDE_OP MOD_OP HASHTAG SENSOR_IDENTIFIER POWER_OP
%token ASSN_OP GT LT EQ GEQ LEQ NE AND OR DAY MONTH TIME TYPE IS_URL CONNECT SEND RECEIVE
%token LOWER_LETTER LETTER WORD STRING INTEGER DOUBLE BOOLEAN SENSOR SWITCH URL IDENTIFIER
%nonassoc ELSE
%left PLUS_OP MINUS_OP
%left MULTIPLY_OP DIVIDE_OP
%left POWER_OP MOD_OP
%start program
%%
program:
stmts {printf("\rProgram is valid.\n");};
stmts: stmt
| stmts stmt;
stmt: if_stmt
| non_if_stmt ;
if_stmt: IF LP logical_expr RP LCB stmts RCB
| IF LP logical_expr RP LCB stmts RCB ELSE LCB stmts RCB;
non_if_stmt: loops
| aritmetic_op
| func_call
| func_dec
| initialize
| decl
| decl_ini
| input_stmt
| output_stmt
//| comment
| url_checker
| send
| receive
| connect;
loops: while_loop
| for_loop;
while_loop: WHILE LP logical_expr RP LCB stmts RCB;
for_loop: FOR LP decl_ini SEMICOLON logical_expr SEMICOLON aritmetic_op RP LCB stmts RCB;
initialize: IDENTIFIER ASSN_OP value SEMICOLON;
logical_expr: logical_term logical_op logical_term
| logical_expr logical_connector logical_term
| boolean_stmt;
boolean_stmt: BOOLEAN
| NOT boolean_stmt;
logical_term: term;
//| logical_term AND term;
logical_connector: AND
| OR;
term: IDENTIFIER
| constant;
constant: CONS IDENTIFIER;
logical_op: EQ
| NE
| LT
| GT
| LEQ
| GEQ;
decl: TYPE term SEMICOLON;
decl_ini: TYPE term ASSN_OP value SEMICOLON;
value: number
| STRING
| SENSOR
| SWITCH
| URL
| BOOLEAN
| LETTER
| TIME;
number: DOUBLE
| INTEGER;
/*
value_list: value
| value_list COMMA value;
array_decl: TYPE term LSB RSB SEMICOLON;
array_ini: TYPE term LSB RSB ASSN_OP LSB value_list RSB SEMICOLON
| term ASSN_OP LSB value_list RSB SEMICOLON;
get_array_val: term LSB INTEGER RSB;
*/
func_dec: FUNCTION_IDENTIFIER TYPE IDENTIFIER LP arguments RP LCB function_block RCB;
func_call: IDENTIFIER LP arguments RP SEMICOLON;
function_block: stmts
| stmts RETURN value
| stmts RETURN term;
arguments: TYPE term
| arguments COMMA TYPE term;
aritmetic_op: addition
| subtraction
| multiplication
| division
| modulo
| power;
addition: aritmetic_op PLUS_OP term
| term PLUS_OP term
| term PLUS_OP aritmetic_op;
subtraction: aritmetic_op MINUS_OP term
| term MINUS_OP term
| term MINUS_OP aritmetic_op;
multiplication: aritmetic_op MULTIPLY_OP term
| term MULTIPLY_OP term
| term MULTIPLY_OP aritmetic_op;
division: aritmetic_op DIVIDE_OP term
| term DIVIDE_OP term
| term DIVIDE_OP aritmetic_op;
modulo: aritmetic_op MOD_OP term
| term MOD_OP term
| term MOD_OP aritmetic_op;
power: aritmetic_op POWER_OP term
| term POWER_OP term
| term POWER_OP aritmetic_op;
/*
comment:
| DS sentence NL;
sentence:
| IDENTIFIER
| DOT | COMMA | COLON | SEMICOLON | PLUS_OP | MINUS_OP | MULTIPLY_OP | DIVIDE_OP
| MOD_OP | HASHTAG | SENSOR_IDENTIFIER | POWER_OP | UNDER_SCORE | QUESTION | NOT
| LP | RP | LCB | RCB | ASSN_OP | GT | LT
| sentence sentence;
*/
input_stmt: TYPE term ASSN_OP SYSIN LP RP SEMICOLON
| SYSIN LP RP SEMICOLON;
output_stmt: SYSOUT LP output RP SEMICOLON;
output: term
| value
| aritmetic_op
| output COMMA term;
url_checker: IS_URL LP STRING RP
| IS_URL LP term RP;
connect: CONNECT LP URL RP;
send: SEND LP number COMMA URL RP;
receive: RECEIVE LP URL RP;
%%
void yyerror(char *s)
{
fprintf(stderr, "syntax error at line: %d %s\n", yylineno, s);
}
int main(void){
yyparse();
if(yynerrs < 1) printf("there are no syntax errors!!\n");
}
My Simple Test Program:
integer a = 5;
double e = 5.5;
double f = 3.0;
sadasd
My Extended Test Program
sensor a = $s1;
switch b = $sw1;
integer c = -5;
integer d = 75;
double e = 5.5;
double f = 3.0;
c = c + d;
f = f + e;
sysout (e + f);
integer g = sysin();
url k = https://www.cs.bilkent.edu.tr/~guvenir/courses/CS315/Pr1.htm;
url l = https://docs.oracle.com/cd/E19504-01/802-5880/lex-6/index.html;
if(isURL(k)){connect(k);} //Connecting to URL k after checking if its a URL
send(g, k); //Sending the integer g to the URL k
integer h = receive(l); //Receiving the integer h from URL l
sysout (h); //Printing the integer h which we took from the URL l
if (e > f) {
e = f + e;
}else{
e = f - e;
}
time t = Friday, 14 October 2022 18:05:52
time t = Monday, 17 October 2022 12:05:52 GMT
time t = Saturday, 22 October 2022 20:05:52 GMT+03:00 //Defining time in different ways
while (e > f) {
e = e - f;
}
for (integer i = 0, e <= f, i = i + 1) {
e = e + i;
}
fun boolean isGreater(integer x, integer y){
boolean z = (x > y);
return z;
}
boolean g = isGreater(e, f);
sysout (k);
sysout (l);
And My Make file:
LEX = lex
YACC = yacc -d
CC = gcc
all: parser clean
parser: y.tab.o lex.yy.o
$(CC) -o parser y.tab.o lex.yy.o
./parser < test.txt
lex.yy.o: lex.yy.c y.tab.h
lex.yy.o y.tab.o: y.tab.c
y.tab.c y.tab.h: y.y
$(YACC) -v y.y
lex.yy.c: lex.l
$(LEX) lex.l
clean:
-rm -f *.o lex.yy.c *.tab.* parser *.output

Related

OCaml: parse minus floating-point number as a calculator

What I would like to do
I would like to correctly parse negative floating-point numbers (numbers written with a leading minus sign).
How should I fix my code?
What is not working
When I try to interpret - 5 as -5.000000, it shows me this error.
Fatal error: exception Stdlib.Parsing.Parse_error
1c1
< error: parse error at char=0, near token '-'
---
> - 5 = -5.000000
My source code
calc_ast.ml
(* abstract syntax tree *)
type expr =
Num of float
| Plus of expr * expr
| Times of expr * expr
| Div of expr * expr
| Minus of expr * expr
;;
calc_lex.ml
{
open Calc_parse
;;
}
rule lex = parse
| [' ' '\t' '\n' ] { lex lexbuf }
| '-'? ['0' - '9']+ as s { NUM(float_of_string s) }
| '-'? ['0' - '9']+ ('.' digit*)? as s { NUM(float_of_string s) }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIV }
| '(' { LPAREN }
| ')' { RPAREN }
| eof { EOF }
calc_parse.mly
%{
%}
%token <float> NUM
%token PLUS TIMES EOF MINUS DIV LPAREN RPAREN
%start program
%type <Calc_ast.expr> program
%%
program :
| compound_expr EOF { $1 }
compound_expr :
| expr { $1 }
| LPAREN expr RPAREN { $2 }
expr :
| mul { $1 }
| expr PLUS mul { Calc_ast.Plus($1, $3) }
| expr MINUS mul { Calc_ast.Minus($1, $3) }
mul :
| NUM { Calc_ast.Num $1 }
| mul TIMES NUM { Calc_ast.Times($1, Calc_ast.Num $3) }
| mul DIV NUM { Calc_ast.Div($1, Calc_ast.Num $3) }
%%
calc.ml
open Calc_parse
(* token -> string *)
let string_of_token t =
match t with
NUM(s) -> Printf.sprintf "NUM(%f)" s
| PLUS -> "PLUS"
| TIMES -> "TIMES"
| MINUS -> "MINUS"
| DIV -> "DIV"
| LPAREN -> "LPAREN"
| RPAREN -> "RPAREN"
| EOF -> "EOF"
;;
(* print token t and return it *)
let print_token t =
Printf.printf "%s\n" (string_of_token t);
t
;;
(* apply lexer to string s *)
let lex_string s =
let rec loop b =
match print_token (Calc_lex.lex b) with
EOF -> ()
| _ -> loop b
in
loop (Lexing.from_string s)
;;
(* apply parser to string s;
show some info when a parse error happens *)
let parse_string s =
let b = Lexing.from_string s in
try
program Calc_lex.lex b (* main work *)
with Parsing.Parse_error as exn ->
(* handle parse error *)
let c0 = Lexing.lexeme_start b in
let c1 = Lexing.lexeme_end b in
Printf.fprintf stdout
"error: parse error at char=%d, near token '%s'\n"
c0 (String.sub s c0 (c1 - c0));
raise exn
;;
(* evaluate expression (AST tree) *)
let rec eval_expr e =
match e with
Calc_ast.Num(c) -> c
| Calc_ast.Plus(e0, e1)
-> (eval_expr e0) +. (eval_expr e1)
| Calc_ast.Minus(e0, e1)
-> (eval_expr e0) -. (eval_expr e1)
| Calc_ast.Times(e0, e1)
-> (eval_expr e0) *. (eval_expr e1)
| Calc_ast.Div(e0, e1)
-> (eval_expr e0) /. (eval_expr e1)
;;
(* evaluate string *)
let eval_string s =
let e = parse_string s in
eval_expr e
;;
(* evaluate string and print it *)
let eval_print_string s =
let y = eval_string s in
Printf.printf "%s = %f\n" s y
;;
let eval_print_stdin () =
let ch = stdin in
let s = input_line ch in
eval_print_string (String.trim s)
;;
let main argv =
eval_print_stdin ()
;;
if not !Sys.interactive then
main Sys.argv
;;
As indicated in the comments, it's almost never a good idea for the lexical analyser to try to recognise the - as part of a numeric literal:
Since the lexical token must be a contiguous string, - 5 will not match. Instead, you'll get two tokens. So you need to handle that in the parser anyway.
On the other hand, if you don't put a space after the -, then 3-4 will be analysed as the two tokens 3 and -4, which is also going to lead to a syntax error.
A simple solution is to add term to recognise the unary negation operator:
/* Every nonterminal below yields a Calc_ast.expr, so the actions compose
   without re-wrapping values in Calc_ast.Num. */
mul :
| term { $1 }
| mul TIMES term { Calc_ast.Times($1, $3) }
| mul DIV term { Calc_ast.Div($1, $3) }
/* term: a literal, a unary negation, or a parenthesised expression.
   Unary minus is encoded as (0.0 - operand) because the AST has no
   dedicated negation node. */
term :
| NUM { Calc_ast.Num $1 }
| MINUS term { Calc_ast.Minus(Calc_ast.Num 0.0, $2) }
| LPAREN expr RPAREN { $2 }
In the above, I also moved the handling of parentheses from the bottom to the top of the hierarchy, in order to make 4*(5+3) possible. With that change, you will no longer require compound_expr.

Parser to verify declarations of type int and float in C language

I'm trying to write a parser to verify the following declarations of type int and float in C language.
variables declarations, pointer variable declarations, array of any dimensions
float a , b , r = 5, area = r * r , * b;
int a , b , c , ** p ;
int x , mat [2][3];
This is my lex file
%{
#include "y.tab.h"
extern int yylval;
%}
%%
"int" return INT;
"float" return FLOAT;
[0-9]+ return NUM;
[_|a-z|A-Z]([_|a-z|A-Z|0-9])*{1,255} return NAME;
[+\-*/] return op;
[ \t\n];
. return yytext[0];
%%
This is my yacc file
%{
#include<stdio.h>
int yylex() ;
int yyerror();
%}
%token NUM NAME op INT FLOAT
%%
stmt_list: stmt | stmt_list stmt;
stmt: type id_list ';' { printf("Valid Declaration\n"); };
type: INT | FLOAT;
id_list: id ',' id_list | id ;
id: NAME'='expr | expr;
expr: expr op expr | POINT expr | expr MATRIX | '(' expr')' | NAME;
MATRIX: '[' NUM ']' | '[' NUM ']' MATRIX ;
POINT: '*' | '*'POINT;
%%
int main(){
yyparse();
return 0;
}
int yyerror(){
printf("Invalid Declaration\n");
return -1;
}
Even if I enter "int a;" as input, I get "Invalid Declaration". I'm not able to figure out what I'm doing wrong.

How to write yacc grammar rules to identify function definitions vs function calls?

I have started learning about YACC, and I have executed a few examples of simple toy programs. But I have never seen a practical example that demonstrates how to build a compiler that identifies and implements function definitions and function calls, array implementation and so on, nor has it been easy to find an example using Google search. Can someone please provide one example of how to generate the tree using YACC? C or C++ is fine.
Thanks in advance!
Let's parse this code with yacc.
file test contains valid C code that we want to parse.
int main (int c, int b) {
int a;
while ( 1 ) {
int d;
}
}
A lex file c.l
alpha [a-zA-Z]
digit [0-9]
%%
[ \t] ;
[ \n] { yylineno = yylineno + 1;}
int return INT;
float return FLOAT;
char return CHAR;
void return VOID;
double return DOUBLE;
for return FOR;
while return WHILE;
if return IF;
else return ELSE;
printf return PRINTF;
struct return STRUCT;
^"#include ".+ ;
{digit}+ return NUM;
{alpha}({alpha}|{digit})* return ID;
"<=" return LE;
">=" return GE;
"==" return EQ;
"!=" return NE;
">" return GT;
"<" return LT;
"." return DOT;
\/\/.* ;
\/\*(.*\n)*.*\*\/ ;
. return yytext[0];
%%
file c.y for input to YACC:
%{
#include <stdio.h>
#include <stdlib.h>
extern FILE *fp;
%}
%token INT FLOAT CHAR DOUBLE VOID
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT
%token NUM ID
%token INCLUDE
%token DOT
%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
%%
start: Function
| Declaration
;
/* Declaration block */
Declaration: Type Assignment ';'
| Assignment ';'
| FunctionCall ';'
| ArrayUsage ';'
| Type ArrayUsage ';'
| StructStmt ';'
| error
;
/* Assignment block */
Assignment: ID '=' Assignment
| ID '=' FunctionCall
| ID '=' ArrayUsage
| ArrayUsage '=' Assignment
| ID ',' Assignment
| NUM ',' Assignment
| ID '+' Assignment
| ID '-' Assignment
| ID '*' Assignment
| ID '/' Assignment
| NUM '+' Assignment
| NUM '-' Assignment
| NUM '*' Assignment
| NUM '/' Assignment
| '\'' Assignment '\''
| '(' Assignment ')'
| '-' '(' Assignment ')'
| '-' NUM
| '-' ID
| NUM
| ID
;
/* Function Call Block */
FunctionCall : ID'('')'
| ID'('Assignment')'
;
/* Array Usage */
ArrayUsage : ID'['Assignment']'
;
/* Function block */
Function: Type ID '(' ArgListOpt ')' CompoundStmt
;
ArgListOpt: ArgList
|
;
ArgList: ArgList ',' Arg
| Arg
;
Arg: Type ID
;
CompoundStmt: '{' StmtList '}'
;
StmtList: StmtList Stmt
|
;
Stmt: WhileStmt
| Declaration
| ForStmt
| IfStmt
| PrintFunc
| ';'
;
/* Type Identifier block */
Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;
/* Loop Blocks */
WhileStmt: WHILE '(' Expr ')' Stmt
| WHILE '(' Expr ')' CompoundStmt
;
/* For Block */
ForStmt: FOR '(' Expr ';' Expr ';' Expr ')' Stmt
| FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
| FOR '(' Expr ')' Stmt
| FOR '(' Expr ')' CompoundStmt
;
/* IfStmt Block */
IfStmt : IF '(' Expr ')'
Stmt
;
/* Struct Statement */
StructStmt : STRUCT ID '{' Type Assignment '}'
;
/* Print Function */
PrintFunc : PRINTF '(' Expr ')' ';'
;
/*Expression Block*/
Expr:
| Expr LE Expr
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Assignment
| ArrayUsage
;
%%
#include"lex.yy.c"
#include<ctype.h>
int count=0;
/*
 * Entry point: parse the file named by the first command-line argument.
 *
 * Prints "Parsing complete" when yyparse() returns 0 and "Parsing failed"
 * otherwise. Returns 1 if no file name was given or the file cannot be
 * opened; returns 0 after any parse attempt, as before.
 */
int main(int argc, char *argv[])
{
    /* Without this check, argv[1] is NULL when no argument is supplied. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argv[0]);
        return 1;
    }

    yyin = fopen(argv[1], "r");
    /* A failed open would otherwise reach yyparse()/fclose() with NULL. */
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    if (!yyparse())
        printf("\nParsing complete\n");
    else
        printf("\nParsing failed\n");

    fclose(yyin);
    return 0;
}
yyerror(char *s) {
printf("%d : %s %s\n", yylineno, s, yytext );
}
A Makefile to put it together. I use flex-lexer and bison but the example will also work with lex and yacc.
miniC: c.l c.y
bison c.y
flex c.l
gcc c.tab.c -ll -ly
Compile and parse the test code:
$ make
bison c.y
flex c.l
gcc c.tab.c -ll -ly
c.tab.c: In function ‘yyparse’:
c.tab.c:1273:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
yychar = yylex ();
^
c.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’ [-Wimplicit-function-declaration]
yyerror (YY_("syntax error"));
^
c.y: At top level:
c.y:155:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
yyerror(char *s) {
^
$ ls
a.out c.l CMakeLists.txt c.tab.c c.y lex.yy.c Makefile README.md test
$ ./a.out test
Parsing complete
For reading resources I can recommend the books Modern Compiler Implementation in C by Andrew Appel and the flex/bison book by John Levine.

Flex and Yacc Grammar Issue

Edit #1: I think the problem is in my .l file. I don't think the rules are being treated as rules, and I'm not sure how to treat the terminals of the rules as strings.
My last project for a compilers class is to write a .l and a .y file for a simple SQL grammar. I have no experience with Flex or Yacc, so everything I have written I have pieced together. I only have a basic understanding of how these files work, so if you spot my problem can you also explain what that section of the file is supposed to do? I'm not even sure what the '%' symbols do.
Basically some rules just do not work when I try to parse something. Some rules hang and others reject when they should accept. I need to implement the following grammar:
start
::= expression
expression
::= one-relation-expression | two-relation-expression
one-relation-expression
::= renaming | restriction | projection
renaming
::= term RENAME attribute AS attribute
term
::= relation | ( expression )
restriction
::= term WHERE comparison
projection
::= term | term [ attribute-commalist ]
attribute-commalist
::= attribute | attribute , attribute-commalist
two-relation-expression
::= projection binary-operation expression
binary-operation
::= UNION | INTERSECT | MINUS | TIMES | JOIN | DIVIDEBY
comparison
::= attribute compare number
compare
::= < | > | <= | >= | = | <>
number
::= val | val number
val
::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
attribute
::= CNO | CITY | CNAME | SNO | PNO | TQTY |
SNAME | QUOTA | PNAME | COST | AVQTY |
S# | STATUS | P# | COLOR | WEIGHT | QTY
relation
::= S | P | SP | PRDCT | CUST | ORDERS
Here is my .l file:
%{
#include <stdio.h>
#include "p5.tab.h"
%}
binaryOperation UNION|INTERSECT|MINUS|TIMES|JOIN|DIVIDEBY
compare <|>|<=|>=|=|<>
attribute CNO|CITY|CNAME|SNO|PNO|TQTY|SNAME|QUOTA|PNAME|COST|AVQTY|S#|STATUS|P#|COLOR|WEIGHT|QTY
relation S|P|SP|PRDCT|CUST|ORDERS
%%
[ \t\n]+ ;
{binaryOperation} return binaryOperation;
{compare} return compare;
[0-9]+ return val;
{attribute} return attribute;
{relation} return relation;
"RENAME" return RENAME;
"AS" return AS;
"WHERE" return WHERE;
"(" return '(';
")" return ')';
"[" return '[';
"]" return ']';
"," return ',';
. {printf("REJECT\n");
exit(0);}
%%
Here is my .y file:
%{
#include <stdio.h>
#include <stdlib.h>
%}
%token RENAME attribute AS relation WHERE binaryOperation compare val
%%
start:
expression {printf("ACCEPT\n");}
;
expression:
oneRelationExpression
| twoRelationExpression
;
oneRelationExpression:
renaming
| restriction
| projection
;
renaming:
term RENAME attribute AS attribute
;
term:
relation
| '(' expression ')'
;
restriction:
term WHERE comparison
;
projection:
term
| term '[' attributeCommalist ']'
;
attributeCommalist:
attribute
| attribute ',' attributeCommalist
;
twoRelationExpression:
projection binaryOperation expression
;
comparison:
attribute compare number
;
number:
val
| val number
;
%%
yyerror() {
printf("REJECT\n");
exit(0);
}
main() {
yyparse();
}
/* Called by the scanner at end of input; returning 1 tells it there is no
   further input to switch to, so scanning stops. */
int yywrap() { return 1; }
Here is my makefile:
p5: p5.tab.c lex.yy.c
cc -o p5 p5.tab.c lex.yy.c
p5.tab.c: p5.y
bison -d p5.y
lex.yy.c: p5.l
flex p5.l
This works:
S RENAME CNO AS CITY
These do not:
S
S WHERE CNO = 5
I have not tested everything, but I think there is a common problem for these issues.
Your grammar is correct, the problem is that you are running interactively. When you call yyparse() it will attempt to read all input. Because the input
S
could be followed by either RENAME or WHERE it won't accept. Similarly,
S WHERE CNO = 5
could be followed by one or more numbers, so yyparse won't accept until it gets an EOF or an unexpected token.
What you want to do is follow the advice here and change p5.l to have these lines:
[ \t]+ ;
\n if (yyin==stdin) return 0;
That way when you are running interactively it will take the ENTER key to be the end of input.
Also, you want to use left recursion for number:
number:
val
| number val
;

Removing ambiguity in bison

I am writing a simple parser in bison. The parser checks whether a program has any syntax errors with respect to my following grammar:
%{
#include <stdio.h>
void yyerror (const char *s) /* Called by yyparse on error */
{
printf ("%s\n", s);
}
%}
%token tNUM tINT tREAL tIDENT tINTTYPE tREALTYPE tINTMATRIXTYPE
%token tREALMATRIXTYPE tINTVECTORTYPE tREALVECTORTYPE tTRANSPOSE
%token tIF tENDIF tDOTPROD tEQ tNE tGTE tLTE tGT tLT tOR tAND
%left "(" ")" "[" "]"
%left "<" "<=" ">" ">="
%right "="
%left "+" "-"
%left "*" "/"
%left "||"
%left "&&"
%left "==" "!="
%% /* Grammar rules and actions follow */
prog: stmtlst ;
stmtlst: stmt | stmt stmtlst ;
stmt: decl | asgn | if;
decl: type vars "=" expr ";" ;
type: tINTTYPE | tINTVECTORTYPE | tINTMATRIXTYPE | tREALTYPE | tREALVECTORTYPE
| tREALMATRIXTYPE ;
vars: tIDENT | tIDENT "," vars ;
asgn: tIDENT "=" expr ";" ;
if: tIF "(" bool ")" stmtlst tENDIF ;
expr: tIDENT | tINT | tREAL | vectorLit | matrixLit | expr "+" expr| expr "-" expr
| expr "*" expr | expr "/" expr| expr tDOTPROD expr | transpose ;
transpose: tTRANSPOSE "(" expr ")" ;
vectorLit: "[" row "]" ;
matrixLit: "[" row ";" rows "]" ;
row: value | value "," row ;
rows: row | row ";" rows ;
value: tINT | tREAL | tIDENT ;
bool: comp | bool tAND bool | bool tOR bool ;
comp: expr relation expr ;
relation: tGT | tLT | tGTE | tLTE | tNE | tEQ ;
%%
int main ()
{
if (yyparse()) {
// parse error
printf("ERROR\n");
return 1;
}
else {
// successful parsing
printf("OK\n");
return 0;
}
}
The code may look long and complicated, and I think my question does not need all of it, but I preferred to include the full code anyway. I am sure my grammar is correct, but it is ambiguous. When I try to generate the parser by running "bison -d filename.y", I get an error saying "conflicts: 13 shift/reduce". I defined the precedence of the operators at the beginning of this file, and I tried many combinations of these precedences, but I still get this error. How can I remove this ambiguity? Thank you.
tOR, tAND, and tDOTPROD need to have their precedence specified as well.

Resources