Reduce/reduce conflicts - parsing

I'm trying to create a parser for a simple language, but I can't get rid of some reduce/reduce conflicts because of the rules expr: l_value and r_value: '#' l_value. I tried to fix it determining a precedence for the symbol '#' but it didn't help. A minimal, reproducible example is that:
%{
#include <cstdio>
#include "lexer.hpp"
%}
%define parse.error verbose /*to help me debug the parser*
%token T_if "if"
%token T_then "then"
%token T_do "do"
%token T_begin "begin"
%token T_end "end"
%token T_and "and"
%token T_of "of"
%token T_array "array"
%token T_else "else"
%token T_integer "integer"
%token T_or "or"
%token T_true "true"
%token T_label "label"
%token T_procedure "procedure"
%token T_var "var"
%token T_boolean "boolean"
%token T_false "false"
%token T_mod "mod"
%token T_program "program"
%token T_while "while"
%token T_char "char"
%token T_forward "forward"
%token T_new "new"
%token T_real "real"
%token T_dispose "dispose"
%token T_function "function"
%token T_nil "nil"
%token T_result "result"
%token T_div "div"
%token T_goto "goto"
%token T_not "not"
%token T_return "return"
%token T_id
%token T_realnum
%token T_int
%token T_character
%token T_string
%token T_assign ":="
%token T_greaterequal ">="
%token T_lessequal "<="
%token T_notequal "<>"
%nonassoc '=' '<' '>' "<=" ">=" "<>" ":="
%left '+' '-' "or"
%left '*' '/' "div" "mod" "and"
%left UMINUS
%right '^'
%left '#'
%left '[' '('
%expect 1
%%
expr:
l_value
|r_value
;
l_value:
T_id
|"result"
|T_string
|l_value '[' expr ']'
|expr '^'
|'(' l_value ')'
;
r_value:
T_int
|"true"
|"false"
|T_realnum
|T_character
|'(' r_value ')'
|"nil"
|call
| expr '<' expr
| expr '>' expr
| expr "<=" expr
| expr ">=" expr
| expr '=' expr
| expr "<>" expr
| expr '+' expr
| expr '-' expr
| expr "or" expr
| expr '*' expr
| expr '/' expr
| expr "and" expr
| expr "div" expr
| expr "mod" expr
|"not" expr %prec UMINUS
|'+' expr %prec UMINUS
|'-' expr %prec UMINUS
|'#' l_value
;
call:
T_id '(' expr text6
;
text6:
')' | ',' expr text6
;
%%
extern int lineno;
int main() {
#ifdef YYDEBUG
yydebug = 1;
#endif
int result = yyparse();
if (result == 0) printf("Success.\n");
return result;
}
`
In parser.output I get this:
1 expr: l_value .
6 l_value: l_value . '[' expr ']'
34 r_value: '#' l_value .
'[' shift, and go to state 43
$end reduce using rule 34 (r_value)
"and" reduce using rule 1 (expr)
"and" [reduce using rule 34 (r_value)]
"or" reduce using rule 1 (expr)
"or" [reduce using rule 34 (r_value)]
"mod" reduce using rule 1 (expr)
"mod" [reduce using rule 34 (r_value)]
"div" reduce using rule 1 (expr)
"div" [reduce using rule 34 (r_value)]
">=" reduce using rule 1 (expr)
">=" [reduce using rule 34 (r_value)]
"<=" reduce using rule 1 (expr)
"<=" [reduce using rule 34 (r_value)]
"<>" reduce using rule 1 (expr)
"<>" [reduce using rule 34 (r_value)]
'=' reduce using rule 1 (expr)
'=' [reduce using rule 34 (r_value)]
'<' reduce using rule 1 (expr)
'<' [reduce using rule 34 (r_value)]
'>' reduce using rule 1 (expr)
'>' [reduce using rule 34 (r_value)]
'+' reduce using rule 1 (expr)
'+' [reduce using rule 34 (r_value)]
'-' reduce using rule 1 (expr)
'-' [reduce using rule 34 (r_value)]
'*' reduce using rule 1 (expr)
'*' [reduce using rule 34 (r_value)]
'/' reduce using rule 1 (expr)
'/' [reduce using rule 34 (r_value)]
'^' reduce using rule 1 (expr)
'^' [reduce using rule 34 (r_value)]
']' reduce using rule 34 (r_value)
')' reduce using rule 34 (r_value)
',' reduce using rule 34 (r_value)
$default reduce using rule 1 (expr)

Related

Resolving Shift/reduce conflicts in GNU Bison

I have the following grammar rules:
%precedence KW2
%left "or"
%left "and"
%left "==" "!=" ">=" ">" "<=" "<"
%left "-" "+"
%left "/" "*"
%start statement1
%%
param
: id
| id ":" expr // Conflict is caused by this line
| id "=" expr
;
param_list
: param_list "," param
| param
;
defparam
: param_list "," "/"
| param_list "," "/" ","
;
param_arg_list
: defparam param_list
| param_list
;
statement1
: KEYWORD1 "(" param_arg_list ")" ":" expr {}
expression1
: KEYWORD2 param_arg_list ":" expr %prec KW2 {} // This causes shift/reduce conflicts
expr
: id
| expr "+" expr
| expr "-" expr
| expr "*" expr
| expr "/" expr
| expr "==" expr
| expr "!=" expr
| expr "<" expr
| expr "<=" expr
| expr ">" expr
| expr ">=" expr
| expr "and" expr
| expr "or" expr
| expression1
id
: TK_NAME {}
.output
State 33
12 param: id . [":", ",", ")"]
13 | id . ":" expr
14 | id . "=" expr
":" shift, and go to state 55
"=" shift, and go to state 56
":" [reduce using rule 12 (param)]
$default reduce using rule 12 (param)
The problem here is that, For the expression1, id ":" expr rule in param is not required, so If I remove id ":" expr, the conflicts are resolved. But, I can not remove id ":" expr rule in param, because statement1 requires it.
I wanted to use para_arg_list for statement1 and expression1 is that, it simplifies the grammar rules by not allowing to use the grammar rules again and again.
My question is that is there any other way to resolve the conflict?

conflicts: 2 shift/reduce

I'm trying to write a little interpreter with GNU bison.
I wanted to ask if anyone could explain the difference between the directive% right and% left and where my mistake is in the code below.
%token <flo> FLO
%token <name> NAME
%right '='
%left '+' '-'
%left '*' '/' '%'
%left '&' '|' 'x'
%left NEG NOT LOGIC_NOT
%left '^'
%left ARG
%type <flo> exp
%%
language: /* nothing */
| language statment
statment: '\n'
| exp
| error { yyerrok; }
;
exp: FLO { $$ = $1; }
| NAME '(' ')' { $$ = ycall($1); }
| NAME '(' exp ')' { $$ = ycall($1, $3); }
| NAME '(' exp ',' exp ')' { $$ = ycall($1, $3, $5); }
| NAME '=' exp { $$ = 1; ysetvar($1, $3); }
| NAME %prec VAR { $$ = ygetvar($1); }
| '_' exp %prec ARG { $$ = ygetarg($2, args); }
| '(' exp ')' { $$ = $2; }
/* 1 Operand */
| '-' exp %prec NEG { $$ = - $2; }
| '~' exp %prec NOT { $$ = ~ static_cast<int>($2); }
| '!' exp %prec LOGIC_NOT { $$ = ! static_cast<int>($2); }
/* 2 Operands */
| exp '+' exp { $$ = $1 + $3; }
| exp '-' exp { $$ = $1 - $3; }
| exp '*' exp { $$ = $1 * $3; }
| exp '/' exp { $$ = $1 / $3; }
| exp '%' exp { $$ = static_cast<int>($1) % static_cast<int>($3); }
| exp '^' exp { $$ = pow($1, $3); }
| exp '&' exp { $$ = static_cast<int>($1) & static_cast<int>($3); }
| exp '|' exp { $$ = static_cast<int>($1) | static_cast<int>($3); }
| exp 'x' exp { $$ = static_cast<int>($1) ^ static_cast<int>($3); }
;
Look at the y.output file produced by yacc or bison with the -v argument. The first conflict is in state 5:
State 5
7 exp: NAME . '(' ')'
8 | NAME . '(' exp ')'
9 | NAME . '(' exp ',' exp ')'
10 | NAME . '=' exp
11 | NAME .
'=' shift, and go to state 14
'(' shift, and go to state 15
'(' [reduce using rule 11 (exp)]
$default reduce using rule 11 (exp)
In this case the conflcit is when there's a '(' after a NAME -- this is an ambiguity in your grammar in which it might be a call expression, or it might be a simple NAME expression followed by a parenthesized expression, due to the fact that you have no separator between statements in your language.
The second conflict is:
State 13
4 statment: exp .
17 exp: exp . '+' exp
18 | exp . '-' exp
19 | exp . '*' exp
20 | exp . '/' exp
21 | exp . '%' exp
22 | exp . '^' exp
23 | exp . '&' exp
24 | exp . '|' exp
25 | exp . 'x' exp
'+' shift, and go to state 21
'-' shift, and go to state 22
'*' shift, and go to state 23
'/' shift, and go to state 24
'%' shift, and go to state 25
'&' shift, and go to state 26
'|' shift, and go to state 27
'x' shift, and go to state 28
'^' shift, and go to state 29
'-' [reduce using rule 4 (statment)]
$default reduce using rule 4 (statment)
which is essentially the same problem, this time with a '-' -- the input NAME - NAME might be a single binary subtract statements, or it might be two statements -- a NAME followed by a unary negate.
If you add a separator between statements (such as ;), both of these conflicts would go away.

Bison nonassociative precedence rule

I am trying to enforce some parsing errors by defining a non-associative precedence.
Here is part of my grammar file:
Comparison :
Value ComparisonOp Value
{
$2->Left($1);
$2->Right($3);
$$ = $2;
}
;
Expressions like 1 = 2 should parse, but expressions like 1 = 2 = 3 are not allowed in the grammar. To accommodate this, I tried to make my operator non associative as follows:
%nonassoc NONASSOCIATIVE
.
.(rest of the grammar)
.
Comparison :
Value ComparisonOp Value %prec NONASSOCIATIVE
{
$2->Left($1);
$2->Right($3);
$$ = $2;
}
;
1 = 2 = 3 still passes, Could someone please tell me what I am doing wrong?
You need to set the associativity of the ComparisonOp token(s). Either
%nonassoc ComparisonOp NONASSOCIATIVE
if ComparisonOp is a token or something like
%nonassoc '=' '<' '>' NOT_EQUAL GREATOR_OR_EQUAL LESS_OR_EQUAL NONASSOCIATIVE
if you have mulitple tokens and ComparisonOp is a rule that expands to any of them
As a concrete example, the following works exactly like you are requesting:
%{
#include <stdio.h>
#include <ctype.h>
int yylex();
void yyerror(const char *);
%}
%nonassoc '=' '<' '>' CMPOP
%left '+' '-' ADDOP
%left '*' '/' '%' MULOP
%token VALUE
%%
expr: expr cmp_op expr %prec CMPOP
| expr add_op expr %prec ADDOP
| expr mul_op expr %prec MULOP
| VALUE
| '(' expr ')'
;
cmp_op: '=' | '<' | '>' | '<' '=' | '>' '=' | '<' '>' ;
add_op: '+' | '-' ;
mul_op: '*' | '/' | '%' ;
%%
int main() { return yyparse(); }
int yylex() {
int ch;
while(isspace(ch = getchar()));
if (isdigit(ch)) return VALUE;
return ch;
}
void yyerror(const char *err) { fprintf(stderr, "%s\n", err); }
so if you are having other problems, try posting an MVCE that shows the actual problem you are having...

Attempting to resolve shift-reduce parsing issue

I'm attempting to write a grammar for C and am having an issue that I don't quite understand. Relevant portions of the grammar:
stmt :
types decl SEMI { marks (A.Declare ($1, $2)) (1, 2) }
| simp SEMI { marks $1 (1, 1) }
| RETURN exp SEMI { marks (A.Return $2) (1, 2) }
| control { $1 }
| block { marks $1 (1, 1) }
;
control :
if { $1 }
| WHILE RPAREN exp LPAREN stmt { marks (A.While ($3, $5)) (1, 5) }
| FOR LPAREN simpopt SEMI exp SEMI simpopt RPAREN stmt { marks (A.For ($3, $5, $7, $9)) (1, 9) }
;
if :
IF RPAREN exp LPAREN stmt { marks (A.If ($3, $5, None)) (1, 5) }
| IF RPAREN exp LPAREN stmt ELSE stmt { marks (A.If ($3, $5, $7)) (1, 7) }
;
This doesn't work. I ran ocamlyacc -v and got the following report:
83: shift/reduce conflict (shift 86, reduce 14) on ELSE
state 83
if : IF RPAREN exp LPAREN stmt . (14)
if : IF RPAREN exp LPAREN stmt . ELSE stmt (15)
ELSE shift 86
IF reduce 14
WHILE reduce 14
FOR reduce 14
BOOL reduce 14
IDENT reduce 14
RETURN reduce 14
INT reduce 14
MAIN reduce 14
LBRACE reduce 14
RBRACE reduce 14
LPAREN reduce 14
I've read that shift/reduce conflicts are due to ambiguity in the specification of the grammar, but I don't see how I can specify this in a way that isn't ambiguous?
The grammar is certainly ambiguous, although you know what every string means, and furthermore despite the fact that ocamlyacc reports a shift/reduce conflict, its generated grammar will also produce the correct parse for every valid input.
The ambiguity comes from
if ( exp1 ) if ( exp2) stmt1 else stmt2;
Clearly stmt1 only executes if both exp1 and exp2 are true. But does stmt1 execute if exp1 is false, or if exp1 is true and exp2 is false? Those represent different parses; the first (invalid) parse attaches else stmt2 to if (exp1), while the parse that you, I and ocamlyacc know to be correct attaches else stmt2 to if (exp2).
The grammar can be rewritten, although it's a bit of a nuisance. The basic idea is to divide statements into two categories: "matched" (which means that every else in the statement is matched with some if) and "unmatched" (which means that a following else would match some if in the statement. A complete statement may be unmatched, because else clauses are optional, but you can never have an unmatched statement between an if and an else, because that else must match an if in the unmatched statement.
The following grammar is basically the one you provided, but rewritten to use bison-style single-quoted tokens, which I find more readable. I don't know if ocamlyacc handles those. (By the way, your grammar says IF RPAREN exp LPAREN... which, with the common definition of left and right parentheses, would mean if ) exp (. That's one reason I find single-quoted character terminals much more readable.)
Bison handles this grammar with no conflicts.
/* Fake non-terminals */
%token types decl simp exp
/* Keywords */
%token ELSE FOR IF RETURN WHILE
%%
stmt: matched_stmt | unmatched_stmt ;
stmt_list: stmt | stmt_list stmt ;
block: '{' stmt_list '}' ;
matched_stmt
: types decl ';'
| simp ';'
| RETURN exp ';'
| block
| matched_control
;
simpopt : simp | /* EMPTY */;
matched_control
: IF '(' exp ')' matched_stmt ELSE matched_stmt
| WHILE '(' exp ')' matched_stmt
| FOR '(' simpopt ';' exp ';' simpopt ')' matched_stmt
;
unmatched_stmt
: IF '(' exp ')' stmt
| IF '(' exp ')' matched_stmt ELSE unmatched_stmt
| WHILE '(' exp ')' unmatched_stmt
| FOR '(' simpopt ';' exp ';' simpopt ')' unmatched_stmt
;
Personally, I'd refactor a bit. Eg:
if_prefix : IF '(' exp ')'
;
loop_prefix: WHILE '(' exp ')'
| FOR '(' simpopt ';' exp ';' simpopt ')'
;
matched_control
: if_prefix matched_stmt ELSE matched_stmt
| loop_prefix matched_stmt
;
unmatched_stmt
: if_prefix stmt
| if_prefix ELSE unmatched_stmt
| loop_prefix unmatched_stmt
;
A common and simpler but less rigorous solution is to use precedence declarations as suggested in the bison manual.

Removing ambiguity in bison

I am writing a simple parser in bison. The parser checks whether a program has any syntax errors with respect to my following grammar:
%{
#include <stdio.h>
void yyerror (const char *s) /* Called by yyparse on error */
{
printf ("%s\n", s);
}
%}
%token tNUM tINT tREAL tIDENT tINTTYPE tREALTYPE tINTMATRIXTYPE
%token tREALMATRIXTYPE tINTVECTORTYPE tREALVECTORTYPE tTRANSPOSE
%token tIF tENDIF tDOTPROD tEQ tNE tGTE tLTE tGT tLT tOR tAND
%left "(" ")" "[" "]"
%left "<" "<=" ">" ">="
%right "="
%left "+" "-"
%left "*" "/"
%left "||"
%left "&&"
%left "==" "!="
%% /* Grammar rules and actions follow */
prog: stmtlst ;
stmtlst: stmt | stmt stmtlst ;
stmt: decl | asgn | if;
decl: type vars "=" expr ";" ;
type: tINTTYPE | tINTVECTORTYPE | tINTMATRIXTYPE | tREALTYPE | tREALVECTORTYPE
| tREALMATRIXTYPE ;
vars: tIDENT | tIDENT "," vars ;
asgn: tIDENT "=" expr ";" ;
if: tIF "(" bool ")" stmtlst tENDIF ;
expr: tIDENT | tINT | tREAL | vectorLit | matrixLit | expr "+" expr| expr "-" expr
| expr "*" expr | expr "/" expr| expr tDOTPROD expr | transpose ;
transpose: tTRANSPOSE "(" expr ")" ;
vectorLit: "[" row "]" ;
matrixLit: "[" row ";" rows "]" ;
row: value | value "," row ;
rows: row | row ";" rows ;
value: tINT | tREAL | tIDENT ;
bool: comp | bool tAND bool | bool tOR bool ;
comp: expr relation expr ;
relation: tGT | tLT | tGTE | tLTE | tNE | tEQ ;
%%
int main ()
{
if (yyparse()) {
// parse error
printf("ERROR\n");
return 1;
}
else {
// successful parsing
printf("OK\n");
return 0;
}
}
The code may look long and complicated, but i think what i am going to ask does not need the full code, but in any case i preferred to write the code. I am sure my grammar is correct, but ambiguous. When i try to create the executable of the program by writing "bison -d filename.y", i get an error saying that conflicts: 13 shift/reduce. I defined the precedence of the operators at the beginning of this file, and i tried a lot of combinations of these precedences, but i still get this error. How can i remove this ambiguity? Thank you
tOR, tAND, and tDOTPROD need to have their precedence specified as well.

Resources