I can use this very nicely but in the expr i want to validate the expr tADD etc i created, but i can not access the expr inside operation, how do i access it ? it just returns the result that is 0
%{
#include <stdio.h>
void yyerror (const char *s)
#define YYSTYPE char *
extern YYSTYPE yylval;
extern int line;
{}
%}
%token tADD tSUB tMUL tDIV tSTRING tNUM tPRINT tGET tSET tFUNCTION tRETURN tIDENT tEQUALITY tIF tGT tLT tGEQ tLEQ tINC tDEC
%start prog
%%
prog: '[' stmtlst ']'
;
stmtlst: stmtlst stmt |
;
stmt: setStmt | if | print | unaryOperation | expr | returnStmt
;
getExpr: '[' tGET ',' tIDENT ',' '[' exprList ']' ']'
| '[' tGET ',' tIDENT ',' '[' ']' ']'
| '[' tGET ',' tIDENT ']'
;
setStmt: '[' tSET ',' tIDENT ',' expr ']'
;
if: '[' tIF ',' condition ',' '[' stmtlst ']' ']'
| '[' tIF ',' condition ',' '[' stmtlst ']' '[' stmtlst ']' ']'
;
print: '[' tPRINT ',' '[' expr ']' ']'
;
operation: '[' tADD ',' expr ',' expr ']' {
if($2){
printf("op\n");
}
printf("%d here ", $4);
}
| '[' tSUB ',' expr ',' expr ']'
| '[' tMUL ',' expr ',' expr ']'
| '[' tDIV ',' expr ',' expr ']'
;
unaryOperation: '[' tINC ',' tIDENT ']'
| '[' tDEC ',' tIDENT ']'
;
expr: tNUM | tSTRING | function | operation | condition
;
function: '[' tFUNCTION ',' '[' parametersList ']' ',' '[' stmtlst ']' ']'
| '[' tFUNCTION ',' '[' ']' ',' '[' stmtlst ']' ']'
;
condition: '[' tEQUALITY ',' expr ',' expr ']'
| '[' tGT ',' expr ',' expr ']'
| '[' tLT ',' expr ',' expr ']'
| '[' tGEQ ',' expr ',' expr ']'
| '[' tLEQ ',' expr ',' expr ']'
;
returnStmt: '[' tRETURN ',' expr ']'
| '[' tRETURN ']'
;
parametersList: parametersList ',' tIDENT | tIDENT
;
exprList: exprList ',' expr | expr{
printf("%s\n", $1);
}
;
%%
int main ()
{
if (yyparse()) {
// parse error
printf("ERROR\n");
return 1;
}
else {
// successful parsing
printf("OK\n");
return 0;
}
}
I use $4 but I always get 0 as the value for all Operations,
expr: tNUM | tSTRING | function | operation | condition
;
Also flex that i use
%{
#include "username-hw3.tab.h"
%}
%%
['][^']*['] return tSTRING;
["]Get["] return tGET;
["]Set["] return tSET;
["]Function["] return tFUNCTION;
["]Print["] return tPRINT;
["]If["] return tIF;
["]Return["] return tRETURN;
["]"+"["] return tADD;
["]"-"["] return tSUB;
["]"*"["] return tMUL;
["]"/"["] return tDIV;
["]"++"["] return tINC;
["]">"["] return tGT;
["]"=="["] return tEQUALITY;
["]"--"["] return tDEC;
["]"<"["] return tLT;
["]"<="["] return tLEQ;
["]">="["] return tGEQ;
["][_a-zA-Z][a-zA-Z0-9_]*["] return tIDENT;
[0-9]+ |
-[0-9]+ |
[0-9]+"."[0-9]+ |
-[0-9]+"."[0-9]+ return tNUM;
[ \t\n]+
. return yytext[0];
%%
Related
I have the following grammar rules:
%precedence KW2
%left "or"
%left "and"
%left "==" "!=" ">=" ">" "<=" "<"
%left "-" "+"
%left "/" "*"
%start statement1
%%
param
: id
| id ":" expr // Conflict is caused by this line
| id "=" expr
;
param_list
: param_list "," param
| param
;
defparam
: param_list "," "/"
| param_list "," "/" ","
;
param_arg_list
: defparam param_list
| param_list
;
statement1
: KEYWORD1 "(" param_arg_list ")" ":" expr {}
expression1
: KEYWORD2 param_arg_list ":" expr %prec KW2 {} // This causes shift/reduce conflicts
expr
: id
| expr "+" expr
| expr "-" expr
| expr "*" expr
| expr "/" expr
| expr "==" expr
| expr "!=" expr
| expr "<" expr
| expr "<=" expr
| expr ">" expr
| expr ">=" expr
| expr "and" expr
| expr "or" expr
| expression1
id
: TK_NAME {}
.output
State 33
12 param: id . [":", ",", ")"]
13 | id . ":" expr
14 | id . "=" expr
":" shift, and go to state 55
"=" shift, and go to state 56
":" [reduce using rule 12 (param)]
$default reduce using rule 12 (param)
The problem here is that, For the expression1, id ":" expr rule in param is not required, so If I remove id ":" expr, the conflicts are resolved. But, I can not remove id ":" expr rule in param, because statement1 requires it.
I wanted to use para_arg_list for statement1 and expression1 is that, it simplifies the grammar rules by not allowing to use the grammar rules again and again.
My question is that is there any other way to resolve the conflict?
I have started learning about YACC, and I have executed a few examples of simple toy programs. But I have never seen a practical example that demonstrates how to build a compiler that identifies and implements function definitions and function calls, array implementation and so on, nor has it been easy to find an example using Google search. Can someone please provide one example of how to generate the tree using YACC? C or C++ is fine.
Thanks in advance!
Let's parse this code with yacc.
file test contains valid C code that we want to parse.
int main (int c, int b) {
int a;
while ( 1 ) {
int d;
}
}
A lex file c.l
alpha [a-zA-Z]
digit [0-9]
%%
[ \t] ;
[ \n] { yylineno = yylineno + 1;}
int return INT;
float return FLOAT;
char return CHAR;
void return VOID;
double return DOUBLE;
for return FOR;
while return WHILE;
if return IF;
else return ELSE;
printf return PRINTF;
struct return STRUCT;
^"#include ".+ ;
{digit}+ return NUM;
{alpha}({alpha}|{digit})* return ID;
"<=" return LE;
">=" return GE;
"==" return EQ;
"!=" return NE;
">" return GT;
"<" return LT;
"." return DOT;
\/\/.* ;
\/\*(.*\n)*.*\*\/ ;
. return yytext[0];
%%
file c.y for input to YACC:
%{
#include <stdio.h>
#include <stdlib.h>
extern FILE *fp;
%}
%token INT FLOAT CHAR DOUBLE VOID
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT
%token NUM ID
%token INCLUDE
%token DOT
%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
%%
start: Function
| Declaration
;
/* Declaration block */
Declaration: Type Assignment ';'
| Assignment ';'
| FunctionCall ';'
| ArrayUsage ';'
| Type ArrayUsage ';'
| StructStmt ';'
| error
;
/* Assignment block */
Assignment: ID '=' Assignment
| ID '=' FunctionCall
| ID '=' ArrayUsage
| ArrayUsage '=' Assignment
| ID ',' Assignment
| NUM ',' Assignment
| ID '+' Assignment
| ID '-' Assignment
| ID '*' Assignment
| ID '/' Assignment
| NUM '+' Assignment
| NUM '-' Assignment
| NUM '*' Assignment
| NUM '/' Assignment
| '\'' Assignment '\''
| '(' Assignment ')'
| '-' '(' Assignment ')'
| '-' NUM
| '-' ID
| NUM
| ID
;
/* Function Call Block */
FunctionCall : ID'('')'
| ID'('Assignment')'
;
/* Array Usage */
ArrayUsage : ID'['Assignment']'
;
/* Function block */
Function: Type ID '(' ArgListOpt ')' CompoundStmt
;
ArgListOpt: ArgList
|
;
ArgList: ArgList ',' Arg
| Arg
;
Arg: Type ID
;
CompoundStmt: '{' StmtList '}'
;
StmtList: StmtList Stmt
|
;
Stmt: WhileStmt
| Declaration
| ForStmt
| IfStmt
| PrintFunc
| ';'
;
/* Type Identifier block */
Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;
/* Loop Blocks */
WhileStmt: WHILE '(' Expr ')' Stmt
| WHILE '(' Expr ')' CompoundStmt
;
/* For Block */
ForStmt: FOR '(' Expr ';' Expr ';' Expr ')' Stmt
| FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
| FOR '(' Expr ')' Stmt
| FOR '(' Expr ')' CompoundStmt
;
/* IfStmt Block */
IfStmt : IF '(' Expr ')'
Stmt
;
/* Struct Statement */
StructStmt : STRUCT ID '{' Type Assignment '}'
;
/* Print Function */
PrintFunc : PRINTF '(' Expr ')' ';'
;
/*Expression Block*/
Expr:
| Expr LE Expr
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Assignment
| ArrayUsage
;
%%
#include"lex.yy.c"
#include<ctype.h>
int count=0;
int main(int argc, char *argv[])
{
yyin = fopen(argv[1], "r");
if(!yyparse())
printf("\nParsing complete\n");
else
printf("\nParsing failed\n");
fclose(yyin);
return 0;
}
yyerror(char *s) {
printf("%d : %s %s\n", yylineno, s, yytext );
}
A Makefile to put it together. I use flex-lexer and bison but the example will also work with lex and yacc.
miniC: c.l c.y
bison c.y
flex c.l
gcc c.tab.c -ll -ly
Compile and parse the test code:
$ make
bison c.y
flex c.l
gcc c.tab.c -ll -ly
c.tab.c: In function ‘yyparse’:
c.tab.c:1273:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
yychar = yylex ();
^
c.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’ [-Wimplicit-function-declaration]
yyerror (YY_("syntax error"));
^
c.y: At top level:
c.y:155:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
yyerror(char *s) {
^
$ ls
a.out c.l CMakeLists.txt c.tab.c c.y lex.yy.c Makefile README.md test
$ ./a.out test
Parsing complete
For reading resources I can recommend the books Modern Compiler Implementation in C by Andrew Appel and the flex/bison book by John Levine.
currently having some parsing issues with my grammar and I just can't figure out what is going wrong..
Here's my grammar:
grammar Demo;
#header {
import java.util.List;
import java.util.ArrayList;
}
program:
functionList #programFunction
;
functionList:
function*
;
function:
'haupt()' '{' stmntList '}' #hauptFunction
| type='zahl' ID '(' paramList ')' '{' stmntList '}' #zahlFunction
| type='Zeichenkette' ID '(' paramList ')' '{' stmntList '}' #zeichenketteFunction
| type='nix' ID '(' paramList ')' '{' stmntList '}' #nixFunction
;
paramList:
param (',' paramList)?
;
param:
'zahl' ID
| 'Zeichenkette' ID
|
;
variableList:
ID (',' variableList)?
;
stmntList:
stmnt (stmntList)?
;
stmnt:
'zahl' varName=ID ';' #zahlStmnt
| 'Zeichenkette' varName=ID ';' #zeichenketteStmnt
| varName=ID '=' varValue=expr ';' #varAssignment
| 'Schreibe' '(' argument=expr ')' ';' #schreibeImmediat
| 'Schreibe''(' argument=ID ')' ';' #schreibeText
| 'zuZeichenkette' '(' ID ')'';' #convertString
| 'zuZahl''('ID')'';' #convertInteger
| 'wenn' '(' boolExpr ')' '{' stmntList '}' ('sonst' '{' stmntList '}')? #wennsonstStmnt
| 'fuer' '(' ID '=' expr ',' boolExpr ',' stmnt ')' '{' stmntList '}' #forLoop
| 'waehrend' '(' boolExpr ')' '{' stmntList '}' #whileLoop
| 'tu' '{' stmntList '}' 'waehrend' '(' boolExpr ')' ';' #doWhile
| 'return' expr ';' #returnVar
| fctName=ID '(' (variableList)? ')'';' #functionCall
;
boolExpr:
boolParts ('&&' boolExpr)? #logicAnd
| boolParts ('||' boolExpr)? #logicOr
;
boolParts:
expr '==' expr #isEqual
| expr '!=' expr #isUnequal
| expr '>' expr #biggerThan
| expr '<' expr #smallerThan
| expr '>=' expr #biggerEqual
| expr '<=' expr #smallerEqual
;
expr:
links=expr '+' rechts=product #addi
| links = expr '-' rechts=product #diff
|product #prod
;
product:
links=product '*' rechts=factor #mult
| links=product '/' rechts=factor #teil
| factor #fact
;
factor:
'(' expr')' #bracket
| var=ID #var
| zahl=NUMBER #numb
;
ID : [a-zA-Z]+;
NUMBER : '0'|[1-9][0-9]*;
WS: [\r\n\t ]+ -> skip ;
And this is the code I am trying to parse:
haupt() {
zahl zz;
zz = 2;
zahl cc;
cc = 3;
zz = zz+cc;
Schreibe(cc+cc+cc);
}
the problems arise already in the first row, telling me that it expects a '{' instead of ' '. This is something I cannot understand since I skipped all WS in my grammar. Next errors are the wrong recognition of the 2nd row: the variable declaration of "zahl zz;" is not understood as it should be: the first grammar rule of stmnt should work it, but it does not...
Here are the errors antlrs TestRig gives me:
line 2:6 no viable alternative at input 'zahlzz'
line 4:6 no viable alternative at input 'zahlcc'
line 9:12 mismatched input '+' expecting ')'
Thanks for your help!
Tim
When I see a weird nonsensical bugaboo like this, it generally means that there is a token mismatch between what the lexer and parser think the token types are. Make sure the you regenerate all of your grammars using ANTLR and recompile everything. Hopefully that will clear it up.
I am writing a simple parser in bison. The parser checks whether a program has any syntax errors with respect to my following grammar:
%{
#include <stdio.h>
void yyerror (const char *s) /* Called by yyparse on error */
{
printf ("%s\n", s);
}
%}
%token tNUM tINT tREAL tIDENT tINTTYPE tREALTYPE tINTMATRIXTYPE
%token tREALMATRIXTYPE tINTVECTORTYPE tREALVECTORTYPE tTRANSPOSE
%token tIF tENDIF tDOTPROD tEQ tNE tGTE tLTE tGT tLT tOR tAND
%left "(" ")" "[" "]"
%left "<" "<=" ">" ">="
%right "="
%left "+" "-"
%left "*" "/"
%left "||"
%left "&&"
%left "==" "!="
%% /* Grammar rules and actions follow */
prog: stmtlst ;
stmtlst: stmt | stmt stmtlst ;
stmt: decl | asgn | if;
decl: type vars "=" expr ";" ;
type: tINTTYPE | tINTVECTORTYPE | tINTMATRIXTYPE | tREALTYPE | tREALVECTORTYPE
| tREALMATRIXTYPE ;
vars: tIDENT | tIDENT "," vars ;
asgn: tIDENT "=" expr ";" ;
if: tIF "(" bool ")" stmtlst tENDIF ;
expr: tIDENT | tINT | tREAL | vectorLit | matrixLit | expr "+" expr| expr "-" expr
| expr "*" expr | expr "/" expr| expr tDOTPROD expr | transpose ;
transpose: tTRANSPOSE "(" expr ")" ;
vectorLit: "[" row "]" ;
matrixLit: "[" row ";" rows "]" ;
row: value | value "," row ;
rows: row | row ";" rows ;
value: tINT | tREAL | tIDENT ;
bool: comp | bool tAND bool | bool tOR bool ;
comp: expr relation expr ;
relation: tGT | tLT | tGTE | tLTE | tNE | tEQ ;
%%
int main ()
{
if (yyparse()) {
// parse error
printf("ERROR\n");
return 1;
}
else {
// successful parsing
printf("OK\n");
return 0;
}
}
The code may look long and complicated, but i think what i am going to ask does not need the full code, but in any case i preferred to write the code. I am sure my grammar is correct, but ambiguous. When i try to create the executable of the program by writing "bison -d filename.y", i get an error saying that conflicts: 13 shift/reduce. I defined the precedence of the operators at the beginning of this file, and i tried a lot of combinations of these precedences, but i still get this error. How can i remove this ambiguity? Thank you
tOR, tAND, and tDOTPROD need to have their precedence specified as well.
I have a grammar and everything works fine until this portion:
lexp
: factor ( ('+' | '-') factor)*
;
factor :('-')? IDENT;
This of course introduces an ambiguity. For example a-a can be matched by either Factor - Factor or Factor -> - IDENT
I get the following warning stating this:
[18:49:39] warning(200): withoutWarningButIncomplete.g:57:31:
Decision can match input such as "'-' {IDENT, '-'}" using multiple alternatives: 1, 2
How can I resolve this ambiguity? I just don't see a way around it. Is there some kind of option that I can use?
Here is the full grammar:
program
: includes decls (procedure)*
;
/* Check if correct! */
includes
: ('#include' STRING)*
;
decls
: (typedident ';')*
;
typedident
: ('int' | 'char') IDENT
;
procedure
: ('int' | 'char') IDENT '(' args ')' body
;
args
: typedident (',' typedident )* /* Check if correct! */
| /* epsilon */
;
body
: '{' decls stmtlist '}'
;
stmtlist
: (stmt)*;
stmt
: '{' stmtlist '}'
| 'read' '(' IDENT ')' ';'
| 'output' '(' IDENT ')' ';'
| 'print' '(' STRING ')' ';'
| 'return' (lexp)* ';'
| 'readc' '(' IDENT ')' ';'
| 'outputc' '(' IDENT ')' ';'
| IDENT '(' (IDENT ( ',' IDENT )*)? ')' ';'
| IDENT '=' lexp ';';
lexp
: term (( '+' | '-' ) term) * /*Add in | '-' to reveal the warning! !*/
;
term
: factor (('*' | '/' | '%') factor )*
;
factor : '(' lexp ')'
| ('-')? IDENT
| NUMBER;
fragment DIGIT
: ('0' .. '9')
;
IDENT : ('A' .. 'Z' | 'a' .. 'z') (( 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_'))* ;
NUMBER
: ( ('-')? DIGIT+)
;
CHARACTER
: '\'' ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '\\n' | '\\t' | '\\\\' | '\\' | 'EOF' |'.' | ',' |':' ) '\'' /* IS THIS COMPLETE? */
;
As mentioned in the comments: these rules are not ambiguous:
lexp
: factor (('+' | '-') factor)*
;
factor : ('-')? IDENT;
This is the cause of the ambiguity:
'return' (lexp)* ';'
which can parse the input a-b in two different ways:
a-b as a single binary expression
a as a single expression, and -b as an unary expression
You will need to change your grammar. Perhaps add a comma in multiple return values? Something like this:
'return' (lexp (',' lexp)*)? ';'
which will match:
return;
return a;
return a, -b;
return a-b, c+d+e, f;
...