Is it possible to construct a simple arithmetic calculator in lex and Yacc?
If yes, please enlist the concepts/methods I should understand before proceeding.
Reference
Note: Before posting a question, you should search for an existing answer first. The following answer is taken from the reference linked above, which I found through a Google search.
The following code is used to implement calculator program using YACC and LEX.
cal.l
/* Scanner for the calculator: numbers become NUM tokens, everything else
   except blanks is handed to the parser as its own character code. */
%option noyywrap
REAL    [0-9]+\.?|[0-9]*\.[0-9]+
%%
" "     { /* blanks between tokens are dropped */ }
{REAL}  { yylval = atof(yytext); return NUM; }
\n      { return yytext[0]; }
.       { return yytext[0]; }
cal.y
%{
/*
 * cal.y - line-oriented infix calculator over doubles.
 *
 * Each input line holding an expression is evaluated and its value printed.
 * The scanner generated from cal.l is textually included after the grammar,
 * so YYSTYPE must be defined before that point.
 */
#include <ctype.h>
#include <stdio.h>
#define YYSTYPE double

int yylex(void);
int yyerror(char *s);
%}
%token NUM
%left '+' '-'
%left '*' '/'
%right UMINUS
%%
/* S: a sequence of lines, each one empty or holding a single expression. */
S : S E '\n' { printf("Answer: %g \nEnter:\n", $2); }
  | S '\n'
  | /* empty */
  /* On a syntax error, skip to the end of the line and keep going. */
  | error '\n' { yyerror("Error: Enter once more...\n"); yyerrok; }
  ;
/* E: an expression; the result of each rule is left in $$. */
E : E '+' E { $$ = $1 + $3; }
  | E '-' E { $$ = $1 - $3; }
  | E '*' E { $$ = $1 * $3; }
  | E '/' E { $$ = $1 / $3; }
  | '(' E ')' { $$ = $2; }
  | '-' E %prec UMINUS { $$ = -$2; }
  | NUM
  ;
%%
#include "lex.yy.c"

/* Prompt once, then parse standard input; returns the parser's status. */
int main(void)
{
    printf("Enter the expression: ");
    return yyparse();
}

/*
 * Print a diagnostic. It deliberately does not terminate the program, so
 * that the `error '\n'` rule above can resynchronise at the next line.
 * Always returns 0.
 */
int yyerror(char *s)
{
    printf("%s\n", s);
    return 0;
}
Related
I am trying to run my first flex bison project and this happens:
aky@aky-VirtualBox:~/wk1$ flex project1.l
aky@aky-VirtualBox:~/wk1$ bison -d project1.y
aky@aky-VirtualBox:~/wk1$ gcc -o project1 project1.c project1.tab.c lex.yy.c
project1.c: In function ‘main’:
project1.c:18:9: warning: implicit declaration of function ‘yyparse’
project1.tab.c:1213:16: warning: implicit declaration of function ‘yylex’
lex.yy.c:(.text+0x470): undefined reference to `lookup'
The related code:
project1.c ----------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "project1.h"
/* Write a diagnostic to stderr in the form "error: <message>" plus a newline. */
void yyerror(char *s)
{
    fputs("error: ", stderr);
    fputs(s, stderr);
    fputc('\n', stderr);
}
/*
 * Entry point: parse the file named by the first command-line argument.
 *
 * When no argument is given, yyin is left untouched so the scanner falls
 * back to its default input instead of calling fopen() on a null pointer.
 *
 * Returns the result of yyparse(), or EXIT_FAILURE if the file cannot be
 * opened.
 */
int main(int argc, char **argv)
{
    extern FILE *yyin;

    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            perror(argv[1]);
            return EXIT_FAILURE;
        }
    }
    return yyparse();
}
project1.l ------------------------
/*
 * Scanner for the project1 language.
 * Keywords, operators and punctuation are matched as literal strings;
 * identifiers and numbers additionally store a value in yylval.
 */
%option noyywrap nodefault yylineno
%{
#include "project1.h"
#include "project1.tab.h"
%}
/* Exponent suffix pattern; no rule below references {EXP}. */
EXP ([Ex][-+]?[0-9]+)
%%
 /* Two-dot range separator. */
".." { return DOTS; }
 /* Punctuation and single-character operators: the '|' actions chain down to
    the last rule, which returns the matched character itself as the token. */
"+" |
"-" |
"*" |
"/" |
"=" |
"|" |
"," |
";" |
":" |
"." |
"[" |
"]" |
"{" |
"}" |
"(" |
")" { return yytext[0]; }
 /* Comparison operators all return CMP; yylval.fn (1-6) says which one. */
">" { yylval.fn = 1; return CMP; }
"<" { yylval.fn = 2; return CMP; }
"<>" { yylval.fn = 3; return CMP; }
"==" { yylval.fn = 4; return CMP; }
">=" { yylval.fn = 5; return CMP; }
"<=" { yylval.fn = 6; return CMP; }
 /* Type names return STD_TYPE, tagged 'a' for integer and 'b' for real. */
"integer" { yylval.type_c = 'a'; return STD_TYPE; }
"real" { yylval.type_c = 'b'; return STD_TYPE; }
 /* Keywords. */
"program" { return PROGRAM; }
"var" { return VAR; }
"array" { return ARRAY; }
"of" { return OF; }
"begin" { return BGN; }
"end" { return END; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"while" {return WHILE; }
"do" { return DO; }
"print" { return PRINT; }
 /* Identifier: yylval.s receives whatever lookup() returns for the text.
    lookup() is not defined in this file. */
[a-zA-Z][a-zA-Z0-9]* { yylval.s = lookup(yytext); return ID; }
 /* Numeric literal, with or without a fractional part, converted by atof(). */
[0-9]+"."[0-9]+ |
[0-9]+ { yylval.d = atof(yytext); return NUMBER; }
 /* Text from "//" to the end of the line has no action and is discarded. */
"//".*
 /* Blanks, tabs and newlines have no action either. */
[ \t\n]
 /* Any other character is reported through yyerror(); no token is returned. */
. { yyerror("Mystery character.\n"); }
%%
project1.y ------------------------
%{
#include <stdio.h>
#include <stdlib.h>
#include "project1.h"
%}
/* Semantic value: one member per kind of value a symbol can carry. */
%union {
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
}
/* declare tokens */
%token <d> NUMBER
%token <s> ID
/* INTEGER and REAL are declared here but not used by any rule below. */
%token PROGRAM VAR ARRAY OF INTEGER REAL BGN END IF THEN ELSE WHILE DO DOTS PRINT
%token <type_c> STD_TYPE
/* Operator precedence; bison gives later declarations higher precedence. */
%nonassoc <fn> CMP
%right '='
%left '+' '-'
%left '*' '/'
%nonassoc '|' UMINUS
%type <a> decl_list decl stmt_list stmt exp
%type <sl> id_list
%type <nl> num_list
%start program
%%
/* program: header with a parenthesised identifier list, declarations, then
   a BGN ... END block closed by '.'. */
program: PROGRAM ID '(' id_list ')' ';' decl_list BGN stmt_list END '.'
{ printf("new program.\n"); }
;
/* decl_list: zero or more declarations, each followed by ';'. */
decl_list: { /*$$ = NULL;*/ }
| decl ';' decl_list { printf("new declaration.\n"); }
;
/* decl: a scalar declaration, or an array with NUMBER DOTS NUMBER bounds. */
decl: VAR id_list ':' STD_TYPE { }
| VAR id_list ':' ARRAY '[' NUMBER DOTS NUMBER ']' OF STD_TYPE
{ }
;
/* stmt: if / if-else / while with brace-delimited bodies, or a bare
   expression. */
stmt: IF exp THEN '{' stmt_list '}' { }
| IF exp THEN '{' stmt_list '}' ELSE '{' stmt_list '}' { }
| WHILE exp DO '{' stmt_list '}' { }
| exp
;
/* stmt_list: one or more statements separated by ';'. */
stmt_list: stmt { printf("new statement.\n"); }
| stmt_list ';' stmt { }
;
/* exp: comparisons, arithmetic, '|' prefix, unary minus, literals, variable
   and array references, assignments, and print(...). All actions are empty. */
exp: exp CMP exp { }
| exp '+' exp { }
| exp '-' exp { }
| exp '*' exp { }
| exp '/' exp { }
| '|' exp { }
| '(' exp ')' { }
| '-' exp %prec UMINUS { }
| NUMBER{ }
| ID { }
| ID '[' exp ']' { }
| ID '[' exp ']' '=' exp { }
| ID '=' exp { }
| ID '=' '{' num_list '}' { }
| PRINT '(' exp ')' { }
;
/* num_list: one or more NUMBERs separated by ','. */
num_list: NUMBER { }
| NUMBER ',' num_list {}
;
/* id_list: one or more IDs separated by ','. */
id_list: ID { }
| ID ',' id_list { }
;
%%
project1.tab.h --------------------
#ifndef YY_YY_PROJECT1_TAB_H_INCLUDED
# define YY_YY_PROJECT1_TAB_H_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
/* Numeric codes for the named tokens; the numbering starts at 258. */
enum yytokentype
{
NUMBER = 258,
ID = 259,
PROGRAM = 260,
VAR = 261,
ARRAY = 262,
OF = 263,
INTEGER = 264,
REAL = 265,
BGN = 266,
END = 267,
IF = 268,
THEN = 269,
ELSE = 270,
WHILE = 271,
DO = 272,
DOTS = 273,
PRINT = 274,
STD_TYPE = 275,
CMP = 276,
UMINUS = 277
};
#endif
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
/* Semantic value type; the #line directives point back to the %union in
   project1.y that these members come from. */
union YYSTYPE
{
#line 7 "project1.y" /* yacc.c:1909 */
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
#line 87 "project1.tab.h" /* yacc.c:1909 */
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
extern YYSTYPE yylval;
int yyparse (void);
#endif /* !YY_YY_PROJECT1_TAB_H_INCLUDED */
yyparse is declared in project1.tab.h so you need to #include that file in any translation unit which refers to yyparse.
yylex is not declared in any header. In your yacc/bison file, you need to insert a correct declaration:
int yylex(void);
That should go after the #includes.
It's not clear to me which file lookup is defined in, but you need to add that file to your final compilation command.
I have been using flex and bison for making a small calculator. My files are the following:
bisonFile.y
%{
#include <stdio.h>
%}
/* declare tokens */
%token NUMBER
%token ADD SUB MUL DIV ABS
%token EOL
%%
/* calclist: zero or more expressions, each ended by EOL; every result is
   printed with the integer format %d. */
calclist: /* nothing */
| calclist exp EOL { printf("= %d\n", $2); }
;
/* exp: additive level (ADD, SUB), left-recursive over factor. */
exp: factor
| exp ADD factor { $$ = $1 + $3; }
| exp SUB factor { $$ = $1 - $3; }
;
/* factor: multiplicative level (MUL, DIV), left-recursive over term. */
factor: term
| factor MUL term { $$ = $1 * $3; }
| factor DIV term { $$ = $1 / $3; }
;
/* term: a NUMBER, or ABS applied to a term, yielding its absolute value. */
term: NUMBER
| ABS term { $$ = $2 >= 0? $2 : - $2; }
;
%%
/*
 * Entry point: runs the parser and returns its status as the exit code.
 * The command-line arguments are accepted but not used.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;
    return yyparse();
}
/*
 * Report a parser diagnostic on stderr as "error: <message>".
 * Always returns 0.
 */
int yyerror(char *s)
{
    fprintf(stderr, "error: %s\n", s);
    return 0;
}
flexFile.l
%{
/* Token codes come from the bison-generated header. */
# include "f5.tab.h"
/* NOTE(review): yylval is defined here and again in the next %{ %} block. */
int yylval;
%}
/* token recognition and printing */
%{
int yylval;
%}
%option noyywrap
%%
 /* Operators: each one returns its own token code. */
"+" { return ADD; }
"-" { return SUB; }
"*" { return MUL; }
"/" { return DIV; }
"|" { return ABS; }
 /* Digits with an optional fractional part; the text is converted by atoi()
    and stored in the int yylval. */
[0-9]+("."[0-9]+)? { yylval = atoi(yytext); return NUMBER; } //part added
\n { return EOL; }
[ \t] { /* ignore whitespace */ }
 /* Any other character is reported on stdout; no token is returned. */
. { printf("Mystery character %c\n", *yytext); }
%%
My program works fine with integer numbers, and it also recognizes real numbers, but the problem is that when I print the result of an operation it always returns the answer as an integer. Why is that?
Thanks
Your use of atoi in the lexer rule's action converts the matched text to an integer.
Using atof will convert it to a floating point number.
If you want to separate the two, you'll need to change the matching rule for integers, and add one for floating point.
Change "%d" → "%f" in the file “bisonFile.y”. This uses a floating point format for printing the result. The fixed line should read:
| calclist exp EOL { printf("= %f\n", $2); }
In the file “flexFile.l” remove both definitions int yylval. bison outputs
YYSTYPE yylval;
automatically. YYSTYPE is the type of the semantic values. Because you want a floating point calculator, this shall be double. Note that YYSTYPE defaults to int. To change that, YYSTYPE must be defined when compiling the C-codes (from bison and flex) (see below).
Finally, as already stated by MIS, replace atoi() → atof(). The edited line in flexFile.l should read:
[0-9]+("."[0-9]+)? { yylval = atof(yytext); return NUMBER; }
For a novice the dependencies between flex and bison sources might be confusing. A minimal Makefile documents how the example can be compiled. Line 2 sets the semantic type for the scanner and the parser consistently:
calc: calc.o l.o
calc.o l.o: CFLAGS+=-DYYSTYPE=double
# The scanner includes the token header, so it must exist before l.o is built.
l.o: l.c f5.tab.h
# One bison run produces both files; the parser output is named explicitly so
# the result does not depend on which of the two targets make asks for first.
calc.c f5.tab.h: bisonFile.y
	bison -o calc.c --defines=f5.tab.h $<
# $< is flexFile.l only; the header is a prerequisite, not a flex input.
l.c: flexFile.l f5.tab.h
	flex -o $@ $<
clean::
	$(RM) calc calc.o calc.c f5.tab.h l.o l.c
That’ll do the trick.
This is not homework, but it is from a book.
I'm given a following bison spec file:
%{
#include <stdio.h>
#include <ctype.h>
/* The scanner and the error reporter are defined after the grammar. */
int yylex();
int yyerror();
%}
%token NUMBER
%%
/* command: the whole input is one expression, whose value is printed. */
command : exp { printf("%d\n", $1); }
; /* allows printing of the result */
/* exp: sums and differences of terms, left-recursive. */
exp : exp '+' term { $$ = $1 + $3; }
| exp '-' term { $$ = $1 - $3; }
| term { $$ = $1; }
;
/* term: products of factors, left-recursive. */
term : term '*' factor { $$ = $1 * $3; }
| factor { $$ = $1; }
;
/* factor: a number or a parenthesised expression. */
factor : NUMBER { $$ = $1; }
| '(' exp ')' { $$ = $2; }
;
%%
/* Entry point: the parser's result becomes the exit status. */
int main() {
    int status = yyparse();
    return status;
}
/*
 * Hand-written scanner reading from stdin.
 * Returns NUMBER (value in yylval) for a run of digits, 0 at a newline so
 * that the parse ends, and otherwise the character itself.
 */
int yylex() {
    int ch;

    /* skip over blanks */
    do {
        ch = getchar();
    } while (ch == ' ');

    if (!isdigit(ch)) {
        /* a newline ends the parse; any other character is its own token */
        return (ch == '\n') ? 0 : ch;
    }

    /* put the first digit back and let scanf read the whole integer */
    ungetc(ch, stdin);
    scanf("%d", &yylval);
    return NUMBER;
}
/* Print the given message and a newline on stderr; always returns 0. */
int yyerror(char * s) {
    fputs(s, stderr);
    fputc('\n', stderr);
    return 0;
}
The task is to do the following:
Rewrite the spec to add the following useful error messages:
"missing right parenthesis," generated by the string (2+3
"missing left parenthesis," generated by the string 2+3)
"missing operator," generated by the string 2 3
"missing operand," generated by the string (2+)
The simplest solution that I was able to come up with is to do the following:
/* half_exp: an expression followed by an operator with no right operand. */
half_exp : exp '+' { $$ = $1; }
| exp '-' { $$ = $1; }
| exp '*' { $$ = $1; }
;
/* factor: the two valid forms plus error forms that report a message. */
factor : NUMBER { $$ = $1; }
| '(' exp '\n' { yyerror("missing right parenthesis"); }
| exp ')' { yyerror("missing left parenthesis"); }
/* NOTE(review): same right-hand side as the '(' exp '\n' alternative above,
   but reporting a different message. */
| '(' exp '\n' { yyerror("missing left parenthesis"); }
| '(' exp ')' { $$ = $2; }
| '(' half_exp ')' { yyerror("missing operand"); exit(0); }
;
/* exp: as in the book's grammar, plus two adjacent expressions reported as
   a missing operator. */
exp : exp '+' term { $$ = $1 + $3; }
| exp '-' term { $$ = $1 - $3; }
| term { $$ = $1; }
| exp exp { yyerror("missing operator"); }
;
These changes work, however they lead to a lot of conflicts.
Here is my question.
Is there a way to rewrite this grammar in such a way so that it wouldn't generate conflicts?
Any help is appreciated.
Yes it is possible:
command : exp { printf("%d\n", $1); }
; /* allows printing of the result */
exp: exp '+' exp {
// code
}
| exp '-' exp {
// code
}
| exp '*' exp {
// code
}
| exp '/' exp {
// code
}
|'(' exp ')' {
// code
}
Bison allows Ambiguous grammars.
I don't see how you can rewrite the grammar to avoid conflicts. You have missed the point of terms, factors, etc.: you use these when you want a left-recursive context-free grammar.
From this grammar:
E -> E+T
|T
T -> T*F
|F
F -> (E)
|num
Once you free it from left recursion you would go to:
E -> TE' { num , ( }
E' -> +TE' { + }
| eps { ) , EOI }
T -> FT' { ( , num }
T' -> *FT' { * }
|eps { + , ) , EOI }
F -> (E) { ( }
|num { num }
The sets shown next to the rules list which input symbol must come next for that rule to be applied. Of course, this is just an example for simple arithmetic expressions such as 2*(3+4)*5+(3*3*3+4+5*6).
If you want to learn more about this topic, I suggest reading about "left recursion in context-free grammars". There are some great books covering this topic, which also explain how to compute these input sets.
But as I said above, all of this can be avoided because Bison allows Ambiguous grammars.
I recently started learning bison and I already hit a wall. The manual sections are a little bit ambiguous, so I guess an error was to be expected. The code below is the first tutorial from the official manual - The Reverse Polish Notation Calculator, saved in a single file - rpcalc.y.
/* Reverse polish notation calculator */
%{
#include <stdio.h>
#include <math.h>
#include <ctype.h>
int yylex (void);
void yyerror (char const *);
%}
/* Semantic values of all symbols are doubles. */
%define api.value.type {double}
%token NUM
%% /* Grammar rules and actions follow. */
/* input: zero or more lines. */
input:
%empty
| input line
;
/* line: an empty line, or one expression whose value is printed. */
line:
'\n'
| exp '\n' {printf ("%.10g\n", $1);}
;
/* exp: postfix notation; both operands come before the operator.
   'n' negates the single operand before it. */
exp:
NUM {$$ = $1; }
| exp exp '+' {$$ = $1 + $2; }
| exp exp '-' {$$ = $1 - $2; }
| exp exp '*' {$$ = $1 * $2; }
| exp exp '/' {$$ = $1 / $2; }
| exp exp '^' {$$ = pow ($1, $2); }
| exp 'n' {$$ = -$1; }
;
%%
/* The lexical analyzer */
int yylex (void)
{
int c;
/* Skip white space */
while((c = getchar()) == ' ' || c == '\t')
continue;
/* Process numbers */
if(c == '.' || isdigit (c))
{
ungetc (c, stdin);
scanf ("%lf", $yylval);
return NUM;
}
/* Return end-of-imput */
if (c == EOF)
return 0;
/* Return a single char */
return c;
}
/* Entry point: the parser's result becomes the exit status. */
int main (void)
{
  int status = yyparse ();
  return status;
}
/* Called by the parser on error: prints the message and a newline on stderr. */
void yyerror (char const *s)
{
  fputs (s, stderr);
  fputc ('\n', stderr);
}
Executing bison rpcalc.y in cmd returns the following error:
rpcalc.y:11.24-31: syntax error, unexpected {...}
What seems to be the problem?
The fault is caused by you using features that are new to the 3.0 version of bison, whereas you have an older version of bison installed. If you are unable to upgrade to version 3.0, it is an easy change to convert the grammar to using the features of earlier versions of bison.
The %define api.value.type {double} can be changed to a %type command, and the %empty command removed. The resulting bison program would be:
/* Reverse polish notation calculator */
%{
#include <stdio.h>
#include <math.h>
#include <ctype.h>
int yylex (void);
void yyerror (char const *);
%}
/* A type tag such as <val> must name a member of %union, so the double
   value type is declared through a one-member union. */
%union {
  double val;
}
%type <val> exp
%token <val> NUM
%% /* Grammar rules and actions follow. */
/* input: zero or more lines. */
input:
  /* empty */
| input line
;
/* line: an empty line, or one expression whose value is printed. */
line:
  '\n'
| exp '\n'      {printf ("%.10g\n", $1);}
;
/* exp: postfix notation; both operands come before the operator.
   'n' negates the single operand before it. */
exp:
  NUM           {$$ = $1; }
| exp exp '+'   {$$ = $1 + $2; }
| exp exp '-'   {$$ = $1 - $2; }
| exp exp '*'   {$$ = $1 * $2; }
| exp exp '/'   {$$ = $1 / $2; }
| exp exp '^'   {$$ = pow ($1, $2); }
| exp 'n'       {$$ = -$1; }
;
%%
/*
 * The lexical analyzer.
 * Returns NUM with the value stored in yylval.val for a number, 0 at end of
 * input, and otherwise the character read (operators and '\n').
 */
int yylex (void)
{
  int c;

  /* Skip white space */
  while ((c = getchar ()) == ' ' || c == '\t')
    continue;

  /* Process numbers */
  if (c == '.' || isdigit (c))
    {
      ungetc (c, stdin);
      /* If no number can be read (for example a lone '.'), hand the
         character to the parser, which will report a syntax error. */
      if (scanf ("%lf", &yylval.val) != 1)
        return c;
      return NUM;
    }

  /* Return end-of-input */
  if (c == EOF)
    return 0;

  /* Return a single char */
  return c;
}

/* Entry point: the parser's result becomes the exit status. */
int main (void)
{
  return yyparse ();
}

/* Called by the parser on error: prints the message on stderr. */
void yyerror (char const *s)
{
  fprintf (stderr, "%s\n", s);
}
This runs in a wider range of bison versions.
I am new to Lex and Yacc and I am trying to create a parser for a simple language which allows for basic arithmetic and equality expressions. Though I have some of it working, I am encountering errors when trying to parse expressions involving binary operations. Here is my .y file:
%{
#include <stdlib.h>
#include <stdio.h>
%}
/* Tokens: a numeric literal and the seven command keywords. */
%token NUMBER
%token HOME
%token PU
%token PD
%token FD
%token BK
%token RT
%token LT
/* Operator precedence; bison gives later declarations higher precedence. */
%left '+' '-'
%left '=' '<' '>'
%nonassoc UMINUS
%%
/* S: a possibly empty sequence of statements (right-recursive). Every action
   in this grammar only prints the production that was reduced. */
S : statement S { printf("S -> stmt S\n"); }
| { printf("S -> \n"); }
;
/* statement: HOME, PD and PU stand alone; FD, BK, RT and LT take one
   expression. */
statement : HOME { printf("stmt -> HOME\n"); }
| PD { printf("stmt -> PD\n"); }
| PU { printf("stmt -> PU\n"); }
| FD expression { printf("stmt -> FD expr\n"); }
| BK expression { printf("stmt -> BK expr\n"); }
| RT expression { printf("stmt -> RT expr\n"); }
| LT expression { printf("stmt -> LT expr\n"); }
;
/* expression: binary + - > < =, parentheses, unary minus, or a number.
   The operators are single-character literal tokens. */
expression : expression '+' expression { printf("expr -> expr + expr\n"); }
| expression '-' expression { printf("expr -> expr - expr\n"); }
| expression '>' expression { printf("expr -> expr > expr\n"); }
| expression '<' expression { printf("expr -> expr < expr\n"); }
| expression '=' expression { printf("expr -> expr = expr\n"); }
| '(' expression ')' { printf("expr -> (expr)\n"); }
| '-' expression %prec UMINUS { printf("expr -> -expr\n"); }
| NUMBER { printf("expr -> number\n"); }
;
%%
/* Print the given message and a newline on stderr; always returns 0. */
int yyerror(char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
    return 0;
}
/*
 * Entry point: runs the parser and returns its result as the exit status,
 * so that a failed parse is visible to the caller.
 */
int main()
{
    return yyparse();
}
And here is my .l file for Lex:
%{
/* NOTE(review): presumably the header generated from the .y file, supplying
   the token codes and yylval; confirm. */
#include "testYacc.h"
%}
/* One or more decimal digits. */
number [0-9]+
%%
[ ] { /* skip blanks */ }
 /* Integer literal: its value is parsed into yylval. */
{number} { sscanf(yytext, "%d", &yylval); return NUMBER; }
 /* Command keywords, one token each. */
home { return HOME; }
pu { return PU; }
pd { return PD; }
fd { return FD; }
bk { return BK; }
rt { return RT; }
lt { return LT; }
%%
When I try to enter an arithmetic expression on the command-line for evaluation, it results in the following error:
home
stmt -> HOME
pu
stmt -> PU
fd 10
expr -> number
fd 10
stmt -> FD expr
expr -> number
fd (10 + 10)
stmt -> FD expr
(expr -> number
+stmt -> FD expr
S ->
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
syntax error
Your lexer lacks rules to match and return tokens such as '+' and '*', so if there are any in your input, it will just echo them and discard them. This is what happens when you enter fd (10 + 10) -- the lexer returns the tokens FD NUMBER NUMBER while + and ( get echoed to stdout. The parser then gives a syntax error.
You want to add a rule to return these single character tokens. The easiest is to just add a single rule to your .l file at the end:
. { return *yytext; }
which matches any single character.
Note that this does NOT match a \n (newline), so newlines in your input will still be echoed and ignored. You might want to add them (and tabs and carriage returns) to your skip blanks rule:
[ \t\r\n] { /* skip blanks */ }