I am trying to build a lexical analyzer for a small language using flex.
The lexical will read a text file of lexemes and give each lexeme a token and write the token in another file. the code was able to open the file and read from it using yylex() function.
The problem is the code did not write the tokens in the specified file. Also I've found, that the code can not recognize numbers and gives each letter of an identifier an IDENTIFIER token I tried many methods to overcome these problem and I used a c language websites to write a correct C code.
A small notice is that I do not want to use a .y parser who read the lexeme and pass it to the lexical I want my lexical to read the token by it self and produce the token.
So can any one help me to fine what is the problem?
%{
#include<stdlib.h>
#include<conio.h>
#include<stdio.h>
#define KEY_INT 259
#define KEY_VOID 258
#define KEY_INT 259
#define KEY_FLOAT 260
#define KEY_IF 261
#define KEY_ELSE 262
#define KEY_WHILE 263
#define KEY_FOR 264
#define KEY_RETURN 265
#define KEY_BREAK 266
#define AND_OP 267
#define OR_OP 268
#define SMALL_EQ_OP 269
#define GREAT_EQ_OP 270
#define EQ_OP 271
#define SMALL_OP 272
#define GREAT_OP 273
#define NOT_EQ_OP 274
#define ASSIGN_OP 275
#define OPN__BRACKET 276
#define CLS__BRACKET 277
#define SEMICOLON_SYMBOL 278
#define COMMA_SYMBOL 279
#define DOT_SYMBOL 280
#define ADD_OP 281
#define SUB_OP 282
#define ASTERISK_SYMBOL 283
#define SLASH_SYMBOL 284
#define INT_NUM 287
#define FLOAT_NUM 288
#define IDENTIFIER 289
int yylval;
FILE *yyin,*yyout;
int c;
%}
Letters [a-zA-Z]
Digits [0-9]
Sympols [##$%&*-+!"':;/?(),~`|^_=×{}<>]
%%
[/*][{Letters}|{Digits}|{Sympols}|\n|\t ]*[*/] {}
[-+]?[{Digits}]+ {yylval = atoi(yytext); return INT_NUM ;}
[-+]?[{Digits}]+.[{Digits}]+ {yylval = atoi(yytext);return FLOAT_NUM ;}
{Letters}[{Letters}|{Digits}|_]* {return IDENTIFIER ;}
[\t\n ]+ {}
"void" {return KEY_VOID ;}
"float" {return KEY_FLOAT ;}
"if" {printf( "KEY_IF\n") ;}
"else" {return KEY_ELSE ;}
"while" {return KEY_WHILE ;}
"for" {return KEY_FOR ;}
"return" {return KEY_RETURN ;}
"break" {return KEY_BREAK ;}
"&&" {return AND_OP ;}
"||" {return OR_OP ;}
"<=" {return SMALL_EQ_OP ;}
">=" {return GREAT_EQ_OP ;}
"==" {return EQ_OP ; }
"<" {return SMALL_OP ;}
">" {return GREAT_OP ;}
"!=" {return NOT_EQ_OP ;}
"=" {return ASSIGN_OP ;}
"(" {return OPN__BRACKET ;}
")" {return CLS__BRACKET ;}
";" {return SEMICOLON_SYMBOL ;}
"," {return COMMA_SYMBOL ;}
"." {return DOT_SYMBOL ;}
"+" {return ADD_OP ;}
"-" {return SUB_OP ;}
"*" {return ASTERISK_SYMBOL ;}
"/" {return SLASH_SYMBOL ;}
. {yyerror() ;}
%%
int yyerror (void)
{
printf("Invalid input\n");
exit(1);
}
int yywrap()
{
return 1;
}
main()
{
if((yyin=fopen("C:\\ProCompFlex\\GnuWin32\\bin\\input.txt","r"))==NULL)
{
printf("input.txt Not found !\n Press any key to exit ");
getch();
return;
}
c=yylex();
while(c!=NULL)
{
yyout=fopen("C:\\ProCompFlex\\GnuWin32\\bin\\token.txt","w");
fprintf(yyout,c," ",yylex(),"\n");
getchar();
}
fclose(yyout);
}
You need to put all the keyword rules before the IDENTIFIER rule.
Related
I am trying to build a parser that takes a list of strings in the following format and performs either an addition or multiplication of all of its elements :
prod 5-6_
sum _
sum 5_
sum 5-6-7_
$
Should print the following to the screen :
prod = 30
sum = 0
sum = 5
sum = 18
What I am actually getting as output is this :
prod = 0
sum = 0
sum = 5
sum = 5
My lex file looks like this :
%{
#include <iostream>
#include "y.tab.h"
using namespace std;
extern "C" int yylex();
%}
%option yylineno
digit [0-9]
integer {digit}+
operator "sum"|"prod"
%%
{integer} { return number; }
{operator} { return oper; }
"-" { return '-'; }
"_" { return '_'; }
"$" { return '$'; }
\n { ; }
[\t ]+ { ; }
. { cout << "unknown char" << endl; }
%%
and my yacc file looks like this :
%token oper
%token number
%token '-'
%token '_'
%token '$'
%start valid
%{
#include <iostream>
#include <string>
#include <cstdio>
#include <cstdlib>
using namespace std;
#define YYSTYPE int
extern FILE *yyin;
extern char yytext[];
extern "C" int yylex();
int yyparse();
extern int yyerror(char *);
char op;
%}
%%
valid : expr_seq endfile {}
| {}
;
expr_seq : expr {}
| expr_seq expr {}
;
expr : op sequence nl {if (op == '+') cout << "sum = " ; else cout << "prod = ";}
| op nl {if (op == '+') cout << "sum = 0"; else cout <<"prod = 1";}
;
op : oper { if (yytext[0] == 's') op = '+'; else op = '*';}
;
sequence : number { $$ = atoi(yytext);}
| sequence '-' number { if (op == '+') $$ = $1 + $3; else $$ = $1 * $3;}
;
nl : '_' { cout << endl;}
;
endfile : '$' {}
;
%%
int main(int argc, char *argv[])
{
++argv, --argc;
if(argc > 0) yyin = fopen(argv[0], "r");
else yyin = stdin;
yyparse();
return 0;
}
int yyerror(char * msg)
{
extern int yylineno;
cerr << msg << "on line # " << yylineno << endl;
return 0;
}
My reasoning for the yacc logic is as follows :
a file is valid only if it contains a sequence of expressions followed by the endfile symbol.
a sequence of expressions is a single expression or several expressions.
an expression is either an operator followed by a new line, OR an operator, followed by a list of numbers, followed by a new line symbol.
an operator is either 'sum' or 'prod'
a list of numbers is either a number or several numbers separated by the '-' symbol.
From my perspective this should work, but for some reason it doesn't interpret the sequence of numbers properly after the first element. Any tips would be helpful.
Thanks
You must not use yytext in your yacc actions. yytext is only valid during a scanner action, and the parser often reads ahead to the next token. (In fact, yacc always reads the next token. Bison sometimes doesn't, but it's not always easily predictable.)
You can associate a semantic value with every token (and non-terminal), and you can reference these semantic values using $1, $2, etc. in your yacc actions. You can even associate semantic values of different types to different grammar symbols. And if you use bison -- and you probably are using bison -- you can give grammar symbols names to make it easier to refer to their semantic values.
This is all explained in depth, with examples, in the bison manual.
The solution that worked was simply to change the following lines :
sequence : number { $$ = atoi(yytext);}
| sequence '-' number { if (op == '+') $$ = $1 + $3; else $$ = $1 * $3;}
;
to this :
sequence : number { $$ = atoi(yytext);}
| sequence '-' number { if (op == '+') $$ = $1 + atoi(yytext); else $$ = $1 * atoi(yytext);}
;
I am trying to run my first flex bison project and this happens:
aky#aky-VirtualBox:~/wk1$ flex project1.l
aky#aky-VirtualBox:~/wk1$ bison -d project1.y
aky#aky-VirtualBox:~/wk1$ gcc -o project1 project1.c project1.tab.c lex.yy.c
project1.c: In function ‘main’:
project1.c:18:9: warning: implicit declaration of function ‘yyparse’
project1.tab.c:1213:16: warning: implicit declaration of function ‘yylex’
lex.yy.c:(.text+0x470): undefined reference to `lookup'
The related code:
project1.c ----------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "project1.h"
void yyerror(char *s)
{
fprintf(stderr, "error: %s\n", s);
}
int main(int argc, char **argv)
{
extern FILE *yyin;
++argv; --argc;
yyin = fopen(argv[0], "r");
return yyparse();
}
project1.l ------------------------
%option noyywrap nodefault yylineno
%{
#include "project1.h"
#include "project1.tab.h"
%}
EXP ([Ex][-+]?[0-9]+)
%%
".." { return DOTS; }
"+" |
"-" |
"*" |
"/" |
"=" |
"|" |
"," |
";" |
":" |
"." |
"[" |
"]" |
"{" |
"}" |
"(" |
")" { return yytext[0]; }
">" { yylval.fn = 1; return CMP; }
"<" { yylval.fn = 2; return CMP; }
"<>" { yylval.fn = 3; return CMP; }
"==" { yylval.fn = 4; return CMP; }
">=" { yylval.fn = 5; return CMP; }
"<=" { yylval.fn = 6; return CMP; }
"integer" { yylval.type_c = 'a'; return STD_TYPE; }
"real" { yylval.type_c = 'b'; return STD_TYPE; }
"program" { return PROGRAM; }
"var" { return VAR; }
"array" { return ARRAY; }
"of" { return OF; }
"begin" { return BGN; }
"end" { return END; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"while" {return WHILE; }
"do" { return DO; }
"print" { return PRINT; }
[a-zA-Z][a-zA-Z0-9]* { yylval.s = lookup(yytext); return ID; }
[0-9]+"."[0-9]+ |
[0-9]+ { yylval.d = atof(yytext); return NUMBER; }
"//".*
[ \t\n]
. { yyerror("Mystery character.\n"); }
%%
project1.y ------------------------
%{
#include <stdio.h>
#include <stdlib.h>
#include "project1.h"
%}
%union {
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
}
/* declare tokens */
%token <d> NUMBER
%token <s> ID
%token PROGRAM VAR ARRAY OF INTEGER REAL BGN END IF THEN ELSE WHILE DO DOTS PRINT
%token <type_c> STD_TYPE
%nonassoc <fn> CMP
%right '='
%left '+' '-'
%left '*' '/'
%nonassoc '|' UMINUS
%type <a> decl_list decl stmt_list stmt exp
%type <sl> id_list
%type <nl> num_list
%start program
%%
program: PROGRAM ID '(' id_list ')' ';' decl_list BGN stmt_list END '.'
{ printf("new program.\n"); }
;
decl_list: { /*$$ = NULL;*/ }
| decl ';' decl_list { printf("new declaration.\n"); }
;
decl: VAR id_list ':' STD_TYPE { }
| VAR id_list ':' ARRAY '[' NUMBER DOTS NUMBER ']' OF STD_TYPE
{ }
;
stmt: IF exp THEN '{' stmt_list '}' { }
| IF exp THEN '{' stmt_list '}' ELSE '{' stmt_list '}' { }
| WHILE exp DO '{' stmt_list '}' { }
| exp
;
stmt_list: stmt { printf("new statement.\n"); }
| stmt_list ';' stmt { }
;
exp: exp CMP exp { }
| exp '+' exp { }
| exp '-' exp { }
| exp '*' exp { }
| exp '/' exp { }
| '|' exp { }
| '(' exp ')' { }
| '-' exp %prec UMINUS { }
| NUMBER{ }
| ID { }
| ID '[' exp ']' { }
| ID '[' exp ']' '=' exp { }
| ID '=' exp { }
| ID '=' '{' num_list '}' { }
| PRINT '(' exp ')' { }
;
num_list: NUMBER { }
| NUMBER ',' num_list {}
;
id_list: ID { }
| ID ',' id_list { }
;
%%
project1.tab.h --------------------
#ifndef YY_YY_PROJECT1_TAB_H_INCLUDED
# define YY_YY_PROJECT1_TAB_H_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMBER = 258,
ID = 259,
PROGRAM = 260,
VAR = 261,
ARRAY = 262,
OF = 263,
INTEGER = 264,
REAL = 265,
BGN = 266,
END = 267,
IF = 268,
THEN = 269,
ELSE = 270,
WHILE = 271,
DO = 272,
DOTS = 273,
PRINT = 274,
STD_TYPE = 275,
CMP = 276,
UMINUS = 277
};
#endif
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 7 "project1.y" /* yacc.c:1909 */
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
#line 87 "project1.tab.h" /* yacc.c:1909 */
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
extern YYSTYPE yylval;
int yyparse (void);
#endif /* !YY_YY_PROJECT1_TAB_H_INCLUDED */
yyparse is declared in project1.tab.h so you need to #include that file in any translation unit which refers to yyparse.
yylex is not declared in any header. In your yacc/bison file, you need to insert a correct declaration:
int yylex(void);
That should go after the #includes.
It's not clear to me which file lookup is defined in, but you need to add it to your final compilation command .
Is it possible to construct a simple arithmetic calculator in lex and Yacc?
If yes, please enlist the concepts/methods I should understand before proceeding.
Reference
Note : Before posting a question you should search about it,The following answer that I am gonna post is obtained from the above link, which I obtained from a google search.
The following code is used to implement calculator program using YACC and LEX.
cal.l
DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
%option noyywrap
%%
[ ]
{DIGIT} { yylval=atof(yytext); return NUM;}
\n|. {return yytext[0];}
cal.y
%{
#include<ctype.h>
#include<stdio.h>
#define YYSTYPE double
%}
%token NUM
%left ‘+’ ‘-‘
%left ‘*’ ‘/’
%right UMINUS
%%
S : S E ‘\n’ { printf(“Answer: %g \nEnter:\n”, $2); }
| S ‘\n’
|
| error ‘\n’ { yyerror(“Error: Enter once more…\n” );yyerrok; }
;
E : E ‘+’ E { $$ = $1 + $3; }
| E’-‘E { $$=$1-$3; }
| E’*’E { $$=$1*$3; }
| E’/’E { $$=$1/$3; }
| ‘(‘E’)’ { $$=$2; }
| ‘-‘E %prec UMINUS { $$= -$2; }
| NUM
;
%%
#include “lex.yy.c”
int main()
{
printf(“Enter the expression: “);
yyparse();
}
yyerror (char * s)
{
printf (“% s \n”, s);
exit (1);
}
I have been using flex and bison for making a small calculator. My files are the following:
bisonFile.y
%{
#include <stdio.h>
%}
/* declare tokens */
%token NUMBER
%token ADD SUB MUL DIV ABS
%token EOL
%%
calclist: /* nothing */
| calclist exp EOL { printf("= %d\n", $2); }
;
exp: factor
| exp ADD factor { $$ = $1 + $3; }
| exp SUB factor { $$ = $1 - $3; }
;
factor: term
| factor MUL term { $$ = $1 * $3; }
| factor DIV term { $$ = $1 / $3; }
;
term: NUMBER
| ABS term { $$ = $2 >= 0? $2 : - $2; }
;
%%
main(int argc, char **argv)
{
yyparse();
}
yyerror(char *s)
{
fprintf(stderr, "error: %s\n", s);
}
flexFile.l
%{
# include "f5.tab.h"
int yylval;
%}
/* reconocimiento de tokens e impresion */
%{
int yylval;
%}
%option noyywrap
%%
"+" { return ADD; }
"-" { return SUB; }
"*" { return MUL; }
"/" { return DIV; }
"|" { return ABS; }
[0-9]+("."[0-9]+)? { yylval = atoi(yytext); return NUMBER; } //part added
\n { return EOL; }
[ \t] { /* ignore whitespace */ }
. { printf("Mystery character %c\n", *yytext); }
%%
My program works fine with integer numbers, and it also recognizes real numbers, but the problem is that when I print the results of an operation it always return the answer as an integer number. Why is that?
Thanks
Your use of atoi in the production converts the string to an integer.
Using atof will convert it to a floating point number.
If you want to separate the two, you'll need to change the matching rule for integers, and add one for floating point.
Change "%d" → "%f" in the file “bisonFile.y”. This uses a floating point format for printing the result. The fixed line should read:
| calclist exp EOL { printf("= %f\n", $2); }
In the file “flexFile.l” remove both definitions int yylval. bison outputs
YYSTYPE yylval;
automatically. YYSTYPE is the type of the semantic values. Because you want a floating point calculator, this shall be double. Note that YYSTYPE defaults to int. To change that, YYSTYPE must be defined when compiling the C-codes (from bison and flex) (see below).
Finally, as already stated by MIS, replace atoi() → atof(). The edited line in flexFile.l should read:
[0-9]+("."[0-9]+)? { yylval = atof(yytext); return NUMBER; }
For a novice the dependencies between flex and bison sources might be confusing. A minimal Makefile documents how the example can be compiled. Line 2 sets the semantic type for the scanner and the parser consistently:
calc: calc.o l.o
calc.o l.o: CFLAGS+=-DYYSTYPE=double
l.o: l.c f5.tab.h
calc.c f5.tab.h: bisonFile.y
bison -o $# --defines=f5.tab.h $^
l.c: flexFile.l f5.tab.h
flex -o $# $^
clean::
$(RM) calc calc.o calc.c f5.tab.h l.o l.c
That’ll do the trick.
I'm trying to implement a simple calculator using Flex and Bison. I'm running into problems in the Bison stage, wherein I can't figure out the way in which the value of the variable can be retrieved from the symbol table and assigned to $$.
The lex file:
%{
#include <iostream>
#include <string.h>
#include "calc.tab.h"
using namespace std;
void Print();
int count = 0;
%}
%%
[ \t\n]+ ;
"print" {Print();}
"exit" {
exit(EXIT_SUCCESS);
}
[0-9]+ {
yylval.FLOAT = atof(yytext);
return (NUMBER);
count++;
}
[a-z][_a-zA-Z0-9]* {
yylval.NAME = yytext;
return (ID);
}
. {
return (int)yytext[0];
}
%%
void Print()
{
cout << "Printing ST..\n";
}
int yywrap()
{
return 0;
}
The Bison file:
%{
#include <iostream>
#include <string.h>
#include "table.h"
extern int count;
int yylex();
int yyerror(const char *);
int UpdateSymTable(float, char *, float);
using namespace std;
%}
%union
{
float FLOAT;
char *NAME;
}
%token NUMBER
%token ID
%type <FLOAT> NUMBER
%type <NAME> ID
%type <FLOAT> expr
%type <FLOAT> E
%left '*'
%left '/'
%left '+'
%left '-'
%right '='
%%
E: expr {cout << $$ << "\n";}
expr: NUMBER {$$ = $1;}
| expr '+' expr {$$ = $1 + $3;}
| expr '-' expr {$$ = $1 - $3;}
| expr '*' expr {$$ = $1 * $3;}
| expr '/' expr {$$ = $1 / $3;}
| ID '=' expr {
int index = UpdateSymTable($$, $1, $3);
$$ = st[index].number = $3; //The problem is here
}
%%
int yyerror(const char *msg)
{
cout << "Error: "<<msg<<"\n";
}
int UpdateSymTable(float doll_doll, char *doll_one, float doll_three)
{
int number1 = -1;
for(int i=0;i<count;i++)
{
if(!strcmp(doll_one, st[i].name) == 0)
{
strcpy(st[i].name, doll_one);
st[i].number = doll_three;
number1 = i;
}
else if(strcmp(doll_one, st[i].name) == 0)
{
number1 = i;
}
}
return number1;
}
int main()
{
yyparse();
}
The symbol table:
struct st
{
float number;
char name[25];
}st[25];
The output I'm getting is:
a = 20
c = a+3
20
Error: syntax error
I would really appreciate it if someone told me what is going wrong. I'm trying since a long time, and I haven't been able to resolve the error.
The syntax error is the result of your grammar only accepting a single expr rather than a sequence of exprs. See, for example, this question.
One of the problems with your symbol table lookup is that you incorrectly return the value yytext as your semantic value, instead of making a copy. See, for example, this question.
However, your UpdateSymTable functions has quite a few problems, starting with the fact that the names you chose for parameters are meaningless, and furthermore the first parameter ("doll_doll") is never used. I don't know what you intended to test with !strcmp(doll_one, st[i].name) == 0 but whatever it was, there must be a simpler way of expressing it. In any case, the logic is incorrect. I'd suggest writing some simple test programs (without bison and flex) to let you debug the symbol table handling. And/or talk to your lab advisor, assuming you have one.
Finally, (of what I noticed) your precedence relations are not correct. First, they are reversed: the operator which binds least tightly (assignment) should come first. Second, it is not the case that + has precedence over - , or vice versa; the two operators have the same precedence. Similarly with * and /. You could try reading the precedence chapter of the bison manual if you don't have lecture notes or other information.