yacc/lex parser not catching certain terminals - flex-lexer

I built a scanner->parser meant to catch modified Java. When testing it, I noticed that codeBlock never triggers but varDecls triggers. I'm not entirely sure why this happens.
Here is my parser
%{
#include <stdio.h>
extern int yylex(void);
/* Parser error hook: writes the message to stderr as "error: <message>"
   followed by a newline. */
void yyerror(char *s)
{
    FILE *out = stderr;

    fprintf(out, "error: %s\n", s);
}
/* End-of-input hook for the scanner: unconditionally returns 1. */
int yywrap()
{
    return 1;
}
%}
%token INT FLOAT STRING
%union {
int ival;
float fval;
char *sval;
int bval;
}
%token <ival> NegInt
%token <ival> Int
%token <fval> Float
%token <bval> Bool
%token <sval> ifHeader
%token <sval> thenHeader
%token <sval> elseHeader
%token <sval> forHeader
%token <sval> whileHeader
%token <sval> ID
%token <sval> BinOperator
%token <sval> BoolOperator
%token <sval> Assignment
%token <sval> Quotation
%token <sval> LBracket
%token <sval> RBracket
%token <sval> LFBracket
%token <sval> RFBracket
%token <sval> Semi
%token <sval> LABracket
%token <sval> RABracket
%token <sval> Comma
%token <sval> String
%%
codeBlock: varDecls {printf("why doth this triggering?\n");}
| ifExprs {printf("codeBlock Statement \n");}
;
ifExprs: ifExprs ifStmt
| ifStmt
;
ifStmt: ifExpr | ifExprElse;
ifExprElse: ifExpr elseExpr;
ifExpr: ifHeader LBracket boolExpr RBracket thenExpr;
thenExpr: thenHeader LFBracket varDecls RFBracket;
elseExpr: elseHeader LFBracket varDecls RFBracket;
varDecls: varDecls varDecl
| varDecl
;
varDecl: ID Assignment numStmt Semi
| ID Assignment strExpr Semi
| ID Assignment boolExpr Semi
| ID Assignment Bool Semi {printf("why is this triggering?\n");}
;
boolExpr: ID BoolOperator ID
| ID BoolOperator numExpr
| ID BoolOperator Bool
;
strExpr: Quotation ID Quotation
;
numStmt: numStmt BinOperator numExpr
| numExpr
;
numExpr: LBracket numStmt RBracket
| Int
| Float
| NegInt
;
%%
/* Program entry point: runs the generated parser and uses yyparse()'s
   result as the exit status (the original discarded it, so a failed parse
   was indistinguishable from a successful one).
   argc/argv are accepted but not used. */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;
    return yyparse();
}
Here is my scanner:
%{
#include "y.tab.h"
extern YYSTYPE yylval;
%}
%option yylineno
Digit [0-9]
Letter [a-zA-Z]
Word [a-z][a-zA-Z0-9_]*
%%
"-"{Digit}+ {
//printf("\n An assignment: %s \n", yytext);
yylval.ival = atoi(yytext);
return NegInt;
}
{Digit}+ {
//printf("\n An assignment: %s \n", yytext);
yylval.ival = atoi(yytext);
return Int;
}
{Digit}+"."{Digit}+ {
    /* The parser declares Float as <fval>, so the converted value has to go
       into yylval.fval; storing it in ival truncated it to an int and left
       fval unset. */
    //printf("\n A float: %s (%f)\n", yytext, atof(yytext));
    yylval.fval = atof(yytext);
    return Float;
}
True|False {
    /* atoi() yields 0 for both "True" and "False"; derive the flag from the
       matched text instead: 1 for True, 0 for False. */
    //printf("\n A Boolean: %s \n", yytext);
    yylval.bval = (yytext[0] == 'T');
    return Bool;
}
if {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return ifHeader;
}
then {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return thenHeader;
}
else {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return elseHeader;
}
for {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return forHeader;
}
while {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return whileHeader;
}
{Word} {
    /* yytext points into the scanner's own buffer, which is overwritten when
       the next token is matched, so the parser gets a private copy of the
       identifier text. The copy comes from strdup(); nothing in this scanner
       frees it. */
    //printf("\n An Identifier: %s \n", yytext);
    yylval.sval = strdup(yytext);
    return ID;
}
"'" {
//printf("\n An Identifier: %s \n", yytext);
yylval.sval = yytext;
return Quotation;
}
"+"|"-"|"*"|"/" {
//printf("\n An Operator: %s \n", yytext);
yylval.sval = yytext;
return BinOperator;
}
"<"|">"|"!="|"<="|">="|"==" {
//printf("\n An comparison: %s \n", yytext);
yylval.sval = yytext;
return BoolOperator;
}
"=" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return Assignment;
}
"(" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return LBracket;
}
")" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return RBracket;
}
"{" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return LFBracket;
}
"}" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return RFBracket;
}
";" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return Semi;
}
"[" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return LABracket;
}
"]" {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return RABracket;
}
"," {
//printf("\n An assignment: %s \n", yytext);
yylval.sval = yytext;
return Comma;
}
[ \t\r\n]+ { /* skip whitespace between tokens; without a rule it is echoed to stdout by flex's default action */ }
%%
As you can see from my print statements, I was trying to get my parser to print "why doth this triggering", but varDecls is triggered instead.
Additionally, varDecl also triggers. Is that supposed to happen?
Help would be very appreciated. Thank you

Related

How can I resolve the error "undefined reference to `yyerror'"?

I've been following the youtube video https://www.youtube.com/watch?v=fFRxWtRibC8
I've created the lexer.l file with the following:
%{
/* definitions */
#include "parser.tab.h"
%}
/* rules */
%%
[0-9]+ {yylval.num = atoi(yytext); return NUMBER; }
\n {return EOL;}
. {}
%%
/* End-of-input hook called by the scanner; returning 1 reports that there
   is no further input. The original had no return type and returned no
   value. */
int yywrap(void) { return 1; }
/* Stand-alone driver for the scanner: calls yylex() once and returns 0.
   NOTE(review): parser.y defines its own main() as well, and both generated
   files are compiled together ("gcc lex.yy.c parser.tab.c"), which gives the
   linker two definitions of main — confirm which entry point is intended. */
int main() {
yylex();
return 0;
}
and the parser.y file with the following:
%{
/* definitions */
#include <stdio.h>  /* printf is used by the rule actions and by yyerror */

/* Prototypes for the functions the generated parser calls, so they are not
   implicitly declared before their definitions are seen. */
int yylex(void);
void yyerror(char *s);
%}
%union{
int num;
char sym;
}
%token EOL
%token<num> NUMBER
%type<num> exp
%token PLUS
/* rules */
%%
input:
exp EOL {printf("%d\n", $1); }
| EOL;
exp:
NUMBER { $$ = $1; }
| exp PLUS exp { $$ = $1 + $3; }
;
%%
/* Entry point: runs the parser once. The exit status is always 0,
   whatever yyparse() returns. */
int main()
{
    (void) yyparse();
    return 0;
}
/* Parser error hook: prints "ERROR: <message>" and a newline on standard
   output. The function returns void, so the stray "return 0;" of the
   original (a constraint violation in C) has been removed. */
void yyerror (char* s) {
    printf ("ERROR: %s\n", s);
}
When running the command "gcc lex.yy.c parser.tab.c", I'm getting the following error:
undefined reference to `yyerror'
collect2.exe: error: ld returned 1 exit status

Yacc shift/reduce that I cannot identify

I have a .y file with which I am trying to parse and evaluate a function with its parameters, but I have one shift/reduce conflict that I cannot identify:
.y
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "types.h"
#define YYDEBUG 0
/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
void yyerror(char *s);
nodeType *RadEc;
int sym[26]; /* symbol table */
%}
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT SUBRAD ENDSUB THEN DO ENDIF RAD
%nonassoc IFX
%nonassoc ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <nPtr> statement expr stmt_list
%type <iValue> expresie
%start program
%%
program : declaratii cod { exit(0); }
;
declaratii: SUBRAD stmt_list ENDSUB { RadEc=$2; }
| /* NULL */
;
statement : '\n' { $$ = opr(';', 2, NULL, NULL); }
| expr '\n' { $$ = $1; }
| PRINT expr '\n' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr '\n' { $$ = opr('=', 2, id($1), $3); }
| DO stmt_list WHILE expr { $$ = opr(WHILE, 2, $4, $2); }
| IF expr THEN stmt_list ENDIF %prec IFX { $$ = opr(IF, 2, $2, $4); }
| IF expr THEN stmt_list ELSE stmt_list ENDIF { $$ = opr(IF, 3, $2, $4, $6); }
;
stmt_list : statement
| stmt_list statement { $$ = opr(';', 2, $1, $2); }
;
expr : INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
cod : '.' {exit(0);}
| instruc '\n' cod
;
instruc : '\n'
| PRINT expresie {printf("%d\n",$2);}
| VARIABLE '=' expresie {sym[$1]=$3;}
| RAD'('expresie','expresie','expresie')' {sym[0]=$3; sym[1]=$5; sym[2]=$7; ex(RadEc);}
;
expresie : INTEGER { $$ = $1; }
| VARIABLE { $$ = sym[$1]; }
| '-' expresie %prec UMINUS { $$ = -$2; }
| expresie '+' expresie { $$ = $1+$3; }
| expresie '-' expresie { $$ = $1-$3; }
| expresie '*' expresie { $$ = $1*$3; }
| expresie '/' expresie { $$ = $1/$3; }
| expresie '<' expresie { $$ = $1<$3; }
| expresie '>' expresie { $$ = $1>$3; }
| expresie GE expresie { $$ = $1>=$3; }
| expresie LE expresie { $$ = $1<=$3; }
| expresie NE expresie { $$ = $1!=$3; }
| expresie EQ expresie { $$ = $1==$3; }
| '(' expresie ')' { $$ = $2; }
;
%%
/* Build a constant node holding `value` and return it.
   If the allocation fails, the error is reported through yyerror() and the
   program exits; the original reported the error and then went on to write
   through the NULL pointer. */
nodeType *con(int value)
{
    nodeType *p;

    /* allocate node */
    if ((p = malloc(sizeof(conNodeType))) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* copy information */
    p->type = typeCon;
    p->con.value = value;
    return p;
}
/* Build an identifier node that refers to symbol-table slot `i` and return
   it.
   If the allocation fails, the error is reported through yyerror() and the
   program exits; the original reported the error and then went on to write
   through the NULL pointer. */
nodeType *id(int i)
{
    nodeType *p;

    /* allocate node */
    if ((p = malloc(sizeof(idNodeType))) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* copy information */
    p->type = typeId;
    p->id.i = i;
    return p;
}
/* Build an operator node for operator `oper` with `nops` operands and
   return it. The operands are passed as `nops` trailing nodeType*
   arguments and are stored in order in the node's op[] array.
   If the allocation fails, the error is reported through yyerror() and the
   program exits; the original reported the error and then went on to write
   through the NULL pointer. */
nodeType *opr(int oper, int nops, ...)
{
    va_list ap;
    nodeType *p;
    size_t size;
    int i;

    /* allocate node: the base operator node plus room for the extra operand
       pointers beyond the first */
    size = sizeof(oprNodeType) + (nops - 1) * sizeof(nodeType*);
    if ((p = malloc(size)) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* copy information */
    p->type = typeOpr;
    p->opr.oper = oper;
    p->opr.nops = nops;
    va_start(ap, nops);
    for (i = 0; i < nops; i++)
        p->opr.op[i] = va_arg(ap, nodeType*);
    va_end(ap);
    return p;
}
/* Release the tree rooted at `p`. For an operator node every operand
   subtree is released first; then the node itself is freed.
   A NULL pointer is ignored. */
void freeNode(nodeType *p)
{
    int idx;

    if (p == NULL)
        return;

    if (p->type == typeOpr) {
        idx = 0;
        while (idx < p->opr.nops) {
            freeNode(p->opr.op[idx]);
            idx++;
        }
    }
    free(p);
}
/* Evaluate the tree rooted at `p` and return its integer value.
     - NULL tree: 0.
     - constant node: its value; identifier node: the matching sym[] entry.
     - operator node: operands are evaluated recursively. WHILE, IF and PRINT
       yield 0; ';' evaluates both operands and yields the second; '=' stores
       into sym[] and yields the stored value; the arithmetic and comparison
       operators yield the result of the corresponding C operator.
   A node type or operator that is not handled yields 0; the original ran
   off the end of the function without returning a value in that case. */
int ex(nodeType *p)
{
    if (!p)
        return 0;
    switch (p->type) {
    case typeCon:
        return p->con.value;
    case typeId:
        return sym[p->id.i];
    case typeOpr:
        switch (p->opr.oper) {
        case WHILE:
            /* op[0] is the condition, op[1] the body */
            while (ex(p->opr.op[0]))
                ex(p->opr.op[1]);
            return 0;
        case IF:
            /* op[2] (the else branch) exists only for three-operand nodes */
            if (ex(p->opr.op[0]))
                ex(p->opr.op[1]);
            else if (p->opr.nops > 2)
                ex(p->opr.op[2]);
            return 0;
        case PRINT:
            printf("%d\n", ex(p->opr.op[0]));
            return 0;
        case ';':
            ex(p->opr.op[0]);
            return ex(p->opr.op[1]);
        case '=':
            return sym[p->opr.op[0]->id.i] = ex(p->opr.op[1]);
        case UMINUS:
            return -ex(p->opr.op[0]);
        case '+':
            return ex(p->opr.op[0]) + ex(p->opr.op[1]);
        case '-':
            return ex(p->opr.op[0]) - ex(p->opr.op[1]);
        case '*':
            return ex(p->opr.op[0]) * ex(p->opr.op[1]);
        case '/':
            return ex(p->opr.op[0]) / ex(p->opr.op[1]);
        case '<':
            return ex(p->opr.op[0]) < ex(p->opr.op[1]);
        case '>':
            return ex(p->opr.op[0]) > ex(p->opr.op[1]);
        case GE:
            return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
        case LE:
            return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
        case NE:
            return ex(p->opr.op[0]) != ex(p->opr.op[1]);
        case EQ:
            return ex(p->opr.op[0]) == ex(p->opr.op[1]);
        }
    }
    /* unhandled node type or operator */
    return 0;
}
/* Error hook: prints the message followed by a newline on standard
   output. */
void yyerror(char *s)
{
    FILE *out = stdout;

    fprintf(out, "%s\n", s);
}
/* Entry point: turns on parser tracing when YYDEBUG is non-zero, runs the
   parser, and returns 0 regardless of the parse result. */
int main(void)
{
#if YYDEBUG
    yydebug = 1;
#endif
    (void) yyparse();
    return 0;
}
I tried different ways to see where I am losing something, but I am pretty new at this and still cannot figure out the conflicts very well.
Any help much appreciated.
Your grammar allows statements to be expressions and it allows two statements to appear in sequence without any separator.
Now, both of the following are expressions:
a
-1
Suppose they appear like that in a statement list. How is that different from this single expression?
a - 1
Ambiguity always shows up as a parsing conflict.
By the way, delimited if statements (with an endif marker) cannot exhibit the dangling else ambiguity. The endif bracket makes the parse unambiguous. So all of the precedence apparatus copied from a different grammar is totally redundant here.

Why do I get this error, and how can I fix it?

I am trying to run my first flex bison project and this happens:
aky#aky-VirtualBox:~/wk1$ flex project1.l
aky#aky-VirtualBox:~/wk1$ bison -d project1.y
aky#aky-VirtualBox:~/wk1$ gcc -o project1 project1.c project1.tab.c lex.yy.c
project1.c: In function ‘main’:
project1.c:18:9: warning: implicit declaration of function ‘yyparse’
project1.tab.c:1213:16: warning: implicit declaration of function ‘yylex’
lex.yy.c:(.text+0x470): undefined reference to `lookup'
The related code:
project1.c ----------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "project1.h"
/* Error hook: writes "error: <message>" and a newline to stderr. */
void yyerror(char *s)
{
    FILE *out = stderr;

    fprintf(out, "error: %s\n", s);
}
/* Entry point: parses the file named by the first command-line argument
   and returns yyparse()'s result as the exit status.
   With no argument, yyin is left as it is. If the file cannot be opened,
   the failure is reported with perror() and the program exits with
   EXIT_FAILURE. The original passed argv[0] to fopen() unchecked (it is
   NULL when no argument is given) and never tested the result. */
int main(int argc, char **argv)
{
    extern FILE *yyin;
    extern int yyparse(void); /* generated parser; was implicitly declared here */

    ++argv; --argc; /* step past the program name */
    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            perror(argv[0]);
            return EXIT_FAILURE;
        }
    }
    return yyparse();
}
project1.l ------------------------
%option noyywrap nodefault yylineno
%{
#include "project1.h"
#include "project1.tab.h"
%}
EXP ([Ex][-+]?[0-9]+)
%%
".." { return DOTS; }
"+" |
"-" |
"*" |
"/" |
"=" |
"|" |
"," |
";" |
":" |
"." |
"[" |
"]" |
"{" |
"}" |
"(" |
")" { return yytext[0]; }
">" { yylval.fn = 1; return CMP; }
"<" { yylval.fn = 2; return CMP; }
"<>" { yylval.fn = 3; return CMP; }
"==" { yylval.fn = 4; return CMP; }
">=" { yylval.fn = 5; return CMP; }
"<=" { yylval.fn = 6; return CMP; }
"integer" { yylval.type_c = 'a'; return STD_TYPE; }
"real" { yylval.type_c = 'b'; return STD_TYPE; }
"program" { return PROGRAM; }
"var" { return VAR; }
"array" { return ARRAY; }
"of" { return OF; }
"begin" { return BGN; }
"end" { return END; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"while" {return WHILE; }
"do" { return DO; }
"print" { return PRINT; }
[a-zA-Z][a-zA-Z0-9]* { yylval.s = lookup(yytext); return ID; }
[0-9]+"."[0-9]+ |
[0-9]+ { yylval.d = atof(yytext); return NUMBER; }
"//".*
[ \t\n]
. { yyerror("Mystery character.\n"); }
%%
project1.y ------------------------
%{
#include <stdio.h>
#include <stdlib.h>
#include "project1.h"
%}
%union {
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
}
/* declare tokens */
%token <d> NUMBER
%token <s> ID
%token PROGRAM VAR ARRAY OF INTEGER REAL BGN END IF THEN ELSE WHILE DO DOTS PRINT
%token <type_c> STD_TYPE
%nonassoc <fn> CMP
%right '='
%left '+' '-'
%left '*' '/'
%nonassoc '|' UMINUS
%type <a> decl_list decl stmt_list stmt exp
%type <sl> id_list
%type <nl> num_list
%start program
%%
program: PROGRAM ID '(' id_list ')' ';' decl_list BGN stmt_list END '.'
{ printf("new program.\n"); }
;
decl_list: { /*$$ = NULL;*/ }
| decl ';' decl_list { printf("new declaration.\n"); }
;
decl: VAR id_list ':' STD_TYPE { }
| VAR id_list ':' ARRAY '[' NUMBER DOTS NUMBER ']' OF STD_TYPE
{ }
;
stmt: IF exp THEN '{' stmt_list '}' { }
| IF exp THEN '{' stmt_list '}' ELSE '{' stmt_list '}' { }
| WHILE exp DO '{' stmt_list '}' { }
| exp
;
stmt_list: stmt { printf("new statement.\n"); }
| stmt_list ';' stmt { }
;
exp: exp CMP exp { }
| exp '+' exp { }
| exp '-' exp { }
| exp '*' exp { }
| exp '/' exp { }
| '|' exp { }
| '(' exp ')' { }
| '-' exp %prec UMINUS { }
| NUMBER{ }
| ID { }
| ID '[' exp ']' { }
| ID '[' exp ']' '=' exp { }
| ID '=' exp { }
| ID '=' '{' num_list '}' { }
| PRINT '(' exp ')' { }
;
num_list: NUMBER { }
| NUMBER ',' num_list {}
;
id_list: ID { }
| ID ',' id_list { }
;
%%
project1.tab.h --------------------
#ifndef YY_YY_PROJECT1_TAB_H_INCLUDED
# define YY_YY_PROJECT1_TAB_H_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMBER = 258,
ID = 259,
PROGRAM = 260,
VAR = 261,
ARRAY = 262,
OF = 263,
INTEGER = 264,
REAL = 265,
BGN = 266,
END = 267,
IF = 268,
THEN = 269,
ELSE = 270,
WHILE = 271,
DO = 272,
DOTS = 273,
PRINT = 274,
STD_TYPE = 275,
CMP = 276,
UMINUS = 277
};
#endif
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 7 "project1.y" /* yacc.c:1909 */
struct ast *a;
double d;
struct symbol *s;
struct symlist *sl;
struct numlist *nl;
int fn;
char type_c;
#line 87 "project1.tab.h" /* yacc.c:1909 */
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
extern YYSTYPE yylval;
int yyparse (void);
#endif /* !YY_YY_PROJECT1_TAB_H_INCLUDED */
yyparse is declared in project1.tab.h so you need to #include that file in any translation unit which refers to yyparse.
yylex is not declared in any header. In your yacc/bison file, you need to insert a correct declaration:
int yylex(void);
That should go after the #includes.
It's not clear to me which file lookup is defined in, but you need to add it to your final compilation command.

Bison syntax error

I recently started learning bison and I already hit a wall. The manual sections are a little bit ambiguous, so I guess an error was to be expected. The code below is the first tutorial from the official manual - The Reverse Polish Notation Calculator, saved in a single file - rpcalc.y.
/* Reverse polish notation calculator */
%{
#include <stdio.h>
#include <math.h>
#include <ctype.h>
int yylex (void);
void yyerror (char const *);
%}
%define api.value.type {double}
%token NUM
%% /* Grammar rules and actions follow. */
input:
%empty
| input line
;
line:
'\n'
| exp '\n' {printf ("%.10g\n", $1);}
;
exp:
NUM {$$ = $1; }
| exp exp '+' {$$ = $1 + $2; }
| exp exp '-' {$$ = $1 - $2; }
| exp exp '*' {$$ = $1 * $2; }
| exp exp '/' {$$ = $1 / $2; }
| exp exp '^' {$$ = pow ($1, $2); }
| exp 'n' {$$ = -$1; }
;
%%
/* The lexical analyzer */
int yylex (void)
{
int c;
/* Skip white space */
while((c = getchar()) == ' ' || c == '\t')
continue;
/* Process numbers */
if(c == '.' || isdigit (c))
{
ungetc (c, stdin);
scanf ("%lf", $yylval);
return NUM;
}
/* Return end-of-imput */
if (c == EOF)
return 0;
/* Return a single char */
return c;
}
/* Entry point: the exit status is whatever yyparse() returns. */
int main (void)
{
    int status = yyparse ();

    return status;
}
/* Error hook: writes the message and a newline to stderr. */
void yyerror (char const *s)
{
    FILE *out = stderr;

    fprintf (out, "%s\n", s);
}
Executing bison rpcalc.y in cmd returns the following error:
rpcalc.y:11.24-31: syntax error, unexpected {...}
What seems to be the problem?
The fault is caused by you using features that are new to the 3.0 version of bison, whereas you have an older version of bison installed. If you are unable to upgrade to version 3.0, it is an easy change to convert the grammar to using the features of earlier versions of bison.
The %define api.value.type {double} can be changed to a %type command, and the %empty command removed. The resulting bison program would be:
/* Reverse polish notation calculator */
%{
#include <stdio.h>
#include <math.h>
#include <ctype.h>
int yylex (void);
void yyerror (char const *);
%}
%type <double> exp
%token <double> NUM
%% /* Grammar rules and actions follow. */
input:
| input line
;
line:
'\n'
| exp '\n' {printf ("%.10g\n", $1);}
;
exp:
NUM {$$ = $1; }
| exp exp '+' {$$ = $1 + $2; }
| exp exp '-' {$$ = $1 - $2; }
| exp exp '*' {$$ = $1 * $2; }
| exp exp '/' {$$ = $1 / $2; }
| exp exp '^' {$$ = pow ($1, $2); }
| exp 'n' {$$ = -$1; }
;
%%
/* The lexical analyzer */
int yylex (void)
{
int c;
/* Skip white space */
while((c = getchar()) == ' ' || c == '\t')
continue;
/* Process numbers */
if(c == '.' || isdigit (c))
{
ungetc (c, stdin);
scanf ("%lf", $yylval);
return NUM;
}
/* Return end-of-imput */
if (c == EOF)
return 0;
/* Return a single char */
return c;
}
/* Entry point: the exit status is whatever yyparse() returns. */
int main (void)
{
    int status = yyparse ();

    return status;
}
/* Error hook: writes the message and a newline to stderr. */
void yyerror (char const *s)
{
    FILE *out = stderr;

    fprintf (out, "%s\n", s);
}
This runs in a wider range of bison versions.

Syntax error while parsing file using flex and bison

I am parsing the following file:
BEGIN BLOCK BLK_ROWDEC
NAME cell_rowdec
SIZE UNI_rowdecSize
ITERATE itr_rows
DIRECTION lgDir_rowdec
STRAP STRD1,STRD3,STRD2
WRAP WRD1
VIA VIAB,VIAC,VIAD
ENDS BLK_ROWDEC
My flex and bison file are as follows:
lexa.l
%{
#include <iostream>
#include <stdio.h>
const char s[2] = " ";
#include "yacc.tab.h"
char *token;
#define YY_DECL extern "C" int yylex()
int line_num = 1;
using namespace std;
%}
DOT "."
COLON ":"
SEMICOLON ";"
COMMA ","
ANGLE_LEFT "<"
ANGLE_RIGHT ">"
AT "#"
EQUAL "="
SQUARE_OPEN "["
SQUARE_CLOSE [^\\]"]"
OPENBRACE "\("
CLOSEBRACE "\)"
QUOTE "\""
QUOTE_OPEN "\""
QUOTE_CLOSE [^\\]"\""
SPACE " "
TAB "\t"
CRLF "\r\n"
QUOTED_PAIR "\\"[^\r\n]
DIGIT [0-9]
ALPHA [a-zA-Z]
QTEXT [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?#\[\]^_`{|}~]
%%
[ \t] ;
^BEGIN(.*)\r?\n+\s*BEGIN(.*) { printf("\nError : two continous BEGIN is not allowed : "); }
^ENDS(.*)\r?\n+\s*ENDS(.*) { printf("\nError : two continous END is not allowed : \n"); }
NAME { yylval.sval = strdup(yytext);
return TOK_NAME; }
SIZE { yylval.sval = strdup(yytext);
return TOK_SIZE; }
ITERATE { yylval.sval = strdup(yytext);
return TOK_ITERATE; }
DIRECTION { yylval.sval = strdup(yytext);
return TOK_DIRECTION; }
STRAP { yylval.sval = strdup(yytext);
return TOK_STRAP; }
WRAP { yylval.sval = strdup(yytext);
return TOK_WRAP; }
VIA { yylval.sval = strdup(yytext);
return TOK_VIA; }
ENDS { yylval.sval = strdup(yytext);
return TOK_END; }
BEGIN { yylval.sval = strdup(yytext);
return TOK_BEGIN; }
BLOCK { yylval.sval = strdup(yytext);
return TOK_BLOCK; }
[a-zA-Z0-9_,]+ { yylval.sval = strdup(yytext);
return TOK_STRING; }
{SPACE}* { return TOK_SPACE; }
^ENDS(.*)$ {}
^{CRLF} { return TOK_EMPTY_LINE; }
{CRLF} {}
. {}/* ignore unknown chars */
\n { ++line_num; return ENDL; }
yacca.y
%{
#include <cstdio>
#include <cstring>
#include <iostream>
#include <stdio.h>
#define YYDEBUG 1
using namespace std;
extern "C" int yylex();
extern "C" FILE *yyin;
extern int line_num;
void yyerror(const char* s);
%}
// Symbols.
%union
{
char* sval;
};
%token <sval> TOK_NAME
%token <sval> TOK_SIZE
%token <sval> TOK_STRING
%token <sval> TOK_ITERATE
%token <sval> TOK_DIRECTION
%token <sval> TOK_STRAP
%token <sval> TOK_WRAP
%token <sval> TOK_VIA
%token <sval> TOK_EMPTY_LINE
%token <sval> TOK_BLOCK
%token <sval> TOK_LINE
%token <sval> TOK_BEGIN
%token <sval> TOK_END
%token TOK_SPACE
%token END ENDL
%%
language : program ;
program : block
| program block
;
block : TOK_BEGIN TOK_BLOCK TOK_SPACE TOK_STRING blockcontents TOK_END TOK_SPACE TOK_STRING
{
cout << endl << "SHAILAVI" << $4 << " ";
}
;
blockcontents : item
| blockcontents item
;
item : TOK_SPACE TOK_NAME TOK_SPACE TOK_STRING
{
cout << endl << "Value:" << $2 << "->" << $4 << " ";
}
| TOK_SPACE TOK_SIZE TOK_SPACE TOK_STRING { cout << $2 << "->" << $4 << " "; }
| TOK_SPACE TOK_ITERATE TOK_SPACE TOK_STRING { cout << $2 << "->" << $4 << " "; }
| TOK_SPACE TOK_DIRECTION TOK_SPACE TOK_STRING { cout << $2 << "->" << $4 << " " << endl; }
| TOK_SPACE TOK_STRAP TOK_SPACE TOK_STRING { cout << "ref:" << $2 << "->" << $4 << " "; }
| TOK_SPACE TOK_WRAP TOK_SPACE TOK_STRING { cout << $2 << "->" << $4 << " "; }
| TOK_SPACE TOK_VIA TOK_SPACE TOK_STRING { cout << $2 << "->" << $4 << " " << endl; }
;
%%
// Entry point: opens the input named "file", points yyin at it and calls
// yyparse() with tracing enabled, repeating until end-of-file is reported
// on yyin. Prints a message and returns -1 if the file cannot be opened.
int main(void) {
    FILE *input = fopen("file", "r");
    if (input == NULL) {
        cout << "Bad Input.Noexistant file" << endl;
        return -1;
    }

    yyin = input;
    for (;;) {
        yydebug = 1;
        yyparse();
        if (feof(yyin))
            break;
    }
}
void yyerror(const char *s) {
cout << "parse error on line " << line_num << "! Message: " << s << endl;
exit(-1);
}
#include "lex.yy.c"
Compilation steps:
flex lexa.l
bison -d yacca.y
g++ yacca.tab.c -lfl -o scanner.exe
At the time of execution it gives syntax error near blockcontents
Please help me to identify the mistake I have done.
Thank You so much.
It took me a while, but I've found the flaw.
In your lexer, you skip all sequences of tabs and blanks (first rule).
But your parser expects white space every now and then. Hence the syntax error.
Since you don't do anything with the white space, simply eat them within the lexer (as you already do now actually, but it is better to eliminate the {SPACE}* rule too) and eliminate the TOK_SPACE in the parser.
---- edit to give some hints ----
What I did to track down the bug is:
make the lexer verbose
I added (hash signs omitted; it confuses the renderer for some reason)
#ifdef DEBUG
#define RETURN(x) cerr << "\n--> found " << #x << "\n"; return x;
#else
#define RETURN(x) return x;
#endif
and replaced all "return something" by RETURN(something)
I compile the bison/flex files separately and link them afterwards
flex lexa.l && \
bison -d yacca.y && \
g++ -c -DDEBUG -I . lex.yy.c && \
g++ -c -I . yacca.tab.c && \
g++ lex.yy.o yacca.tab.o -o scanner
(working on linux here)
As requested the working example
%{
#include <cstdio>
#include <cstring>
#include <iostream>
#include <stdio.h>
#define YYDEBUG 1
using namespace std;
extern "C" int yylex();
extern "C" FILE *yyin;
extern int line_num;
void yyerror(const char* s);
%}
// Symbols.
%union
{
char* sval;
};
/* The <sval> type tags had been lost from these declarations. The rule
   actions print $n values of these tokens, which requires each of them to
   name a %union member, as in the grammar from the question. */
%token <sval> TOK_NAME
%token <sval> TOK_SIZE
%token <sval> TOK_STRING
%token <sval> TOK_ITERATE
%token <sval> TOK_DIRECTION
%token <sval> TOK_STRAP
%token <sval> TOK_WRAP
%token <sval> TOK_VIA
%token <sval> TOK_EMPTY_LINE
%token <sval> TOK_BLOCK
%token <sval> TOK_LINE
%token <sval> TOK_BEGIN
%token <sval> TOK_END
%token END ENDL
%%
language : program ;
program : block
| program block
;
block : TOK_BEGIN TOK_BLOCK TOK_STRING blockcontents TOK_END TOK_STRING
{
cout << endl << "SHAILAVI" << $3 << " ";
}
;
blockcontents : item
| blockcontents item
;
/* One "KEYWORD value" entry inside a block; every alternative prints the
   keyword text ($1) and its value ($2). The SIZE, ITERATE, DIRECTION and
   STRAP alternatives had been collapsed into a single malformed line and
   are restored here after the corresponding rules of the question's
   grammar, minus the TOK_SPACE tokens. */
item : TOK_NAME TOK_STRING { cout << endl << "Value:" << $1 << "->" << $2 << " "; }
| TOK_SIZE TOK_STRING { cout << $1 << "->" << $2 << " "; }
| TOK_ITERATE TOK_STRING { cout << $1 << "->" << $2 << " "; }
| TOK_DIRECTION TOK_STRING { cout << $1 << "->" << $2 << " " << endl; }
| TOK_STRAP TOK_STRING { cout << "ref:" << $1 << "->" << $2 << " "; }
| TOK_WRAP TOK_STRING { cout << $1 << "->" << $2 << " "; }
| TOK_VIA TOK_STRING { cout << $1 << "->" << $2 << " " << endl; }
;
%%
// Entry point: opens "./input", points yyin at it and calls yyparse() with
// tracing enabled, repeating until end-of-file is reported on yyin.
// Prints a message and returns -1 if the file cannot be opened.
int main(void) {
    FILE *input = fopen("./input", "r");
    if (input == NULL) {
        cout << "Bad Input.Nonexistent file" << endl;
        return -1;
    }

    yyin = input;
    for (;;) {
        yydebug = 1;
        yyparse();
        if (feof(yyin))
            break;
    }
}
void yyerror(const char *s) {
cout << "parse error on line " << line_num << "! Message: " << s << endl;
exit(-1);
}
// End-of-input hook with C linkage: always returns 1.
extern "C" int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
And the lexer
%{
/* The header names had been lost from the two #include lines below; they
   are restored to the headers the question's lexer includes. */
#include <iostream>
#include <stdio.h>
const char s[2] = " ";
#include "yacca.tab.h"
char *token;
#define YY_DECL extern "C" int yylex()
int line_num = 1;
/* RETURN(x) returns token x; when DEBUG is defined it first logs the
   token's name on cerr. */
#ifdef DEBUG
#define RETURN(x) cerr << "\n--> found " << #x << "\n"; return x;
#else
#define RETURN(x) return x;
#endif
using namespace std;
%}
DOT "."
COLON ":"
SEMICOLON ";"
COMMA ","
ANGLE_LEFT "<"
ANGLE_RIGHT ">"
AT "#"
EQUAL "="
SQUARE_OPEN "["
SQUARE_CLOSE [^\\]"]"
OPENBRACE "\("
CLOSEBRACE "\)"
QUOTE "\""
QUOTE_OPEN "\""
QUOTE_CLOSE [^\\]"\""
SPACE " "
TAB "\t"
CRLF "\r\n"
QUOTED_PAIR "\\"[^\r\n]
DIGIT [0-9]
ALPHA [a-zA-Z]
QTEXT [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?#\[\]^_`{|}~]
/* [ \t] ; */
%%
^BEGIN(.*)\r?\n+\s*BEGIN(.*) { printf("\nError : two continous BEGIN is not allowed : "); }
^ENDS(.*)\r?\n+\s*ENDS(.*) { printf("\nError : two continous END is not allowed : \n"); }
NAME { yylval.sval = strdup(yytext);
RETURN(TOK_NAME); }
SIZE { yylval.sval = strdup(yytext);
RETURN(TOK_SIZE); }
ITERATE { yylval.sval = strdup(yytext);
RETURN(TOK_ITERATE); }
DIRECTION { yylval.sval = strdup(yytext);
RETURN(TOK_DIRECTION); }
STRAP { yylval.sval = strdup(yytext);
RETURN(TOK_STRAP); }
WRAP { yylval.sval = strdup(yytext);
RETURN(TOK_WRAP); }
VIA { yylval.sval = strdup(yytext);
RETURN(TOK_VIA); }
ENDS { yylval.sval = strdup(yytext);
RETURN(TOK_END); }
BEGIN { yylval.sval = strdup(yytext);
RETURN(TOK_BEGIN); }
BLOCK { yylval.sval = strdup(yytext);
RETURN(TOK_BLOCK); }
[a-zA-Z0-9_,]+ { yylval.sval = strdup(yytext); RETURN(TOK_STRING); }
^ENDS(.*)$ {}
^{CRLF} { RETURN(TOK_EMPTY_LINE); }
{CRLF} {}
. {}/* ignore unknown chars */
\n { ++line_num; /* RETURN(ENDL); */ }
There's only one problem left. It doesn't really like the EOF. I'll leave that as an exercise.

Resources