Tiger language parser for compiler - parsing

I am now trying to build a compiler using the book Modern Compiler Implementation in C (the Tiger book). I am now stuck in the parser phase. I have built my grammar and tried to run it but still get some syntax error.
I tried my lexer and it appears to work.
This is my code for yacc/bison:
%{
#include <stdio.h>
#include "util.h"
#include "errormsg.h"
#include <stdio.h>
int yylex(void); /* function prototype */
void yyerror(char *s)
{
EM_error(EM_tokPos, "%s", s);
}
%}
%union {
int pos;
int ival;
string sval;
}
%token <sval> ID STRING
%token <ival> INT
%token
COMMA COLON SEMICOLON LPAREN RPAREN LBRACK RBRACK
LBRACE RBRACE DOT
PLUS MINUS TIMES DIVIDE EQ NEQ LT LE GT GE
AND OR ASSIGN
ARRAY IF THEN ELSE WHILE FOR TO DO LET IN END OF
BREAK NIL
FUNCTION VAR TYPE
%start program
%nonassoc DO OF
%nonassoc THEN /* ELSE must come after THEN! */
%nonassoc ELSE
%left SEMICOLON
%left ASSIGN
%left OR
%left AND
%nonassoc EQ NEQ GT LT GE LE
%left PLUS MINUS
%left TIMES DIVIDE
%left UMINUS
%%
program: exp;
dec: tyDec;
dec: varDec;
dec: funDec;
tyDec: TYPE ID EQ ty;
ty: ID;
ty: arrTy;
ty: recTy;
arrTy: ARRAY OF ID;
recTy: LBRACE fieldDecs RBRACE;
fieldDecs: /* empty */ ;
fieldDecs: fieldDec;
fieldDecs: fieldDec COMMA fieldDecs;
fieldDec: ID COLON ID;
funDec: FUNCTION ID LPAREN fieldDecs RPAREN EQ exp;
funDec: FUNCTION ID LPAREN fieldDecs RPAREN COLON ID EQ exp;
varDec: VAR ID ASSIGN exp;
varDec: VAR ID COLON ID ASSIGN exp;
lValue: ID;
lValue: subscript;
lValue: fieldExp;
subscript: lValue LBRACK exp RBRACK;
fieldExp: lValue DOT ID;
exp: lValue;
exp: NIL;
exp: INT;
exp: STRING;
exp: seqExp;
exp: negation;
exp: callExp;
exp: infixExp;
exp: arrCreate;
exp: recCreate;
exp: assignment;
exp: ifThenElse;
exp: ifThen;
exp: whileExp;
exp: forExp;
exp: BREAK;
exp: letExp;
seqExp: LPAREN exps RPAREN;
exps: /* empty */ ;
exps: exp;
exps: exp SEMICOLON exps;
negation: MINUS exp %prec UMINUS ;
callExp: ID LPAREN expsc RPAREN ;
expsc: /* empty */ ;
expsc: exp;
expsc: exp COMMA expsc;
infixExp: arithmExp;
infixExp: boolExp;
infixExp: compExp;
arithmExp: exp PLUS exp;
arithmExp: exp MINUS exp;
arithmExp: exp TIMES exp;
arithmExp: exp DIVIDE exp;
boolExp: exp AND exp;
boolExp: exp OR exp;
compExp: exp EQ exp;
compExp: exp NEQ exp;
compExp: exp GT exp;
compExp: exp LT exp;
compExp: exp GE exp;
compExp: exp LE exp;
arrCreate: ID LBRACK exp RBRACK OF exp;
recCreate: ID LBRACE fieldCreates RBRACE;
fieldCreates: /* empty */ ;
fieldCreates: fieldCreate;
fieldCreates: fieldCreate COMMA fieldCreates;
fieldCreate: ID EQ exp;
assignment: lValue ASSIGN exp;
ifThenElse: IF exp THEN exp ELSE exp;
ifThen: IF exp THEN exp;
whileExp: WHILE exp DO exp;
forExp: FOR ID ASSIGN exp TO exp DO exp;
letExp: LET decs IN exps END;
decs: dec;
decs: dec decs;
/* ERROR RECOVERY */
exp: LPAREN error RPAREN;
seqExp: error SEMICOLON exp;
When I run my parser on a sample tiger code, I get some syntax error, including at the beginning.
Could you please help me to sort it out? Thanks!

Related

How to represent an if-else statment in backus naur form using rust and pomelo?

I'm attempting to use pomelo in rust to parse a scripting language and have reached a roadblock in the backus-naur representation of the language, it cannot parse if-else statements the way i've defined them.
for example, if it were to parse the statement
if (1) {
return;
}
the parser would succeed, but if I were to add an else statment to the code, like
if (1) {
return;
} else {
return;
}
it will throw a syntax error at the else token, specifically,
the output I get is
gettin If
gettin LParen
gettin Num(1)
gettin RParen
gettin LBrace
gettin Return
gettin Semicolon
gettin RBrace
gettin Else
should not execute this
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: "syntax Some(Else)"', src/main.rs:122:24
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
It doesn't seem obvious to me why this is failing when looking at the bnf form.
Im sorry for the large amount of code for this minimal reproducible example but I don't know the exact cause of the error in the bnf form and thus I don't know how to condense it to produce the same errors.
extern crate pomelo;
use pomelo::pomelo;
pomelo!{
%error String;
%syntax_error{
Err(format!("syntax {:?}",token))
}
%parse_fail {
"Giving up. Parser is hopelessly lost...".to_string()
}
%token #[derive(Debug)] pub enum Token {};
//define types to make the compiler happy
%type Ident String;
%type Type String;
%type Num i64;
%type String String;
%type expr Vec<String>;
%type expr_list Vec<Vec<String>>;
%type stmt Vec<String>;
%type input Option<Vec<Vec<String>>>;
%type block Vec<Vec<String>>;
%type decl_list Vec<Vec<String>>;
%type stmt_list Vec<Vec<String>>;
%type func_list Vec<Vec<String>>;
%type arg_list Vec<String>;
%type type_list Vec<String>;
%type package String;
%type imports Vec<String>;
%type f_decl Vec<String>;
%type type_t Vec<Vec<String>>;
%type type_s Vec<Vec<String>>;
%type decl Vec<Vec<String>>;
%type assignment String;
%type CheckMark String;
%type Todo String;
%left Else;
%right Eq;
%left Or;
%left And;
%nonassoc Equal Neq;
%nonassoc Less LessEq Greater GreaterEq;
%left Add Sub;
%left Mul Div;
%nonassoc Not;
input ::= type_s?(v) {v /*assigning worthless values to make the compiler happy*/};
type_t ::= stmt(a) {vec![a]}
type_s ::= type_t(t) {t}
type_s ::= type_s(mut a) type_t(b) {a.extend(b);a}
stmt ::= KeyFunction Ident(name) LParen arg_list?(args) RParen block(code) {vec![name]}
assignment ::= Ident(a) Eq expr(b) {a}
arg_list ::= Ident(n) [Semicolon] { vec![n] }
arg_list ::= assignment(n) {vec![n]}
arg_list ::= arg_list(mut args) Comma Ident(n) { args.push(n); args }
block ::= LBrace stmt_list?(ss) RBrace { ss.unwrap_or(Vec::new()) }
stmt_list ::= stmt(s) { vec![s] }
stmt_list ::= stmt_list(mut ss) stmt(s) { ss.push(s); ss }
stmt ::= block(ss) { ss.get(0).unwrap_or(&Vec::new()).clone() }
stmt ::= expr(e) Semicolon {e}
stmt ::= Ident(a) Eq expr(b) Semicolon { vec![a] }
stmt ::= Var Ident(a) Eq expr(b) Semicolon { vec![a]}
stmt ::= If LParen expr(e) RParen stmt(s1) Else stmt(s2) {println!("should execute this"); s1 }
stmt ::= If LParen expr(e) RParen stmt(s1) [Else] {println!("should not execute this"); s1 }
stmt ::= While LParen expr(e) RParen stmt(s) { s }
stmt ::= Return expr(e) Semicolon { e }
stmt ::= Return Semicolon { vec!["".to_string()] }
stmt ::= Break Semicolon { vec!["".to_string()] }
stmt ::= Continue Semicolon {vec!["".to_string()] }
stmt ::= Todo(a) expr(b) Semicolon {vec![a]}
stmt ::= CheckMark(a) {vec![a]}
expr ::= Num(n) { vec!["".to_string()] }
expr ::= String(n) {vec!["".to_string()]}
expr ::= Ident(n) {vec!["".to_string()] }
expr ::= Ident(n) LParen expr_list?(es) RParen {vec![n]}
expr ::= LParen expr(e) RParen { e }
expr ::= expr(a) Add expr(b) { a}
expr ::= expr(a) Sub expr(b) { a }
expr ::= expr(a) Mul expr(b) {a}
expr ::= expr(a) Div expr(b) {a}
expr ::= Sub expr(a) [Not] { a }
expr ::= expr(a) Equal expr(b) { a }
expr ::= expr(a) Neq expr(b) { a }
expr ::= expr(a) And expr(b) { a }
expr ::= expr(a) Or expr(b) {a }
expr ::= Not expr(a) { a }
expr ::= expr(a) Less expr(b) { a }
expr ::= expr(a) Greater expr(b) { a }
expr ::= expr(a) LessEq expr(b) { a }
expr ::= expr(a) GreaterEq expr(b) { a }
expr_list ::= expr(e) { vec![e] }
expr_list ::= expr_list(mut es) Comma expr(e) { es.push(e); es }
}
fn main() {
let mut parse = parser::Parser::new();
let code = vec![
parser::Token::If,
parser::Token::LParen,
parser::Token::Num(1),
parser::Token::RParen,
parser::Token::LBrace,
parser::Token::Return,
parser::Token::Semicolon,
parser::Token::RBrace,
parser::Token::Else,
parser::Token::LBrace,
parser::Token::Return,
parser::Token::Semicolon,
parser::Token::RBrace
];
for i in code {
println!("gettin {:?}",i);
parse.parse(i).unwrap();
}
parse.end_of_input().unwrap();
}
For anyone who stumbles across a similar problem, the issue was the associativity of the else token, changing the else token from left association to right association did the trick

How to manage semantic rule of declaration of variable in bison

I have to build a compiler that translates the java language into pyhton. I'm using the Flex and Bison tools. I created the flex file and I defined the syntactic grammar in Bison for some restrictions that I have to implement (such as array, management of cycles, management of a class, management of logical-arithmetic operators, etc.).
I'm having trouble understanding how to handle semantic rules. For example, I should handle the semantics for import statement and variable declaration, add the variable in the symbol table and then handle the translation.
This is the structure of the symbol table in the symboltable.h module:
struct symtable{
char *scopename; // key
struct symtable2 *subtable; // symble table secondary
UT_hash_handle hh; // to make the structure hash
}
struct symtable2 // secondary symbol structure
{
char *name; // Name of the symbol (key)
char *element; // it can be a variable or an array
char *type; // Indicates the type assumed by the token
(int, float, char, bool)
char *value; // value assigned to the variable
int dim; // Array size, it is zero in the case of a variable.
UT_hash_handle hh; // to make the structure hash
};
And this is the add symbol function:
void add_symbol( char *name, char *current_scopename, char *element, char *current_type, char *current_value, int dim, int nr) { //Function to add a new symbol in the symbol table
struct symtable *s;
HASH_FIND_PTR(symbols, current_scopename, s);
if (s == NULL) {
s = (struct symtable *)malloc(sizeof *s);
s->scopename =current_scopename;
s->subtable=NULL;
s->scopename =current_scopename;
HASH_ADD_KEYPTR(hh,symbols,s->scopename,strlen(s->scopename),s);
}
struct symtable2 *s2;
HASH_FIND_PTR(symbols2, name, s2);
if (s2==NULL) {
s2 = (struct symtable2 *)malloc(sizeof *s2);
s2->name = name;
s2->element = element;
s2->type = current_type;
s2->value = current_value;
s2->dim = dim;
HASH_ADD_KEYPTR(hh,s->subtable,s2->name,strlen(s2->name),s2);
} else {
if (strcmp( s2->type,current_type) == 0){
s2->value =current_value;
} else {
printf("\033[01;31mRiga %i. [FATALE] SEMANTIC ERROR: assignment violates the primitive type of the variable.\033[00m\n", nr);
printf("\n\n\033[01;31mParsing failed.\033[00m\n");
}
}
}
This is a part of the bison file with the grammar to handle import statement and the variable declaration:
%{
#include <stdio.h>;
#include <ctype.h>;
#include <symboltable.h>;
file *f_ptr;
%}
%start program
%token NUMBER
%token ID
%token INT
%token FLOAT
%token DOUBLE
%token CHAR
%token IMPORT
%right ASSIGNOP
%left SCOR
%left SCAND
%left EQ NE
%left LT GT LE GE
%left ADD SUB
%left MULT DIV MOD
%right NOT
%left '(' ')' '[' ']'
%%
program
: ImportStatement GlobalVariableDeclarations
;
ImportStatement
: IMPORT LibraryName ';' { delete_file (); f_ptr = open_file (); fprintf(fptr, "import array \n"); }
;
LibraryName
: 'java.util.*'
;
GlobalVariableFieldDeclarations
: type GlobalVariableDeclarations ';'
;
GlobalVariableDeclarations
: GlobalVariableDeclaration
| GlobalVariableDeclarations ',' GlobalVariableDeclaration
;
GlobalVariableDeclaration
: VariableName
| VariableName ASSIGNOP VariableInitializer {if (typeChecking($1,$3)== 0) {$1= $3; $$=$1;}}
;
VariableName
: ID {$$ = $1 ;}
;
type
: INT
| CHAR
| FLOAT
| DOUBLE
| BOOLEAN
;
VariableInitializers
: VariableInitializer
| VariableInitializers ',' VariableInitializer
;
VariableInitializer
: ExpressionStatement
;
ExpressionStatement
: VariableName
| NUMBER
| ArithmeticExpression
| RelationalExpression
| BooleanExpression
;
ArithmeticExpression
: ExpressionStatement ADD ExpressionStatement
| ExpressionStatement SUB ExpressionStatement
| ExpressionStatement MULT ExpressionStatement
| ExpressionStatement DIV ExpressionStatement
| ExpressionStatement MOD ExpressionStatement
;
RelationalExpression
: ExpressionStatement GT ExpressionStatement
| ExpressionStatement LT ExpressionStatement
| ExpressionStatement GE ExpressionStatement
| ExpressionStatement LE ExpressionStatement
| ExpressionStatement EQ ExpressionStatement
| ExpressionStatement NE ExpressionStatement
;
BooleanExpression
: ExpressionStatement SCAND ExpressionStatement
| ExpressionStatement SCOR ExpressionStatement
| NOT ExpressionStatement
;
%%
int yyerror (char *s)
{
printf ("%s \n",s);
}
int main (void) {
return yyparse();
}
int typeChecking (variable1, variable2) {
struct symtable2 *s2;
s2=find_symbol (scopename, variable1);
if (s2!=NULL) {
int type1= s2->type;
char element1 = s2->element;
}
else{
printf("\n\n\033[01;31mVariable 1 not defined.\033[00m\n");
return -1;
}
s2=find_symbol (scopename, variable2);
if (s2!=NULL) {
int type2= s2->type;
char element2 = s2->element;
}
else{
printf("\n\n\033[01;31mVariable 2 not defined.\033[00m\n");
return -1;
}
if(element1=='variable' && element2=='variable'){
if (type1 == type2){
return 0;
}
else {
return 1;
}
}
else {
printf("\n\n\033[01;31m Different elements.\033[00m\n");
return -1;
}
}
I am a beginner with the syntax of the bison for the management of semantics, on the following productions I have doubts about the relative semantic rule:
GlobalVariableFieldDeclarations
: type GlobalVariableDeclarations ';'
;
GlobalVariableDeclarations
: GlobalVariableDeclaration
| GlobalVariableDeclarations ',' GlobalVariableDeclaration
;
GlobalVariableDeclaration
: VariableName
| VariableName ASSIGNOP VariableInitializer {if (typeChecking($1,$3)== 0) {$1= $3; $$=$1;}}
;
VariableName
: ID {$$ = $1 ;}
;
Is it correct to manage semantics in this way for a GlobalVariableDeclaration production? And how can I insert the required parameter values, in the symbol table, via the add_symbol function? (Or better, how can I acquire the required parameters starting from productions to insert them in the add_symbol function that I have implemented?) Forgive me but I am a beginner, and many things about the semantics are not clear to me. I hope you have the patience to help me, I thank you in advance.
You should use Bison to build an AST and then you would perform semantic analysis on the tree instead of in the grammar. Building an AST allows you to perform analysis on more complex data structures then just the grammar rules you built in Bison.
Once you have your AST for the input you can then make rules for how to convert that AST into a python program with the same syntax.
Here is an example of a Bison/Flex compiler for the Decaf language that might give you some ideas https://github.com/davidcox143/Decaf-Compiler

How to write yacc grammar rules to identify function definitions vs function calls?

I have started learning about YACC, and I have executed a few examples of simple toy programs. But I have never seen a practical example that demonstrates how to build a compiler that identifies and implements function definitions and function calls, array implementation and so on, nor has it been easy to find an example using Google search. Can someone please provide one example of how to generate the tree using YACC? C or C++ is fine.
Thanks in advance!
Let's parse this code with yacc.
file test contains valid C code that we want to parse.
int main (int c, int b) {
int a;
while ( 1 ) {
int d;
}
}
A lex file c.l
alpha [a-zA-Z]
digit [0-9]
%%
[ \t] ;
[ \n] { yylineno = yylineno + 1;}
int return INT;
float return FLOAT;
char return CHAR;
void return VOID;
double return DOUBLE;
for return FOR;
while return WHILE;
if return IF;
else return ELSE;
printf return PRINTF;
struct return STRUCT;
^"#include ".+ ;
{digit}+ return NUM;
{alpha}({alpha}|{digit})* return ID;
"<=" return LE;
">=" return GE;
"==" return EQ;
"!=" return NE;
">" return GT;
"<" return LT;
"." return DOT;
\/\/.* ;
\/\*(.*\n)*.*\*\/ ;
. return yytext[0];
%%
file c.y for input to YACC:
%{
#include <stdio.h>
#include <stdlib.h>
extern FILE *fp;
%}
%token INT FLOAT CHAR DOUBLE VOID
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT
%token NUM ID
%token INCLUDE
%token DOT
%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
%%
start: Function
| Declaration
;
/* Declaration block */
Declaration: Type Assignment ';'
| Assignment ';'
| FunctionCall ';'
| ArrayUsage ';'
| Type ArrayUsage ';'
| StructStmt ';'
| error
;
/* Assignment block */
Assignment: ID '=' Assignment
| ID '=' FunctionCall
| ID '=' ArrayUsage
| ArrayUsage '=' Assignment
| ID ',' Assignment
| NUM ',' Assignment
| ID '+' Assignment
| ID '-' Assignment
| ID '*' Assignment
| ID '/' Assignment
| NUM '+' Assignment
| NUM '-' Assignment
| NUM '*' Assignment
| NUM '/' Assignment
| '\'' Assignment '\''
| '(' Assignment ')'
| '-' '(' Assignment ')'
| '-' NUM
| '-' ID
| NUM
| ID
;
/* Function Call Block */
FunctionCall : ID'('')'
| ID'('Assignment')'
;
/* Array Usage */
ArrayUsage : ID'['Assignment']'
;
/* Function block */
Function: Type ID '(' ArgListOpt ')' CompoundStmt
;
ArgListOpt: ArgList
|
;
ArgList: ArgList ',' Arg
| Arg
;
Arg: Type ID
;
CompoundStmt: '{' StmtList '}'
;
StmtList: StmtList Stmt
|
;
Stmt: WhileStmt
| Declaration
| ForStmt
| IfStmt
| PrintFunc
| ';'
;
/* Type Identifier block */
Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;
/* Loop Blocks */
WhileStmt: WHILE '(' Expr ')' Stmt
| WHILE '(' Expr ')' CompoundStmt
;
/* For Block */
ForStmt: FOR '(' Expr ';' Expr ';' Expr ')' Stmt
| FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
| FOR '(' Expr ')' Stmt
| FOR '(' Expr ')' CompoundStmt
;
/* IfStmt Block */
IfStmt : IF '(' Expr ')'
Stmt
;
/* Struct Statement */
StructStmt : STRUCT ID '{' Type Assignment '}'
;
/* Print Function */
PrintFunc : PRINTF '(' Expr ')' ';'
;
/*Expression Block*/
Expr:
| Expr LE Expr
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Assignment
| ArrayUsage
;
%%
#include"lex.yy.c"
#include<ctype.h>
int count=0;
int main(int argc, char *argv[])
{
yyin = fopen(argv[1], "r");
if(!yyparse())
printf("\nParsing complete\n");
else
printf("\nParsing failed\n");
fclose(yyin);
return 0;
}
yyerror(char *s) {
printf("%d : %s %s\n", yylineno, s, yytext );
}
A Makefile to put it together. I use flex-lexer and bison but the example will also work with lex and yacc.
miniC: c.l c.y
bison c.y
flex c.l
gcc c.tab.c -ll -ly
Compile and parse the test code:
$ make
bison c.y
flex c.l
gcc c.tab.c -ll -ly
c.tab.c: In function ‘yyparse’:
c.tab.c:1273:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
yychar = yylex ();
^
c.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’ [-Wimplicit-function-declaration]
yyerror (YY_("syntax error"));
^
c.y: At top level:
c.y:155:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
yyerror(char *s) {
^
$ ls
a.out c.l CMakeLists.txt c.tab.c c.y lex.yy.c Makefile README.md test
$ ./a.out test
Parsing complete
For reading resources I can recommend the books Modern Compiler Implementation in C by Andrew Appel and the flex/bison book by John Levine.

Bison nonassociative precedence rule

I am trying to enforce some parsing errors by defining a non-associative precedence.
Here is part of my grammar file:
Comparison :
Value ComparisonOp Value
{
$2->Left($1);
$2->Right($3);
$$ = $2;
}
;
Expressions like 1 = 2 should parse, but expressions like 1 = 2 = 3 are not allowed in the grammar. To accommodate this, I tried to make my operator non associative as follows:
%nonassoc NONASSOCIATIVE
.
.(rest of the grammar)
.
Comparison :
Value ComparisonOp Value %prec NONASSOCIATIVE
{
$2->Left($1);
$2->Right($3);
$$ = $2;
}
;
1 = 2 = 3 still passes, Could someone please tell me what I am doing wrong?
You need to set the associativity of the ComparisonOp token(s). Either
%nonassoc ComparisonOp NONASSOCIATIVE
if ComparisonOp is a token or something like
%nonassoc '=' '<' '>' NOT_EQUAL GREATOR_OR_EQUAL LESS_OR_EQUAL NONASSOCIATIVE
if you have mulitple tokens and ComparisonOp is a rule that expands to any of them
As a concrete example, the following works exactly like you are requesting:
%{
#include <stdio.h>
#include <ctype.h>
int yylex();
void yyerror(const char *);
%}
%nonassoc '=' '<' '>' CMPOP
%left '+' '-' ADDOP
%left '*' '/' '%' MULOP
%token VALUE
%%
expr: expr cmp_op expr %prec CMPOP
| expr add_op expr %prec ADDOP
| expr mul_op expr %prec MULOP
| VALUE
| '(' expr ')'
;
cmp_op: '=' | '<' | '>' | '<' '=' | '>' '=' | '<' '>' ;
add_op: '+' | '-' ;
mul_op: '*' | '/' | '%' ;
%%
int main() { return yyparse(); }
int yylex() {
int ch;
while(isspace(ch = getchar()));
if (isdigit(ch)) return VALUE;
return ch;
}
void yyerror(const char *err) { fprintf(stderr, "%s\n", err); }
so if you are having other problems, try posting an MVCE that shows the actual problem you are having...

Syntax Error in my grammar production with Bison

As a test file, I'm using this piece of code to see whether or not the parser is working well:
/* A test program */
void main(void){
int x;
int y;
int z;
x= 10;
y = 20;
z =x* (x+y);
}
However, I'm getting a syntax error right after the first void, and I don't quite understand why it won't even reach the parameter part. If there's any tips or see something that perhaps I'm not seeing, I'd appreciate it. I know that I have yet to solve the dangling else problem, but that shouldn't be of an issue for this test.
Here's my parser so far:
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* external function prototypes */
extern int yylex();
extern int initLex(int , char **);
/* external global variables */
extern int yydebug;
extern int yylineno;
/* function prototypes */
void yyerror(const char *);
/* global variables */
%}
/* YYSTYPE */
/* terminals */
/* Start adding token names here */
/* Your token names must match Project 1 */
/* The file cmparser.tab.h was gets generated here */
%token TOK_ERROR
%token TOK_ELSE
%token TOK_IF
%token TOK_RETURN
%token TOK_VOID
%token TOK_INT
%token TOK_WHILE
%token TOK_PLUS
%token TOK_MINUS
%token TOK_MULT
%token TOK_DIV
%token TOK_LT
%token TOK_LE
%token TOK_GT
%token TOK_GE
%token TOK_EQ
%token TOK_NE
%token TOK_ASSIGN
%token TOK_SEMI
%token TOK_COMMA
%token TOK_LPAREN
%token TOK_RPAREN
%token TOK_LSQ
%token TOK_RSQ
%token TOK_LBRACE
%token TOK_RBRACE
%token TOK_NUM
%token TOK_ID
/* associativity and precedence */
/* specify operator precedence (taken care of by grammar) and associatity here -
-uncomment */
//%left
%left '*' '/'
%left '+' '-'
%nonassoc '<' '>' "<=" ">="
%left "==" "!="
%right '='
%nonassoc error
/* Begin your grammar specification here */
%%
Start : /* put your RHS for this rule here */
Declarations
{ printf ("Declaration.\n"); }
; /* note that the rule ends with a semicolon */
Declarations :
/* empty */
{}
| Vardeclaration Declarations
{ printf ("Var-declaration.\n");}
| Fundeclaration Declarations
{ printf ("Fun-declaration.\n");}
;
Vardeclaration :
Typespecifier TOK_ID ';'
{ printf ("Not an array.\n");}
| Typespecifier TOK_ID '[' TOK_NUM ']' ';'
{ printf ("An array.\n");}
;
Typespecifier :
TOK_INT
{ printf ("Type int.\n");}
| TOK_VOID
{ printf("Type void.\n");}
;
Fundeclaration :
Typespecifier TOK_ID '(' Params ')' Compoundstmt
{ printf ("Function Declaration.");}
;
Params :
Paramlist
| TOK_VOID
{ printf ("Void param.\n");}
;
Paramlist :
Paramlist ',' Param
| Param
;
Param :
Typespecifier TOK_ID
| Typespecifier TOK_ID '[' ']'
;
Compoundstmt :
'{' Localdeclaration Statement '}'
;
Localdeclaration :
Vardeclaration Localdeclaration
| /* empty */
;
Statement :
Expressionstmt Statement
| Compoundstmt Statement
| Selectionstmt Statement
| Iterationstmt Statement
| Returnstmt Statement
| /* empty */
;
Expressionstmt :
Expression ';'
| ';'
;
Selectionstmt :
TOK_IF '(' Expression ')' Statement
| TOK_IF '(' Expression ')' Statement TOK_ELSE Statement
;
Iterationstmt :
TOK_WHILE '(' Expression ')' Statement
;
Returnstmt :
TOK_RETURN ';'
| TOK_RETURN Expression ';'
;
Expression :
Var '=' Expression
| SimpleExpression
;
Var :
TOK_ID
| TOK_ID '[' Expression ']'
;
SimpleExpression :
AdditiveExpression Relop AdditiveExpression
| AdditiveExpression
;
Relop :
"<="
| '<'
| '>'
| ">="
| "=="
| "!="
;
AdditiveExpression :
AdditiveExpression Addop Term
| Term
;
Addop :
'+'
| '-'
;
Term :
Term Mulop Factor
| Factor
;
Mulop :
'*'
| '/'
;
Factor :
'(' Expression ')'
| Var
| Call
| TOK_NUM
;
Call :
TOK_ID '(' Args ')'
;
Args :
Arglist
| /* empty */
;
Arglist :
Arglist ',' Expression
| Expression
;
%%
void yyerror (char const *s) {
fprintf (stderr, "Line %d: %s\n", yylineno, s);
}
int main(int argc, char **argv){
initLex(argc,argv);
#ifdef YYLLEXER
while (gettok() !=0) ; //gettok returns 0 on EOF
return;
#else
yyparse();
#endif
}
Your grammar looks ok, so my best guess is that your lexer doesn't recognize the string void as TOK_VOID, but instead returns something else.
Try compiling your parser with -DYYDEBUG and setting yydebug = 1 before calling yyparse to see what its getting from the lexer.

Resources