Production Never Reduced in Menhir - parsing

I've been at this for a good two hours now but I can't get the compiler to stop giving me this warning.
Warning: production type_anno -> is never reduced.
Below is my parser.mly code.
(*Optional type annotation*)
type_anno:
| {None}
| COL;name = ID {Some name}
;
statement:
|(*SEND stmt*) SEND; chn = ID; O_TBRACE; var1 = ID; COMMA; var2 = ID; C_TBRACE {Send(chn,var1,var2)}
|(*RECV stmt*) O_TBRACE; var1 = ID; COMMA; var2=ID; C_TBRACE; ARW; RECV; chn = ID; S_COL ; stmt = statement {Recv(var1,var2,chn, stmt)}
|(*SELECT stmt*) chn=ID; DOT; lbl=ID; O_TBRACE; l1 = ID; C_TBRACE; S_COL; stmt = statement {Select(chn, lbl, l1, stmt)}
|(*CASE stmt*) CASE; chn =ID; O_PAREN; branches = branch_statement C_PAREN {Case(chn,branches)}
|(*CAST stmt*) CAST; chn = ID;O_TBRACE;l1=ID;C_TBRACE; {Cast(chn,l1)}
|(*SHIFT stmt*) l1 = ID ; ARW; SHIFT; chn=ID; S_COL;stmt=statement {Shift(chn, l1, stmt)}
|(*ACCEPT stmt*) l1 = ID ; ARW; ACC; chn=ID; S_COL;stmt=statement {Accept(chn, l1, stmt)}
|(*ACQUIRE stmt*) l1 = ID ; ARW; ACQ; chn=ID; S_COL;stmt=statement {Acquire(chn, l1, stmt)}
|(*DETACH stmt*) DET; chn=ID; O_TBRACE; l1=ID;C_TBRACE;{Detach(chn,l1)}
|(*RELEASE stmt*) l1 = ID ; ARW; REL; chn=ID; S_COL;stmt=statement {Release(chn, l1, stmt)}
|(*CLOSE stmt*) CLOSE; chn=ID;{Close(chn)}
|(*WAIT stmt*) WAIT; chn=ID;stmt=statement;{Wait(chn,stmt)}
|(*SPAWN stmt*) SPAWN; ref =ID; ARW; fn_id =ID ;inp = params; S_COL; stmt=statement {Spawn(ref,fn_id,inp,stmt)}
|(*NEW stmt*) chn =ID;anno=type_anno; ARW; NEW; stmt =statement ; S_COL ;stmt0 = statement {match anno with Some a -> NewA(chn,a,stmt, stmt0) | None -> New(chn,stmt,stmt0)}
|(*OUTPUT stmt*) PRINT; O_PAREN;var= ID; C_PAREN {Out(var)}
|(*FWD stmt*) FWD; O_PAREN; chn1=ID; COMMA;chn2 = ID; C_PAREN {Fwd(chn1,chn2)}
|(*SPL stmt*) O_TBRACE; var1 = ID; COMMA; var2=ID; C_TBRACE; ARW; SPL; chn = ID; S_COL ; stmt = statement {Split(var1,var2,chn, stmt)}

The more important part of the warning emitted by menhir is that your grammar has a shift/reduce conflict that was resolved in favor of the shift.
Due to this conflict, the production
type_anno:
| {None}
can never be reduced, since it is overridden by all the productions in the statement rule that accept ARW after an ID:
|(*SHIFT stmt*) l1 = ID ; ARW; ...
|(*ACCEPT stmt*) l1 = ID ; ARW; ...
|(*ACQUIRE stmt*) l1 = ID ; ARW; ...
|(*RELEASE stmt*) l1 = ID ; ARW; ...
...
|(*NEW stmt*) chn =ID;anno=type_anno; ARW; NEW; ...
You can confirm this in Menhir's interpret mode (menhir --interpret): the parser as written rejects the input
ID ARW NEW
The smallest fix for the shift/reduce conflict is to make the rule type_anno an inline rule:
(* Optional type annotation.
   %inline makes Menhir substitute both alternatives into every use site, so
   the parser never has to reduce the empty production before it has seen
   whether the token following the ID is ARW or COL. *)
%inline type_anno:
| {None}
| COL;name = ID {Some name}
;

Related

How can I debug my flex/bison grammar?

This is a very silly problem. There are no errors in the grammar rules AFAIK, but it's not giving the right output. I have been staring at it, but the mistake is not visible to me.
What tools are available to me to help me see what is going on in a parse? My attempts to insert tracing code are a lot of work and don't seem to be helping me much.
parser.y
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"
int yyparse(void);
int yylex(void);
extern char* yytext;
extern FILE * yyin;
extern int tableSize;
FILE *logout;
extern int line_count;
extern char *arr[100];
extern char *final_arr[100];
SymbolTable *table;
/* Error callback invoked by the generated parser: echoes the diagnostic
   text, followed by a newline, on stderr. */
void yyerror (const char *s)
{
    fprintf(stderr, "%s\n", s);
}
%}
%union {
class SymbolInfo* sym;
char *s;
float f;
}
%error-verbose
%verbose
%token COMMA INT ID SEMICOLON FLOAT VOID LCURL RCURL RETURN NOT IF FOR WHILE PRINTLN LPAREN RPAREN
%token CONST_INT CONST_FLOAT LTHIRD RTHIRD
%token ADDOP MULOP INCOP DECOP RELOP LOGICOP ASSIGNOP
%token <f> DOUBLE
//%expect 1
%precedence THEN
%precedence ELSE
%left "<" ">" "<=" ">=" "=" "!="
%left "+" "-"
%left "*" "/"
%left UMINUS
%%
start : program { printf("start -> program\n");
fprintf(logout,"%d : start -> program\n",line_count);
}
;
program : program unit {
printf("program -> program unit\n");
fprintf(logout,"%d : program -> program unit\n\n",line_count);
for(int j = 0; final_arr[j] != NULL; j++)
{
fprintf(logout,"%s",final_arr[j]);
}
fprintf(logout,"\n\n");
}
| unit {
printf("program -> unit\n");
fprintf(logout,"%d : program -> unit\n\n",line_count);
for(int j = 0; final_arr[j] != NULL; j++)
{
fprintf(logout,"%s",final_arr[j]);
}
fprintf(logout,"\n\n");
}
;
unit : var_dec {
printf("unit -> var_dec\n");
fprintf(logout,"%d : unit -> var_dec\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|func_declaration {
fprintf(logout,"%d : unit -> func_declaration\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|func_definition {
fprintf(logout,"%d : unit -> func_definition\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
;
func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON {
printf("func_declaration -> type_specifier id LPAREN parameter_list RPAREN SEMICOLON\n");
fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| type_specifier ID LPAREN RPAREN SEMICOLON {
printf("func_declaration -> type_specifier id LPAREN RPAREN SEMICOLON\n");
fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement {
printf("func_definition -> type_specifier ID LPAREN parameter_list RPAREN compound_statement\n");
fprintf(logout,"%d : func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement\n\n", line_count);
}
| type_specifier ID LPAREN RPAREN compound_statement {
printf("func_definition -> type_specifier id LPAREN RPAREN compound_statement\n");
fprintf(logout,"%d : func_definition : type_specifier ID LPAREN RPAREN compound_statement\n\n", line_count);
}
;
parameter_list : parameter_list COMMA type_specifier ID {
printf("parameter_list -> parameter_list COMMA type_specifier ID\n");
fprintf(logout,"%d : parameter_list : parameter_list COMMA type_specifier ID\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| parameter_list COMMA type_specifier {
printf("parameter_list -> parameter_list COMMA type_specifier\n");
fprintf(logout,"%d : parameter_list : parameter_list COMMA type_specifier\n\n", line_count);
}
| type_specifier ID {
printf("parameter_list -> type_specifier ID\n");
fprintf(logout,"%d : parameter_list : type_specifier ID\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| type_specifier {
printf("parameter_list -> type_specifier\n");
fprintf(logout,"%d : parameter_list : type_specifier \n\n", line_count);
}
;
compound_statement : LCURL statements RCURL {
printf("compound_statement -> LCURL statements RCURL\n");
fprintf(logout,"compound_statement : LCURL statements RCURL\n\n");
}
| LCURL RCURL
;
var_dec: type_specifier declaration_list SEMICOLON {
printf("var_dec -> type_specifier declaration_list SEMICOLON \n");
fprintf(logout,"%d : var_dec: type_specifier declaration_list SEMICOLON \n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
type_specifier : INT {printf("type_specifier -> INT\n");
fprintf(logout,"%d : type_specifier-> INT\n\n%s\n\n", line_count,yytext);
}
| FLOAT {printf("type_specifier ->FLOAT\n");
fprintf(logout,"%d : type_specifier-> FLOAT\n\n%s\n\n",line_count, yytext);
}
| VOID {printf("type_specifier -> VOID\n");
fprintf(logout,"%d : type_specifier-> VOID\n\n%s\n\n",line_count, yytext);
}
;
declaration_list : declaration_list COMMA ID {
printf("declaration_list -> declaration_list COMMA ID\n");
fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID\n\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| declaration_list COMMA ID LTHIRD CONST_INT RTHIRD {
printf("declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n");
fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|ID {
printf("declaration_list -> ID\n");
fprintf(logout,"%d : declaration_list -> ID\n\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|ID LTHIRD CONST_INT RTHIRD {
printf("declaration_list -> ID LTHIRD CONST_INT RTHIRD\n");
fprintf(logout,"%d : declaration_list -> ID LTHIRD CONST_INT RTHIRD\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
statements : statement {
printf("statements -> statement\n");
fprintf(logout,"%d : statements : statement\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| statements statement
;
statement : var_dec
| expression_statement
| compound_statement
| FOR LPAREN expression_statement expression_statement expression RPAREN statement
| IF LPAREN expression RPAREN statement
| WHILE LPAREN expression RPAREN statement
| PRINTLN LPAREN ID RPAREN SEMICOLON
| RETURN expression SEMICOLON {
printf("statement -> RETURN expression SEMICOLON\n");
fprintf(logout,"%d : statement : RETURN expression SEMICOLON\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
;
expression_statement : SEMICOLON
| expression SEMICOLON
;
variable : ID {
printf("variable -> ID\n");
fprintf(logout,"%d : variable : ID\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| ID LTHIRD expression RTHIRD
;
expression : logic_expression {
printf("expression -> logic_expression\n");
fprintf(logout,"%d : expression : logic_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| variable ASSIGNOP logic_expression
;
logic_expression : rel_expression
| rel_expression LOGICOP rel_expression
;
rel_expression : simple_expression {
printf("rel_expression -> simple_expression \n");
fprintf(logout,"%d : rel_expression : simple_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| simple_expression RELOP simple_expression
;
simple_expression : term {
printf("simple_expression -> term\n");
fprintf(logout,"%d : simple_expression : term \n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| simple_expression ADDOP term {
printf("simple_expression -> simple_expression ADDOP term\n");
fprintf(logout,"simple_expression : simple_expression ADDOP term \n\n");
fprintf(logout, "%s\n\n",yytext);
}
;
term : unary_expression {
printf("term -> unary_expression\n");
fprintf(logout,"%d : term : unary_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| term MULOP unary_expression
;
unary_expression : ADDOP unary_expression
| NOT unary_expression
| factor {
printf("unary_expression -> factor\n");
fprintf(logout,"%d : unary_expression : factor\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
;
factor : variable {
printf("factor -> variable\n");
fprintf(logout,"%d : factor : variable\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| ID LPAREN argument_list RPAREN
| LPAREN expression RPAREN
| CONST_INT
| CONST_FLOAT
| variable INCOP
| variable DECOP
;
argument_list : arguments
|
;
arguments : arguments COMMA logic_expression
| logic_expression
;
%%
/*
 * Entry point: parses the file named by argv[1] with yyparse() while the
 * grammar actions write their trace to log.txt.
 *
 * Exits with status 1 when no input file is given or when either file cannot
 * be opened; otherwise returns 0 once yyparse() has finished.
 */
int main(int argc, char *argv[])
{
    FILE *fp;

    /* argv[1] is only meaningful when an argument was actually supplied. */
    if (argc < 2)
    {
        fprintf(stderr, "usage: parser <input-file>\n");
        exit(1);
    }

    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        /* logout has not been opened yet, so report on stderr instead. */
        fprintf(stderr, "cannot open file\n");
        exit(1);
    }

    /* The grammar actions fprintf to logout unconditionally, so do not start
       parsing without a valid log stream. */
    logout = fopen("log.txt", "w");
    if (logout == NULL)
    {
        fprintf(stderr, "cannot open log.txt\n");
        fclose(fp);
        exit(1);
    }

    yyin = fp;
    yyparse();

    fclose(fp);
    fclose(logout);
    return 0;
}
input.txt
int var(int a, int b){
return a+b;
}
output I'm getting :
type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
rel_expression -> simple_expression
expression -> logic_expression
syntax error, unexpected ID, expecting SEMICOLON
expected output is :
type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression : simple_expression ADDOP term
rel_expression -> simple_expression
logic_expression : rel_expression
expression -> logic_expression
statement : RETURN expression SEMICOLON
statements : statement
compound_statement : LCURL statements RCURL
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
unit : func_definition
program : program unit
start : program
Adding the flex file just in case
%option noyywrap
%{
#include<stdlib.h>
#include<stdio.h>
#include "y.tab.h"
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"
void yyerror (char *);
extern YYSTYPE yylval;
extern SymbolTable *table;
extern FILE *logout;
char *arr[100];
char *final_arr[100];
int k; //final_arr count
int i = 0; //arr count
int line_count = 1;
%}
id [a-z]*
DOUBLE (([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+))
newline \n
%%
{newline} {
arr[i] = "\n",final_arr[k] = arr[i];
i++; k++;
line_count++;
}
[ \t]+ {}
(([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+)) {
yylval.f = atof(yytext);
return DOUBLE;
}
"int" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "int ";
final_arr[k] = "int ";
i++; k++;
return INT;
}
"float" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "float "; final_arr[k] = "float ";
i++; k++;
return FLOAT;
}
"void" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "void "; final_arr[k] = "void ";
i++; k++;
return VOID;
}
";" {
arr[i] = ";";final_arr[k] = ";";
i++; k++;
return SEMICOLON;}
"," {
arr[i] = ","; final_arr[k] = ",";
i++; k++;
return COMMA;
}
"(" {
arr[i] = "(";final_arr[k] = "(";
i++; k++;
return LPAREN;}
")" {
arr[i] = ")";final_arr[k] = ")";
i++; k++;
return RPAREN;}
"{" {return LCURL;}
"}" {return RCURL;}
{id} {
yylval.s = strdup(yytext);
arr[i] = strdup(yytext); final_arr[k] = strdup(yytext);
k++; i++;
for(int j = 1; arr[j] != NULL; j++)
{
//fprintf(logout,"%s", arr[j]);
//fprintf(logout,"arr [%d] %s\n ",j,arr[j]);
}
//fprintf(logout,"\n\n");
return ID;
}
%%
You seem to have spent an awful lot of effort trying to implement a way of tracing what's going on in your parser, and to little effect since the problem here is simply a missing lexer keyword rule.
You would be much better off using the built-in debugging features of flex and bison. Then your grammar and lexer would be much simpler and easier to read, and the debugging output would be more complete (and would let you trace the behaviour through the state table).
Here's a quick summary. It's a snap, really.
Add --debug to your bison command. That will cause bison to generate code to trace your parse. (If you're lazy, you can use -t -- for trace -- which is the Posix standard command-line option, and should also work with yacc, byacc, btyacc, etc., etc.)
Add the following three lines at the beginning of main, assuming that main is in your .y file:
#ifdef YYDEBUG
yydebug = 1;
#endif
For additional bonus points, you could make this assignment conditional on some command line flag.
Once you do that, you will receive the following trace output:
... snip ... Pick up the trace at the ) at the end of the parameter list
Reading a token: Next token is token RPAREN ()
Shifting token RPAREN ()
Entering state 28
Reading a token: Next token is token LCURL ()
Shifting token LCURL ()
Entering state 25
Reading a token: Next token is token ID ()
Shifting token ID ()
Entering state 44
Reading a token: Next token is token ID ()
... snip ...
Note that two IDs were returned after the curly bracket, corresponding to the tokens return and a.
You can also enable tracing in flex with flex --debug (or -d). This causes the scanner to produce an output line of the form
--accepting rule at line 85 ("return")
for every accepted token (and some other lines). You need to check the line numbers against your source code, unfortunately, but in this case you might have noticed the similarity between the above and
--accepting rule at line 85 ("b")
For additional debugging simplicity, it's worth getting into the habit of writing your scanner in such a way that it can be compiled independently of the parser. Then you can test your scanner by compiling it separately using the main() implementation in -lfl.
References and more debugging information:
Debugging Your Parser in the bison manual. The section on tracing includes a fully-worked example using one of the example parsers in the manual.
Also see Printing semantic values which documents the %printer declaration.
Debugging Options in the flex manual.

ANTLR 3 bug, mismatched input, but what's wrong?

I have the following problem:
My ANTLR 3 grammar compiles, but my simple testprogram doesn't work. The grammar is as follows:
grammar Rietse;
options {
k=1;
language=Java;
output=AST;
}
tokens {
COLON = ':' ;
SEMICOLON = ';' ;
OPAREN = '(' ;
CPAREN = ')' ;
COMMA = ',' ;
OCURLY = '{' ;
CCURLY = '}' ;
SINGLEQUOTE = '\'' ;
// operators
BECOMES = '=' ;
PLUS = '+' ;
MINUS = '-' ;
TIMES = '*' ;
DIVIDE = '/' ;
MODULO = '%' ;
EQUALS = '==' ;
LT = '<' ;
LTE = '<=' ;
GT = '>' ;
GTE = '>=' ;
UNEQUALS = '!=' ;
AND = '&&' ;
OR = '||' ;
NOT = '!' ;
// keywords
PROGRAM = 'program' ;
COMPOUND = 'compound' ;
UNARY = 'unary' ;
DECL = 'decl' ;
SDECL = 'sdecl' ;
STATIC = 'static' ;
PRINT = 'print' ;
READ = 'read' ;
IF = 'if' ;
THEN = 'then' ;
ELSE = 'else' ;
DO = 'do' ;
WHILE = 'while' ;
// types
INTEGER = 'int' ;
CHAR = 'char' ;
BOOLEAN = 'boolean' ;
TRUE = 'true' ;
FALSE = 'false' ;
}
// Named actions: the enclosed text is copied to the top of the generated
// lexer and parser classes respectively, placing both in the same package.
@lexer::header {
package Eindopdracht;
}
@header {
package Eindopdracht;
}
// Parser rules
program
: program2 EOF
-> ^(PROGRAM program2)
;
program2
: (declaration* statement)+
;
declaration
: STATIC type IDENTIFIER SEMICOLON -> ^(SDECL type IDENTIFIER)
| type IDENTIFIER SEMICOLON -> ^(DECL type IDENTIFIER)
;
type
: INTEGER
| CHAR
| BOOLEAN
;
statement
: assignment_expr SEMICOLON!
| while_stat SEMICOLON!
| print_stat SEMICOLON!
| if_stat SEMICOLON!
| read_stat SEMICOLON!
;
while_stat
: WHILE^ OPAREN! or_expr CPAREN! OCURLY! statement+ CCURLY! // while (expression) {statement+}
;
print_stat
: PRINT^ OPAREN! or_expr (COMMA! or_expr)* CPAREN! // print(expression)
;
read_stat
: READ^ OPAREN! IDENTIFIER (COMMA! IDENTIFIER)+ CPAREN! // read(expression)
;
if_stat
: IF^ OPAREN! or_expr CPAREN! comp_expr (ELSE! comp_expr)? // if (expression) compound else compound
;
assignment_expr
: or_expr (BECOMES^ or_expr)*
;
or_expr
: and_expr (OR^ and_expr)*
;
and_expr
: compare_expr (AND^ compare_expr)*
;
compare_expr
: plusminus_expr ((LT|LTE|GT|GTE|EQUALS|UNEQUALS)^ plusminus_expr)?
;
plusminus_expr
: timesdivide_expr ((PLUS | MINUS)^ timesdivide_expr)*
;
timesdivide_expr
: unary_expr ((TIMES | DIVIDE | MODULO)^ unary_expr)*
;
unary_expr
: operand
| PLUS operand -> ^(UNARY PLUS operand)
| MINUS operand -> ^(UNARY MINUS operand)
| NOT operand -> ^(UNARY NOT operand)
;
operand
: TRUE
| FALSE
| charliteral
| IDENTIFIER
| NUMBER
| OPAREN! or_expr CPAREN!
;
comp_expr
: OCURLY program2 CCURLY -> ^(COMPOUND program2)
;
// Lexer rules
charliteral
: SINGLEQUOTE! LETTER SINGLEQUOTE!
;
IDENTIFIER
: LETTER (LETTER | DIGIT)*
;
NUMBER
: DIGIT+
;
COMMENT
: '//' .* '\n'
{ $channel=HIDDEN; }
;
WS
: (' ' | '\t' | '\f' | '\r' | '\n')+
{ $channel=HIDDEN; }
;
fragment DIGIT : ('0'..'9') ;
fragment LOWER : ('a'..'z') ;
fragment UPPER : ('A'..'Z') ;
fragment LETTER : LOWER | UPPER ;
// EOF
I then use the following java file to test programs:
package Package;
import java.io.FileInputStream;
import java.io.InputStream;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.BufferedTreeNodeStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.DOTTreeGenerator;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.stringtemplate.StringTemplate;
/**
 * Command-line driver for the Rietse grammar: runs the generated RietseLexer
 * and RietseParser over one program and reports any failure on stderr.
 */
public class Rietse {
    /**
     * Parses the file named by the first argument, or standard input when no
     * argument is given.
     *
     * @param args optional path of the program to parse
     */
    public static void main (String[] args)
    {
        // Indexing args[0] on an empty array throws before the try block is
        // entered, which made the System.in fallback below unreachable.
        String inputFile = args.length > 0 ? args[0] : null;
        try {
            InputStream in = inputFile == null ? System.in : new FileInputStream(inputFile);
            RietseLexer lexer = new RietseLexer(new ANTLRInputStream(in));
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            RietseParser parser = new RietseParser(tokens);
            // Start rule of the grammar; the returned AST is not used here.
            parser.program();
        } catch (RietseException e) {
            System.err.print("ERROR: RietseException thrown by compiler: ");
            System.err.println(e.getMessage());
        } catch (RecognitionException e) {
            System.err.print("ERROR: recognition exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.print("ERROR: uncaught exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }
}
And at last, the testprogram itself:
print('a');
Now when I run this, I get the following errors:
line 1:7 mismatched input 'a' expecting LETTER
line 1:9 mismatched input ')' expecting LETTER
I have no clue whatsoever what causes this bug. I have tried several changes of things but nothing fixed it. Does anyone here know what's wrong with my code and how I can fix it?
Every bit of help is greatly appreciated, thanks in advance.
Greetings,
Rien
Using a rule:
CHARLITERAL
: SINGLEQUOTE (LETTER | DIGIT) SINGLEQUOTE
;
and changing operand to:
operand
: TRUE
| FALSE
| CHARLITERAL
| IDENTIFIER
| NUMBER
| OPAREN! or_expr CPAREN!
;
will fix the problem. It does have the drawback of leaving the single quotes in the AST, but that can optionally be fixed by changing the text of the node with the
setText(String);
method.
Turn charliteral into a lexer rule (rename it to CHARLITERAL). Right now, the string 'a' is tokenized like this: SINGLEQUOTE IDENTIFIER SINGLEQUOTE, so you're getting an IDENTIFIER instead of a LETTER.
I wonder how this code can compile at all given that you're using a fragment (LETTER) from a parser rule.

The following sets of rules are mutually left-recursive TREE GRAMMAR

I have a complete parser grammar that generates an AST, which I believe is correct, using the rewrite rules and tree operators. At the moment I am stuck at the phase of creating a tree grammar. I get this error:
The following sets of rules are mutually left-recursive [direct_declarator, declarator] and [abstract_declarator, direct_abstract_declarator]
rewrite syntax or operator with no output option; setting output=AST
Here is my Tree Grammar.
tree grammar walker;
options {
language = Java;
tokenVocab = c2p;
ASTLabelType = CommonTree;
backtrack = true;
}
// Named action: the enclosed text is copied to the top of the generated
// tree-walker class.
@header
{
package com.frankdaniel.compiler;
}
translation_unit
: ^(PROGRAM (^(FUNCTION external_declaration))+)
;
external_declaration
options {k=1;}
: (declaration_specifiers? declarator declaration*)=> function_definition
| declaration
;
function_definition
: declaration_specifiers? declarator (declaration+ compound_statement|compound_statement)
;
declaration
: 'typedef' declaration_specifiers? init_declarator_list
| declaration_specifiers init_declarator_list?
;
declaration_specifiers
: ( type_specifier|type_qualifier)+
;
init_declarator_list
: ^(INIT_DECLARATOR_LIST init_declarator+)
;
init_declarator
: declarator (ASSIGN^ initializer)?
;
type_specifier : (CONST)? (VOID | CHAR | INT | FLOAT );
type_id
: IDENTIFIER
//{System.out.println($IDENTIFIER.text+" is a type");}
;
type_qualifier
: CONST
;
declarator
: pointer? direct_declarator
| pointer
;
direct_declarator
: (IDENTIFIER|declarator) declarator_suffix*
;
declarator_suffix
: constant_expression
| parameter_type_list
| identifier_list
;
pointer
: TIMES type_qualifier+ pointer?
| TIMES pointer
| TIMES
;
parameter_type_list
: parameter_list
;
parameter_list
: ^(PARAMETER_LIST parameter_declaration)
;
parameter_declaration
: declaration_specifiers (declarator|abstract_declarator)*
;
identifier_list
: ^(IDENTIFIER_LIST IDENTIFIER+)
;
type_name
: specifier_qualifier_list abstract_declarator?
;
specifier_qualifier_list
: ( type_qualifier | type_specifier )+
;
abstract_declarator
: pointer direct_abstract_declarator?
| direct_abstract_declarator
;
direct_abstract_declarator
: (abstract_declarator | abstract_declarator_suffix ) abstract_declarator_suffix*
;
abstract_declarator_suffix
: constant_expression
| parameter_type_list
;
initializer
: assignment_expression
| initializer_list?
;
initializer_list
: ^(INITIALIZER_LIST initializer+)
;
// EXPRESSIONS
argument_expression_list
: ^(EXPRESSION_LIST assignment_expression+)
;
multiplicative_expression
: ^((TIMES|DIV|MOD) cast_expression cast_expression);
additive_expression
: ^((PLUS|MINUS) multiplicative_expression multiplicative_expression);
cast_expression
: ^(CAST_EXPRESSION type_name cast_expression)
| unary_expression
;
unary_expression
: postfix_expression
| PPLUS unary_expression
| MMINUS unary_expression
| unary_operator cast_expression
;
postfix_expression
: primary_expression
( expression
| argument_expression_list
| IDENTIFIER
| IDENTIFIER
| PPLUS
| MMINUS
)*
;
unary_operator
: TIMES
| PLUS
| MINUS
| NOT
;
primary_expression
: IDENTIFIER
| constant
| expression
;
constant
: HEX_LITERAL
| OCTAL_LITERAL
| DECIMAL_LITERAL
| CHARACTER_LITERAL
| STRING_LITERAL
| FLOATING_POINT_LITERAL
;
////////////////////////////////////////////////////////
expression
: ^(EXPRESSION assignment_expression+)
;
constant_expression
: conditional_expression
;
assignment_expression
: ^(assignment_operator lvalue assignment_expression)
| conditional_expression
;
lvalue
: unary_expression
;
assignment_operator
: ASSIGN
;
conditional_expression : (logical_or_expression) (QUESTIONMARK expression COLON conditional_expression)?;
logical_or_expression : ^(OR logical_and_expression logical_and_expression);
logical_and_expression : ^(AND equality_expression equality_expression);
//equality_expression : (a=relational_expression) ((e=EQUAL|e=NONEQUAL)^ b=relational_expression)?;
equality_expression : ^((EQUAL|NONEQUAL) relational_expression relational_expression);
//relational_expression : additive_expression ((ST|GT|STEQ|GTEQ)^ additive_expression)* ;
relational_expression : ^((ST|GT|STEQ|GTEQ) additive_expression additive_expression);
// STATEMENTS
statement
: compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
compound_statement
: ^(STATEMENT declaration* statement_list? )
;
statement_list
: statement+
;
expression_statement
:expression
;
selection_statement
:^(IF expression statement (^(ELSE statement))? )
|^(SWITCH expression statement)
;
iteration_statement
: ^(WHILE expression statement)
| ^(DO statement ^(WHILE expression))
| ^(FOR expression_statement expression_statement expression? statement)
;
jump_statement
: CONTINUE
| BREAK
| RETURN
| ^(RETURN expression)
;
It seems obvious that the following two rules are left recursive:
{code}
declarator
: pointer? direct_declarator
| pointer
;
direct_declarator
: (IDENTIFIER|declarator) declarator_suffix*
;
{code}
Rule "declarator" can begin with "direct_declarator" (the leading pointer is optional), and "direct_declarator" can in turn begin with "declarator", so the two rules are mutually left-recursive, and there are no predicates to guide the rule evaluation.

Changing associativity schema in a grammar

I'm trying to use SableCC to generate a parser for models that I call LAMs. LAMs themselves are simple, and a simplified grammar for them (omitting a lot of details) is:
L := 0 | (x,y) | F(x1,...,xn) | L || L | L ; L
I wrote this grammar:
Helpers
number = ['0' .. '9'] ;
letter = ['a' .. 'z'] ;
uletter = ['A' .. 'Z'] ;
Tokens
zero = '0' ;
comma = ',' ;
parallel = '||' ;
point = ';' ;
lpar = '(' ;
rpar = ')' ;
identifier = letter+ number* ;
uidentifier = uletter+ number* ;
Productions
expr = {term} term |
{parallel} expr parallel term |
{point} expr point term;
term = {parenthesis} lpar expr rpar |
{zero} zero |
{invk} uidentifier lpar paramlist rpar |
{pair} lpar [left]:identifier comma [right]:identifier rpar ;
paramlist = {list} list |
{empty} ;
list = {var} identifier |
{com} identifier comma list ;
This basically works, but there is a side effect: it is left associative. For example, if I have
L = L1 || L2 ; L3 || L4
Then it is parsed like:
L = ((L1 || L2) ; L3) || L4
I want to give all precedence to the ";" operator, and so have L parsed like
L = (L1 || L2) ; (L3 || L4)
(other things, like "||", could remain left-associative)
My questions are:
Are there tips for doing such conversions in an "automated" way?
What would a grammar that gives all the precedence to ";" look like?
An "RTFM link" is also an acceptable answer :-D
Thank you all
You need to create a hierarchy of rules that matches the desired operator precedence.
expr = {subexp} subexp |
{parallel} subexp parallel expr ;
subexp = {term} term |
{point} term point subexp;
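Note that I also changed the associativity: both operators are now right-associative. In this hierarchy ";" binds more tightly than "||"; if ";" should instead bind most loosely, as in your (L1 || L2) ; (L3 || L4) example, swap the roles of parallel and point in the two rules.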

Error generating files in ANTLR

So I'm trying to write a parser in ANTLR, this is my first time using it and I'm running into a problem that I can't find a solution for, apologies if this is a very simple problem. Anyway, the error I'm getting is:
"(100): Expr.g:1:13:syntax error: antlr: MismatchedTokenException(74!=52)"
The code I'm currently using is:
grammar Expr.g;
options{
output=AST;
}
tokens{
MAIN = 'main';
OPENBRACKET = '(';
CLOSEBRACKET = ')';
OPENCURLYBRACKET = '{';
CLOSECURLYBRACKET = '}';
COMMA = ',';
SEMICOLON = ';';
GREATERTHAN = '>';
LESSTHAN = '<';
GREATEROREQUALTHAN = '>=';
LESSTHANOREQUALTHAN = '<=';
NOTEQUAL = '!=';
ISEQUALTO = '==';
WHILE = 'while';
IF = 'if';
ELSE = 'else';
READ = 'read';
OUTPUT = 'output';
PRINT = 'print';
RETURN = 'return';
READC = 'readc';
OUTPUTC = 'outputc';
PLUS = '+';
MINUS = '-';
DIVIDE = '/';
MULTIPLY = '*';
PERCENTAGE = '%';
}
// Named actions: @header and @lexer::header text is copied to the top of the
// generated parser and lexer, and @members text becomes members of the
// generated parser class.
@header {
//package test;
import java.util.HashMap;
}
@lexer::header {
//package test;
}
@members {
/** Map variable name to Integer object holding value */
HashMap memory = new HashMap();
}
prog: stat+ ;
stat: expr NEWLINE {System.out.println($expr.value);}
| ID '=' expr NEWLINE
{memory.put($ID.text, new Integer($expr.value));}
| NEWLINE
;
expr returns [int value]
: e=multExpr {$value = $e.value;}
( '+' e=multExpr {$value += $e.value;}
| '-' e=multExpr {$value -= $e.value;}
)*
;
multExpr returns [int value]
: e=atom {$value = $e.value;} ('*' e=atom {$value *= $e.value;})*
;
atom returns [int value]
: INT {$value = Integer.parseInt($INT.text);}
| ID
{
Integer v = (Integer)memory.get($ID.text);
if ( v!=null ) $value = v.intValue();
else System.err.println("undefined variable "+$ID.text);
}
| '(' e=expr ')' {$value = $e.value;}
;
IDENT : ('a'..'z'^|'A'..'Z'^)+ ; : .;
INT : '0'..'9'+ ;
NEWLINE:'\r'? '\n' ;
WS : (' '|'\t')+ {skip();} ;
Thanks for any help.
EDIT: Well, I'm an idiot, it's just a formatting error. Thanks for the responses from those who helped out.
You have some illegal characters after your IDENT token:
IDENT : ('a'..'z'^|'A'..'Z'^)+ ; : .;
The : .; are invalid there. And you're also trying to mix the tree-rewrite operator ^ inside a lexer rule, which is illegal: remove them. Lastly, you've named it IDENT while in your parser rules, you're using ID.
It should be:
ID : ('a'..'z' | 'A'..'Z')+ ;

Resources