warning: rule useless in parser due to conflicts: $#1: /* empty */ - parsing

I get this warning when I put an action such as {printf("something");} in the middle of a rule; if I put the action at the end of the rule, the warning goes away and everything works fine.
The following produces the warning in the title plus 1 shift/reduce conflict:
sent_asig: ID {printf("something");} ASIG exp {printf("sent_asig: ID ASIG exp \n");}
| ID ASIG CTE_STRING {printf("sent_asig: ID ASIG CTE_STRING \n");}
| ID ASIG CTE_STRING CONCAT ID {printf("sent_asig: ID ASIG CTE_STRING CONCAT ID \n");}
| ID ASIG ID CONCAT CTE_STRING {printf("sent_asig: ID ASIG ID CONCAT CTE_STRING \n");};
And this version produces no warnings and 0 conflicts, and works fine:
sent_asig: ID ASIG exp {printf("sent_asig: ID ASIG exp \n");}
| ID ASIG CTE_STRING {printf("sent_asig: ID ASIG CTE_STRING \n");}
| ID ASIG CTE_STRING CONCAT ID {printf("sent_asig: ID ASIG CTE_STRING CONCAT ID \n");}
| ID ASIG ID CONCAT CTE_STRING {printf("sent_asig: ID ASIG ID CONCAT CTE_STRING \n");};
In case the origin of this error is in another part of the grammar, here are the full rules:
/* Terminal symbols, listed in the same order as before. */
%token ID CTE
%token ABREPAR FINPAREN
%token AND OR
%token COMA
%token ASIG COMP RESTASIG CONCAT SUMASIG MULTASIG DIVASIG
%token MENOR_IGU MENOR MAYOR_IGU MAYOR NOT DIST
%token CTE_REAL CTE_STRING
%token DO IF ENDIF ELSE
%token PUT GET
%token DECLARE ENDDECLARE BEGIN ENDPROGRAM
%token INT REAL STRING
%token REPEAT CONST

/* Operator associativity/precedence declarations, in their original order. */
%left  AND OR
%left  OP_SUM OP_RESTA
%left  OP_MULT
%left  OP_DIV
%right ASIG
%right SUMASIG
%right RESTASIG
%right MULTASIG
%right DIVASIG
%%
/* Whole program: BEGIN, an optional declaration section, statements, ENDPROGRAM. */
programa: BEGIN declar sentencias ENDPROGRAM {printf("programa: BEGIN declar sentencias ENDPROGRAM \n");}
| BEGIN sentencias ENDPROGRAM {printf("programa: BEGIN sentencias ENDPROGRAM \n");};
/* One or more statements (left-recursive list).
 * The trace messages now name the rule that was actually reduced. */
sentencias: sentencia {printf("sentencias: sentencia \n");}
| sentencias sentencia {printf("sentencias: sentencias sentencia \n");};
/* A single statement: dispatches to one of the statement kinds. */
sentencia: sent_asig {printf("sentencia: sent_asig\n");}
| sent_mult_asig {printf("sentencia: sent_mult_asig\n");}
| sent_sum_asig {printf("sentencia: sent_sum_asig\n");} /* newline was missing here */
| sent_rest_asig {printf("sentencia: sent_rest_asig \n");}
| sent_div_asig {printf("sentencia: sent_div_asig \n");}
| asig_const {printf("sentencia: asig_const \n");}
| entrada {printf("sentencia: entrada \n");}
| salida {printf("sentencia: salida \n");}
| sent_if {printf("sentencia: sent_if \n");}
| sent_repeat {printf("sentencia: sent_repeat \n");};
/* Assignment statement.
 * NOTE: the action between ID and ASIG in the first alternative is a mid-rule
 * action. The other alternatives also start with ID ASIG, so after reading ID
 * with ASIG as the lookahead token the parser cannot tell whether it should
 * run that action. This is the source of the shift/reduce conflict and of the
 * "rule useless in parser due to conflicts" warning discussed here. */
sent_asig: ID {printf("something");} ASIG exp {printf("sent_asig: ID ASIG exp \n");}
| ID ASIG CTE_STRING {printf("sent_asig: ID ASIG CTE_STRING \n");}
| ID ASIG CTE_STRING CONCAT ID {printf("sent_asig: ID ASIG CTE_STRING CONCAT ID \n");}
| ID ASIG ID CONCAT CTE_STRING {printf("sent_asig: ID ASIG ID CONCAT CTE_STRING \n");};
/* Arithmetic expressions, layered as exp -> ter -> factor.
 * Every reduction prints the rule it matched; most also call escribirPolaca(). */
exp
    : exp OP_SUM ter
        { printf("exp: exp OP_SUM ter\n"); escribirPolaca("+"); }
    | exp OP_RESTA ter
        { printf("exp: exp OP_RESTA ter\n"); escribirPolaca("-"); }
    | ter
        { printf("exp: ter\n"); }
    ;

ter
    : ter OP_MULT factor
        { printf("ter: ter OP_MULT factor\n"); escribirPolaca("*"); }
    | ter OP_DIV factor
        { printf("ter: ter OP_DIV factor\n"); escribirPolaca("/"); }
    | factor
        { printf("ter: factor\n"); }
    ;

/* Operands: identifier, integer constant, real constant, or a parenthesised exp. */
factor
    : ID
        { printf("factor: ID\n"); escribirPolaca(Simbolos[nosalemal][0]); }
    | CTE
        { printf("factor: CTE\n"); escribirPolaca(Simbolos[nosalemal][1]); }
    | CTE_REAL
        { printf("factor: CTE_REAL \n"); escribirPolaca("CTE_REAL"); }
    | ABREPAR exp FINPAREN
        { printf("factor: ABREPAR exp FINPAREN\n"); }
    ;
/* Compound assignments (+=, -=, *=, /= style operators).
 * Each accepts an identifier, an integer constant or a real constant on the
 * right-hand side. The trace messages were copy-pasted and named the wrong
 * rule or alternative; each one now prints the alternative actually reduced. */
sent_sum_asig : ID SUMASIG ID {printf("sent_sum_asig: ID SUMASIG ID \n");}
| ID SUMASIG CTE {printf("sent_sum_asig: ID SUMASIG CTE \n");}
| ID SUMASIG CTE_REAL {printf("sent_sum_asig: ID SUMASIG CTE_REAL \n");} ;
sent_rest_asig : ID RESTASIG ID {printf("sent_rest_asig: ID RESTASIG ID \n");}
| ID RESTASIG CTE {printf("sent_rest_asig: ID RESTASIG CTE \n");}
| ID RESTASIG CTE_REAL {printf("sent_rest_asig: ID RESTASIG CTE_REAL \n");};
sent_mult_asig : ID MULTASIG ID {printf("sent_mult_asig: ID MULTASIG ID \n");}
| ID MULTASIG CTE {printf("sent_mult_asig: ID MULTASIG CTE \n");}
| ID MULTASIG CTE_REAL {printf("sent_mult_asig: ID MULTASIG CTE_REAL \n");};
sent_div_asig : ID DIVASIG ID {printf("sent_div_asig: ID DIVASIG ID \n");}
| ID DIVASIG CTE {printf("sent_div_asig: ID DIVASIG CTE \n");}
| ID DIVASIG CTE_REAL {printf("sent_div_asig: ID DIVASIG CTE_REAL \n");};
/* Declaration section: DECLARE, one or more typed declarations, ENDDECLARE. */
declar
    : DECLARE declaraciones ENDDECLARE
        { printf("declar: DECLARE declaraciones ENDDECLARE \n"); }
    ;

/* One or more declarations (right-recursive list). */
declaraciones
    : dec
        { printf("declaraciones: dec \n"); }
    | dec declaraciones
        { printf("declaraciones: dec declaraciones \n"); }
    ;

/* A type keyword followed by the variables declared with that type. */
dec
    : REAL var
        { printf("dec: REAL var \n"); }
    | INT var
        { printf("dec: INT var \n"); }
    | STRING var
        { printf("dec: STRING var \n"); }
    ;

/* Comma-separated list of identifiers. */
var
    : ID
        { printf("var: ID \n"); }
    | ID COMA var
        { printf("var: ID COMA var \n"); }
    ;
/* Constant definition: CONST ID ASIG followed by a literal. */
asig_const
    : CONST ID ASIG CTE
        { printf("asig_const: CONST ID ASIG CTE \n"); }
    | CONST ID ASIG CTE_REAL
        { printf("asig_const: CONST ID ASIG CTE_REAL \n"); }
    | CONST ID ASIG CTE_STRING
        { printf("asig_const: CONST ID ASIG CTE_STRING \n"); }
    ;

/* PUT statement: takes a string literal or an identifier. */
entrada
    : PUT CTE_STRING
        { printf("entrada: PUT CTE_STRING \n"); }
    | PUT ID
        { printf("entrada: PUT ID \n"); }
    ;

/* GET statement: takes an identifier. */
salida
    : GET ID
        { printf("salida: GET ID \n"); }
    ;
/* Conditional statement, with an optional ELSE branch. */
sent_if: IF ABREPAR condicion FINPAREN sentencias ENDIF {printf("sent_if: IF ABREPAR condicion FINPAREN sentencias ENDIF \n");}
| IF ABREPAR condicion FINPAREN sentencias ELSE sentencias ENDIF {printf("sent_if: IF ABREPAR condicion FINPAREN sentencias ELSE sentencias ENDIF \n");};
/* A condition: a single comparison, two comparisons joined by AND/OR, or a
 * negated comparison. */
condicion: cond {printf("condicion: cond \n");}
| cond AND cond {printf("condicion: cond AND cond\n");}
| cond OR cond {printf("condicion: cond OR cond \n");}
| NOT cond {printf("condicion: NOT cond \n");};
/* A comparison between two expressions.
 * Every alternative used to print "cond: exp MENOR exp"; each one now prints
 * its own operator. The semantic actions themselves are unchanged: only the
 * MENOR and COMP alternatives emit "CMP". */
cond: exp MENOR exp {printf("cond: exp MENOR exp \n");apilarPilaIteracion(posicionVectorPolaca);escribirPolaca("CMP");posicionVectorPolaca++;}
| exp MAYOR exp {printf("cond: exp MAYOR exp \n");}
| exp MENOR_IGU exp {printf("cond: exp MENOR_IGU exp \n");}
| exp MAYOR_IGU exp {printf("cond: exp MAYOR_IGU exp \n");}
| exp COMP exp {printf("cond: exp COMP exp \n");escribirPolaca("CMP");}
| exp DIST exp {printf("cond: exp DIST exp \n");};
/* DO ... REPEAT loop with the condition in parentheses at the end. */
sent_repeat: DO sentencias REPEAT ABREPAR condicion FINPAREN {printf("sent_repeat: DO sentencias REPEAT ABREPAR condicion FINPAREN \n");};
%%
Sorry for my bad English (if you can answer in Spanish, even better).

This situation is explained in the bison manual. Basically, using Mid-Rule Actions (MRAs) -- that is, an action in the middle of a rule -- reduces the ability of the parser to defer reduction decisions to the end of the rule. In effect, at that point in the parse, the grammar must be predictable as if it were an LL(1) grammar. For this reason, MRAs should only be used if strictly necessary.
Concretely, consider just the following two alternatives (truncated):
sent_asig: ID {printf("something");} ASIG exp …
| ID ASIG CTE_STRING …
Now, suppose the parser has just recognized an ID and the next token in the input stream is ASIG. At this point, the parser needs to decide whether to perform the MRA { printf("something"); }. Behind the scenes, performing an MRA is the same as reducing a marker non-terminal (a non-terminal whose right-hand side is empty), so the parser has to decide whether or not to perform a reduction. But it doesn't yet have enough information; the decision cannot be made until it sees whether or not the token following ASIG is CTE_STRING. So resolution requires two lookahead tokens.
That's a shift/reduce conflict: the parser cannot decide whether to shift ASIG or reduce the marker (to execute the MRA). Since bison/yacc parsers always resolve shift/reduce conflicts by shifting, the reduction will never happen; the MRA cannot be executed and the right-hand side containing it is effectively blocked. Hence the warning.
Note that you cannot fix the problem by inserting the same MRA at both points:
sent_asig: ID {printf("something");} ASIG exp …
| ID {printf("something");} ASIG CTE_STRING …
Even though the MRAs are identical, bison/yacc inserts a different marker for the two rules, which would create a reduce/reduce conflict. Reduce/reduce conflicts are resolved by choosing the reduction which appears earlier in the input file, so that resolution would be different from the default resolution in your file, but it would still block one (or more) rules so you would still see the same warning.
If you are only trying to see what the parser is doing, I strongly suggest that you remove all of those printfs, and rely instead on bison's built-in trace facility, which is extremely easy to enable (and disable), and which gives a much more complete view of how the parse is progressing.

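An elegant way to resolve this kind of conflict, especially when both actions are the same, is to move the action into a separate non-terminal with an empty production (called subroutine in the example below) and to use that non-terminal at the start of each alternative: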
subroutine:
%empty { prepare_for_local_variables (); }
;
compound:
subroutine '{' declarations statements '}'
| subroutine '{' statements '}'
;
Here is more info:
https://www.gnu.org/software/bison/manual/html_node/Mid_002dRule-Conflicts.html#Mid_002dRule-Conflicts

Related

Why tiger(Modern Compiler Implementation) use `fundecs` in chapter 4 instead of `fundec`?

I'm following the tiger book to write a compiler.
In chapter 3, based on the github's code and my understanding, I filled in the following rules for the dec:
decs:
%empty
| decs dec
;
dec:
tydec
| vardec
| fundec
;
tydec:
TYPE ID '=' ty
;
vardec:
VAR ID ASSIGN exp
| VAR ID ':' ID ASSIGN exp
;
fundec:
FUNCTION ID '(' tyfields ')' '=' exp
| FUNCTION ID '(' tyfields ')' ':' ID '=' exp
However, in chap 4, the book provided the following functions for ast:
A_fundecList A_FundecList(A_fundec head, A_fundecList tail);
A_nametyList A_NametyList(A_namety head, A_nametyList tail);
This led most of the code I found to adjust the decs rule as follows:
decs:
%empty
| decs dec
;
dec:
tydecs
| vardec
| fundecs
;
tydecs:
tydec
| tydec tydecs
tydec:
TYPE ID '=' ty
;
vardec:
VAR ID ASSIGN exp
| VAR ID ':' ID ASSIGN exp
;
fundecs:
fundec
| fundec fundecs {$$ = A_FundecList($1, $2);}
;
fundec:
FUNCTION ID '(' tyfields ')' '=' exp
| FUNCTION ID '(' tyfields ')' ':' ID '=' exp
The list non-terminals fundecs and tydecs were added to the production rules.
I do not understand why this was done, since it will obviously create a conflict: decs is already a list that can contain fundecs and tydecs, so a sequence of function declarations, for example, can be reduced either to a list of decs or to a list of fundecs.
So I would like to ask: what is the reason for adding a conflicting grammar to the parser?
Thanks a lot!!!

Tiger language parser for compiler

I am now trying to build a compiler using the book Modern Compiler Implementation in C (the Tiger book). I am now stuck in the parser phase. I have built my grammar and tried to run it but still get some syntax error.
I tried my lexer and it appears to work.
This is my code for yacc/bison:
%{
/* C prologue: headers needed by the generated parser, plus its error hook. */
#include <stdio.h>
#include "util.h"
#include "errormsg.h"

int yylex(void); /* function prototype */

/* Error hook called by the generated parser: forwards the message to
 * EM_error together with EM_tokPos. The parameter is const-qualified
 * because the function only reads the message. */
void yyerror(const char *s)
{
    EM_error(EM_tokPos, "%s", s);
}
%}
%union {
int pos;
int ival;
string sval;
}
%token <sval> ID STRING
%token <ival> INT
%token
COMMA COLON SEMICOLON LPAREN RPAREN LBRACK RBRACK
LBRACE RBRACE DOT
PLUS MINUS TIMES DIVIDE EQ NEQ LT LE GT GE
AND OR ASSIGN
ARRAY IF THEN ELSE WHILE FOR TO DO LET IN END OF
BREAK NIL
FUNCTION VAR TYPE
%start program
%nonassoc DO OF
%nonassoc THEN /* ELSE must come after THEN! */
%nonassoc ELSE
%left SEMICOLON
%left ASSIGN
%left OR
%left AND
%nonassoc EQ NEQ GT LT GE LE
%left PLUS MINUS
%left TIMES DIVIDE
%left UMINUS
%%
program: exp;
dec: tyDec;
dec: varDec;
dec: funDec;
tyDec: TYPE ID EQ ty;
ty: ID;
ty: arrTy;
ty: recTy;
arrTy: ARRAY OF ID;
recTy: LBRACE fieldDecs RBRACE;
fieldDecs: /* empty */ ;
fieldDecs: fieldDec;
fieldDecs: fieldDec COMMA fieldDecs;
fieldDec: ID COLON ID;
funDec: FUNCTION ID LPAREN fieldDecs RPAREN EQ exp;
funDec: FUNCTION ID LPAREN fieldDecs RPAREN COLON ID EQ exp;
varDec: VAR ID ASSIGN exp;
varDec: VAR ID COLON ID ASSIGN exp;
lValue: ID;
lValue: subscript;
lValue: fieldExp;
subscript: lValue LBRACK exp RBRACK;
fieldExp: lValue DOT ID;
exp: lValue;
exp: NIL;
exp: INT;
exp: STRING;
exp: seqExp;
exp: negation;
exp: callExp;
exp: infixExp;
exp: arrCreate;
exp: recCreate;
exp: assignment;
exp: ifThenElse;
exp: ifThen;
exp: whileExp;
exp: forExp;
exp: BREAK;
exp: letExp;
seqExp: LPAREN exps RPAREN;
exps: /* empty */ ;
exps: exp;
exps: exp SEMICOLON exps;
negation: MINUS exp %prec UMINUS ;
callExp: ID LPAREN expsc RPAREN ;
expsc: /* empty */ ;
expsc: exp;
expsc: exp COMMA expsc;
infixExp: arithmExp;
infixExp: boolExp;
infixExp: compExp;
arithmExp: exp PLUS exp;
arithmExp: exp MINUS exp;
arithmExp: exp TIMES exp;
arithmExp: exp DIVIDE exp;
boolExp: exp AND exp;
boolExp: exp OR exp;
compExp: exp EQ exp;
compExp: exp NEQ exp;
compExp: exp GT exp;
compExp: exp LT exp;
compExp: exp GE exp;
compExp: exp LE exp;
arrCreate: ID LBRACK exp RBRACK OF exp;
recCreate: ID LBRACE fieldCreates RBRACE;
fieldCreates: /* empty */ ;
fieldCreates: fieldCreate;
fieldCreates: fieldCreate COMMA fieldCreates;
fieldCreate: ID EQ exp;
assignment: lValue ASSIGN exp;
ifThenElse: IF exp THEN exp ELSE exp;
ifThen: IF exp THEN exp;
whileExp: WHILE exp DO exp;
forExp: FOR ID ASSIGN exp TO exp DO exp;
/* let-expression: declarations, then a body expression sequence. */
letExp: LET decs IN exps END;
/* Zero or more declarations. Tiger permits an empty declaration list
 * ("let in ... end"), which the previous one-or-more rule rejected with a
 * syntax error. Written left-recursively, the form the Bison manual
 * recommends for sequences. */
decs: /* empty */ ;
decs: decs dec;
/* ERROR RECOVERY */
exp: LPAREN error RPAREN;
seqExp: error SEMICOLON exp;
When I run my parser on a sample tiger code, I get some syntax error, including at the beginning.
Could you please help me to sort it out? Thanks!

ANTLR4 mismatched input '' expecting

Currently, I've just defined simple rules in ANTLR4:
// Recognizer Rules
program : (class_dcl)+ EOF;
class_dcl: 'class' ID ('extends' ID)? '{' class_body '}';
class_body: (const_dcl|var_dcl|method_dcl)*;
const_dcl: ('static')? 'final' PRIMITIVE_TYPE ID '=' expr ';';
var_dcl: ('static')? id_list ':' type ';';
method_dcl: PRIMITIVE_TYPE ('static')? ID '(' para_list ')' block_stm;
para_list: (para_dcl (';' para_dcl)*)?;
para_dcl: id_list ':' PRIMITIVE_TYPE;
block_stm: '{' '}';
expr: <assoc=right> expr '=' expr | expr1;
expr1: term ('<' | '>' | '<=' | '>=' | '==' | '!=') term | term;
term: ('+'|'-') term | term ('*'|'/') term | term ('+'|'-') term | fact;
fact: INTLIT | FLOATLIT | BOOLLIT | ID | '(' expr ')';
type: PRIMITIVE_TYPE ('[' INTLIT ']')?;
id_list: ID (',' ID)*;
// Lexer Rules
KEYWORD: PRIMITIVE_TYPE | BOOLLIT | 'class' | 'extends' | 'if' | 'then' | 'else'
| 'null' | 'break' | 'continue' | 'while' | 'return' | 'self' | 'final'
| 'static' | 'new' | 'do';
SEPARATOR: '[' | ']' | '{' | '}' | '(' | ')' | ';' | ':' | '.' | ',';
OPERATOR: '^' | 'new' | '=' | UNA_OPERATOR | BIN_OPERATOR;
UNA_OPERATOR: '!';
BIN_OPERATOR: '+' | '-' | '*' | '\\' | '/' | '%' | '>' | '>=' | '<' | '<='
| '==' | '<>' | '&&' | '||' | ':=';
PRIMITIVE_TYPE: 'integer' | 'float' | 'bool' | 'string' | 'void';
BOOLLIT: 'true' | 'false';
FLOATLIT: [0-9]+ ((('.'[0-9]* (('E'|'e')('+'|'-')?[0-9]+)? ))|(('E'|'e')('+'|'-')? [0-9]+));
INTLIT: [0-9]+;
STRINGLIT: '"' ('\\'[bfrnt\\"]|~[\r\t\n\\"])* '"';
ILLEGAL_ESC: '"' (('\\'[bfrnt\\"]|~[\n\\"]))* ('\\'(~[bfrnt\\"]))
{if (true) throw new bkool.parser.IllegalEscape(getText());};
UNCLOSED_STRING: '"'('\\'[bfrnt\\"]|~[\r\t\n\\"])*
{if (true) throw new bkool.parser.UncloseString(getText());};
COMMENT: (BLOCK_COMMENT|LINE_COMMENT) -> skip;
BLOCK_COMMENT: '(''*'(('*')?(~')'))*'*'')';
LINE_COMMENT: '#' (~[\n])* ('\n'|EOF);
// Identifier: a letter or underscore followed by letters, digits or underscores.
// The original range A-z also matched the ASCII characters between 'Z' and 'a'
// ( [ \ ] ^ and the backquote ); A-Z is the intended range.
ID: [a-zA-Z_] [a-zA-Z_0-9]* ;
WS: [ \t\r\n]+ -> skip ;
ERROR_TOKEN: . {if (true) throw new bkool.parser.ErrorToken(getText());};
I opened the parse tree, and tried to test:
class abc
{
final integer x=1;
}
It returned errors:
BKOOL::program:3:8: mismatched input 'integer' expecting PRIMITIVE_TYPE
BKOOL::program:3:17: mismatched input '=' expecting {':', ','}
I still haven't got why. Could you please help me why it didn't recognize rules and tokens as I expected?
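Lexer rules are exclusive: each piece of input becomes exactly one token. The longest match wins, and the tiebreaker is the order of the rules in the grammar.
In your case, integer is lexed as a KEYWORD instead of a PRIMITIVE_TYPE, because KEYWORD is defined before PRIMITIVE_TYPE.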
What you should do here:
Make one distinct token per keyword instead of an all-catching KEYWORD rule.
Turn PRIMITIVE_TYPE into a parser rule
Same for operators
Right now, your example:
class abc
{
final integer x=1;
}
Gets converted to lexemes such as:
class ID { final KEYWORD ID = INTLIT ; }
This is thanks to the implicit token typing, as you've used definitions such as 'class' in your parser rules. These get converted to anonymous tokens such as T_001 : 'class'; which get the highest priority.
If this weren't the case, you'd end up with:
KEYWORD ID SEPARATOR KEYWORD KEYWORD ID OPERATOR INTLIT ; SEPARATOR
And that's... not quite easy to parse ;-)
That's why I'm telling you to breakdown your tokens properly.

Unusual ANTLR error when attempting to reorganize grammar into two files

I am reorganizing my grammar into two files in order to accommodate a tree grammar: Lua.g and LuaGrammar.g. Lua.g will have all of my lexer rules, and LuaGrammar.g will have all of my tree grammar and parser rules. However, when I try to compile LuaGrammar.g I get the following error:
[00:28:37] error(10): internal error: C:\Users\RCIX\Desktop\AguaLua\Project\trunk\AguaLua\AguaLua\ANTLR Data\LuaGrammar.g : java.lang.IllegalArgumentException: Can't find template ruleRefBang.st; group hierarchy is [CSharp2]
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:507)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:484)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:484)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.grammar.v2.CodeGenTreeWalker.getRuleElementST(CodeGenTreeWalker.java:152)
org.antlr.grammar.v2.CodeGenTreeWalker.atom(CodeGenTreeWalker.java:1986)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1708)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1556)
org.antlr.grammar.v2.CodeGenTreeWalker.alternative(CodeGenTreeWalker.java:1306)
org.antlr.grammar.v2.CodeGenTreeWalker.block(CodeGenTreeWalker.java:1081)
org.antlr.grammar.v2.CodeGenTreeWalker.ebnf(CodeGenTreeWalker.java:1871)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1704)
org.antlr.grammar.v2.CodeGenTreeWalker.alternative(CodeGenTreeWalker.java:1306)
org.antlr.grammar.v2.CodeGenTreeWalker.block(CodeGenTreeWalker.java:1081)
org.antlr.grammar.v2.CodeGenTreeWalker.rule(CodeGenTreeWalker.java:797)
org.antlr.grammar.v2.CodeGenTreeWalker.rules(CodeGenTreeWalker.java:588)
org.antlr.grammar.v2.CodeGenTreeWalker.grammarSpec(CodeGenTreeWalker.java:530)
org.antlr.grammar.v2.CodeGenTreeWalker.grammar(CodeGenTreeWalker.java:336)
org.antlr.codegen.CodeGenerator.genRecognizer(CodeGenerator.java:432)
org.antlr.Tool.generateRecognizer(Tool.java:641)
org.antlr.Tool.process(Tool.java:454)
org.antlr.works.generate.CodeGenerate.generate(CodeGenerate.java:104)
org.antlr.works.generate.CodeGenerate.run(CodeGenerate.java:185)
java.lang.Thread.run(Unknown Source)
And, i'm getting the following error:
[00:34:58] error(100): C:\Users\RCIX\Desktop\AguaLua\Project\trunk\AguaLua\AguaLua\ANTLR Data\Lua.g:0:0: syntax error: codegen: <AST>:0:0: unexpected end of subtree
when attempting to generate Lua.g. Why am i getting these errors, and how can i fix them? (Using ANTLR V3, am able to provide grammar files)
Update: here is the grammar file i am trying to compile.
tree grammar LuaGrammar;
options {
backtrack=true;
language=CSharp2;
output=AST;
tokenVocab=Lua;
filter=true;
ASTLabelType=CommonTree;
}
assignment
:
^('=' left=NAME right=NAME) {Ast. };
/*
chunk : (stat (';'!)?)* (laststat (';'!)?)?;
block : chunk;
stat : varlist1 '='^ explist1 |
functioncall |
doblock |
'while'^ exp doblock |
'repeat'^ block untilrule |
'if'^ exp thenchunk elseifchunk* elsechunk? 'end'! |
'for'^ forinitializer doblock |
'for'^ namelist inlist doblock |
'function'^ funcname funcbody |
'local' 'function' NAME funcbody |
'local'^ namelist localstat? ;
localstat
: '='^ explist1;
untilrule
: 'until'^ exp;
elseifchunk
: 'elseif'^ exp thenchunk;
thenchunk
: 'then'^ block;
elsechunk
: 'else'^ block;
forinitializer
: NAME '='^ exp ','! exp (','! exp)?;
doblock
: 'do'^ block 'end'!;
inlist
: 'in'^ explist1;
laststat : 'return'^ (explist1)? | 'break';
dotname : '.'! funcname;
colonname
: ':' NAME;
funcname : NAME^ (dotname | colonname)?;
varlist1 : var (','! var)*;
namelist : NAME (','! NAME)*;
explist1 : (exp ','!)* exp;
*/
/*
exp : expelement (binop^ exp)* ;
expelement
: ('nil' | 'false' | 'true' | number | stringrule | '...' | /*function |*\ prefixexp | tableconstructor | unop exp);
var: (namevar | dotvar | expvar | arrayvar)?;
namevar
: NAME^ var;
dotvar
: '.'! var;
expvar
: '('^ exp ')'! var;
arrayvar
: '['^ var ']'! var;
varSuffix: nameAndArgs* ('[' exp ']' | '.' NAME);
prefixexp: varOrExp nameAndArgs*;
functioncall: varOrExp nameAndArgs+;
varOrExp: var | '('! exp ')'!;
nameAndArgs: (':' NAME)? argsrule;
argsrule : '(' (explist1)? ')' | tableconstructor | stringrule ;
function : 'function' funcbody;
funcbody : funcparams funcblock;
funcblock
: ')'^ block 'end'!;
funcparams
: '('^ parlist1? ;
parlist1 : namelist (','! '...')? | '...';
tableconstructor : '{'^ (fieldlist)? '}'!;
fieldlist : field (fieldsep! field)* (fieldsep!)?;
field : '['! exp ']'! '='^ exp | NAME '='^ exp | exp;
*/
fieldsep : ',' | ';';
binop : '+' | '-' | '*' | '/' | '^' | '%' | '..' |
'<' | '<=' | '>' | '>=' | '==' | '~=' |
'and' | 'or';
unop : '-' | 'not' | '#';
number : INT | FLOAT | EXP | HEX;
stringrule : NORMALSTRING | CHARSTRING | LONGSTRING;
Lua.g:
/*
* Lua 5.1 grammar
*
* Nicolai Mainiero
* May 2007
*
* This is a Lua (http://www.lua.org) grammar for the version 5.1 for ANTLR 3.
* I tested it with basic and extended examples and it worked fine. It is also used
* for LunarEclipse (http://lunareclipse.sf.net) a Lua editor based on Eclipse.
*
* Thanks to Johannes Luber and Gavin Lambert who helped me with some mutually left recursion.
*
*/
grammar Lua;
options {
backtrack=true;
language=CSharp2;
//output=AST;
//ASTLabelType=CommonTree;
}
// Named action that sets the namespace of the generated lexer.
// ANTLR 3 named actions start with '@'; '#' is not valid here.
@lexer::namespace{AguaLua}
chunk : (stat (';'!)?)* (laststat (';'!)?)?;
block : chunk;
stat : varlist1 '='^ explist1 |
functioncall |
doblock |
'while'^ exp doblock |
'repeat'^ block untilrule |
'if'^ exp thenchunk elseifchunk* elsechunk? 'end'! |
'for'^ forinitializer doblock |
'for'^ namelist inlist doblock |
'function'^ funcname funcbody |
'local' 'function' NAME funcbody |
'local'^ namelist localstat? ;
localstat
: '='^ explist1;
untilrule
: 'until'^ exp;
elseifchunk
: 'elseif'^ exp thenchunk;
thenchunk
: 'then'^ block;
elsechunk
: 'else'^ block;
forinitializer
: NAME '='^ exp ','! exp (','! exp)?;
doblock
: 'do'^ block 'end'!;
inlist
: 'in'^ explist1;
laststat : 'return'^ (explist1)? | 'break';
dotname : '.'! funcname;
colonname
: ':' NAME;
funcname : NAME^ (dotname | colonname)?;
varlist1 : var (','! var)*;
namelist : NAME (','! NAME)*;
explist1 : (exp ','!)* exp;
exp : expelement (binop^ exp)* ;
expelement
: ('nil' | 'false' | 'true' | number | stringrule | '...' | function | prefixexp | tableconstructor | unop exp);
var: (namevar | dotvar | expvar | arrayvar)?;
namevar
: NAME^ var;
dotvar
: '.'! var;
expvar
: '('^ exp ')'! var;
arrayvar
: '['^ var ']'! var;
varSuffix: nameAndArgs* ('[' exp ']' | '.' NAME);
prefixexp: varOrExp nameAndArgs*;
functioncall: varOrExp nameAndArgs+;
varOrExp: var | '('! exp ')'!;
nameAndArgs: (':' NAME)? argsrule;
argsrule : '(' (explist1)? ')' | tableconstructor | stringrule ;
function : 'function' funcbody;
funcbody : funcparams funcblock;
funcblock
: ')'^ block 'end'!;
funcparams
: '('^ parlist1? ;
parlist1 : namelist (','! '...')? | '...';
tableconstructor : '{'^ (fieldlist)? '}'!;
fieldlist : field (fieldsep! field)* (fieldsep!)?;
field : '['! exp ']'! '='^ exp | NAME '='^ exp | exp;
fieldsep : ',' | ';';
binop : '+' | '-' | '*' | '/' | '^' | '%' | '..' |
'<' | '<=' | '>' | '>=' | '==' | '~=' |
'and' | 'or';
unop : '-' | 'not' | '#';
number : INT | FLOAT | EXP | HEX;
stringrule : NORMALSTRING | CHARSTRING | LONGSTRING;
// LEXER
NAME :('a'..'z'|'A'..'Z'|'_')(options{greedy=true;}: 'a'..'z'|'A'..'Z'|'_'|'0'..'9')*
;
INT : ('0'..'9')+;
FLOAT :INT '.' INT ;
EXP : (INT| FLOAT) ('E'|'e') ('-')? INT;
HEX :'0x' ('0'..'9'| 'a'..'f')+ ;
NORMALSTRING
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
CHARSTRING
: '\'' ( EscapeSequence | ~('\''|'\\') )* '\''
;
LONGSTRING
: '['('=')*'[' ( EscapeSequence | ~('\\'|']') )* ']'('=')*']'
;
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UnicodeEscape
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
COMMENT
: '--[[' ( options {greedy=false;} : . )* ']]' {Skip();}
;
LINE_COMMENT : '--' (~ NEWLINE)* {Skip();};
fragment NEWLINE : '\r'|'\n' | '\r\n' ;
WS : (' '|'\t'|'\u000C') {Skip();};
(both are based off of a grammar produced by Nicolai Mainero and available at ANTLR's site, Lua 5.1 grammar)
If I uncomment any more than this, I get the error above.
Okay, a 'Can't find template ruleRefBang.st' has something to do with the illegal use of a "tree exclude" operator: !. Usually, it is a contradicting rewrite rule: somewhere you have a ! and then rewrite it using -> but use that ignored token anyway. Since I cannot see a -> in your grammar, that can't be the case (unless you simplified the tree grammar to post here and removed some rewrite rules?).
Anyway, I'd start by removing all ! operators in your tree grammar and if your grammar then works put them, one by one, back in again. Then you should be able to pin point the place in your grammar that houses the illegal !.
Good luck!

How to turn this into a parser

If I just add on to the following yacc file, will it turn into a parser?
/* C-Minus BNF Grammar */
%token ELSE
%token IF
%token INT
%token RETURN
%token VOID
%token WHILE
%token ID
%token NUM
%token LTE
%token GTE
%token EQUAL
%token NOTEQUAL
%%
program : declaration_list ;
declaration_list : declaration_list declaration | declaration ;
declaration : var_declaration | fun_declaration ;
var_declaration : type_specifier ID ';'
| type_specifier ID '[' NUM ']' ';' ;
type_specifier : INT | VOID ;
fun_declaration : type_specifier ID '(' params ')' compound_stmt ;
params : param_list | VOID ;
param_list : param_list ',' param
| param ;
param : type_specifier ID | type_specifier ID '[' ']' ;
compound_stmt : '{' local_declarations statement_list '}' ;
local_declarations : local_declarations var_declaration
| /* empty */ ;
statement_list : statement_list statement
| /* empty */ ;
statement : expression_stmt
| compound_stmt
| selection_stmt
| iteration_stmt
| return_stmt ;
expression_stmt : expression ';'
| ';' ;
selection_stmt : IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement ;
iteration_stmt : WHILE '(' expression ')' statement ;
return_stmt : RETURN ';' | RETURN expression ';' ;
expression : var '=' expression | simple_expression ;
var : ID | ID '[' expression ']' ;
simple_expression : additive_expression relop additive_expression
| additive_expression ;
relop : LTE | '<' | '>' | GTE | EQUAL | NOTEQUAL ;
additive_expression : additive_expression addop term | term ;
addop : '+' | '-' ;
term : term mulop factor | factor ;
mulop : '*' | '/' ;
factor : '(' expression ')' | var | call | NUM ;
call : ID '(' args ')' ;
args : arg_list | /* empty */ ;
arg_list : arg_list ',' expression | expression ;
Heh.
It's only the grammar of a programming language.
To make it a parser, you need to add some code (actions) to it.
See, for example, http://dinosaur.compilertools.net/yacc/index.html
Look at chapter 2, Actions.
You will also need a lexical analyzer: see chapter 3, Lexical Analysis.

Resources