tatsu.exceptions.FailedParse while using a C BNF grammar adapted to Tatsu - parsing

tatsu.exceptions.FailedParse: (52:24) expecting one of: "'" '"' :
declarator = {pointer}? direct_declarator ;
^
I found a C BNF grammar here: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm
I adapted it so that it works with TatSu, but I'm not sure I did everything right.
The tatsu grammar:
translation_unit = {external_declaration}* ;
external_declaration = function_definition
| declaration ;
function_definition = {declaration_specifier}* declarator {declaration}* compound_statement ;
declaration_specifier = storage_class_specifier
| type_specifier
| type_qualifier ;
storage_class_specifier = "auto"
| "register"
| "static"
| "extern"
| "typedef" ;
type_specifier = "void"
| "char"
| "short"
| "int"
| "long"
| "float"
| "double"
| "signed"
| "unsigned"
| struct_or_union_specifier
| enum_specifier
| typedef_name ;
struct_or_union_specifier = struct_or_union identifier "{" {struct_declaration}+ "}"
| struct_or_union "{" {struct_declaration}+ "}"
| struct_or_union identifier ;
struct_or_union = "struct"
| "union" ;
struct_declaration = {specifier_qualifier}* struct_declarator_list ;
specifier_qualifier = type_specifier
| type_qualifier ;
struct_declarator_list = struct_declarator
| struct_declarator_list "," struct_declarator ;
struct_declarator = declarator
| declarator ":" constant_expression
| ":" constant_expression ;
declarator = {pointer}? direct_declarator ;
pointer = "*" {type_qualifier}* {pointer}? ;
type_qualifier = "const"
| "volatile" ;
direct_declarator = identifier
| "(" declarator ")"
| direct_declarator "[" {constant_expression}? "]"
| direct_declarator "(" parameter_type_list ")"
| direct_declarator "(" {identifier}* ")" ;
constant_expression = conditional_expression ;
conditional_expression = logical_or_expression
| logical_or_expression "?" expression ":" conditional_expression ;
logical_or_expression = logical_and_expression
| logical_or_expression "||" logical_and_expression ;
logical_and_expression = inclusive_or_expression
| logical_and_expression "&&" inclusive_or_expression ;
inclusive_or_expression = exclusive_or_expression
| inclusive_or_expression "|" exclusive_or_expression ;
exclusive_or_expression = and_expression
| exclusive_or_expression "^" and_expression ;
and_expression = equality_expression
| and_expression "&" equality_expression ;
equality_expression = relational_expression
| equality_expression "==" relational_expression
| equality_expression "!=" relational_expression ;
relational_expression = shift_expression
| relational_expression "<" shift_expression
| relational_expression ">" shift_expression
| relational_expression "<=" shift_expression
| relational_expression ">=" shift_expression ;
shift_expression = additive_expression
| shift_expression "<<" additive_expression
| shift_expression ">>" additive_expression ;
additive_expression = multiplicative_expression
| additive_expression "+" multiplicative_expression
| additive_expression "-" multiplicative_expression ;
multiplicative_expression = cast_expression
| multiplicative_expression "*" cast_expression
| multiplicative_expression "/" cast_expression
| multiplicative_expression "%" cast_expression ;
cast_expression = unary_expression
| "(" type_name ")" cast_expression ;
unary_expression = postfix_expression
| "++" unary_expression
| "--" unary_expression
| unary_operator cast_expression
| "sizeof" unary_expression
| "sizeof" type_name ;
postfix_expression = primary_expression
| postfix_expression "[" expression "]"
| postfix_expression "(" {assignment_expression}* ")"
| postfix_expression "." identifier
| postfix_expression "->" identifier
| postfix_expression "++"
| postfix_expression "--" ;
primary_expression = identifier
| constant
| string
| "(" expression ")" ;
constant = integer_constant
| character_constant
| floating_constant
| enumeration_constant ;
expression = assignment_expression
| expression "," assignment_expression ;
assignment_expression = conditional_expression
| unary_expression assignment_operator assignment_expression ;
assignment_operator = "="
| "/="
| "*="
| "%="
| "+="
| "-="
| "<<="
| ">>="
| "&="
| "^="
| "|=" ;
unary_operator = "&"
| "*"
| "+"
| "-"
| "~"
| "!" ;
type_name = {specifier_qualifier}+ {abstract_declarator}? ;
parameter_type_list = parameter_list
| parameter_list "," "..." ;
parameter_list = parameter_declaration
| parameter_list "," parameter_declaration ;
parameter_declaration = {declaration_specifier}+ declarator
| {declaration_specifier}+ abstract_declarator
| {declaration_specifier}+ ;
abstract_declarator = pointer
| pointer direct_abstract_declarator
| direct_abstract_declarator ;
direct_abstract_declarator = "(" abstract_declarator ")"
| {direct_abstract_declarator}? "[" {constant_expression}? "]"
| {direct_abstract_declarator}? "(" {parameter_type_list}? ")" ;
enum_specifier = "enum" identifier "{" enumerator_list "}"
| "enum" "{" enumerator_list "}"
| "enum" identifier ;
enumerator_list = enumerator
| enumerator_list "," enumerator ;
enumerator = identifier
| identifier "=" constant_expression ;
typedef_name = identifier ;
declaration = {declaration_specifier}+ {init_declarator}* ";" ;
init_declarator = declarator
| declarator "=" initializer ;
initializer = assignment_expression
| "{" initializer_list "}"
| "{" initializer_list "," "}" ;
initializer_list = initializer
| initializer_list "," initializer ;
compound_statement = "{" {declaration}* {statement}* "}" ;
statement = labeled_statement
| expression_statement
| compound_statement
| selection_statement
| iteration_statement
| jump_statement ;
labeled_statement = identifier ":" statement
| "case" constant_expression ":" statement
| "default" ":" statement ;
expression_statement = {expression}? ";" ;
selection_statement = "if" "(" expression ")" statement
| "if" "(" expression ")" statement "else" statement
| "switch" "(" expression ")" statement ;
iteration_statement = "while" "(" expression ")" statement
| "do" statement "while" "(" expression ")" ";"
| "for" "(" {expression}? ";" {expression}? ";" {expression}? ")" statement ;
jump_statement = "goto" identifier ";"
| "continue" ";"
| "break" ";"
| "return" {expression}? ";" ;
The C code:
int func(int i, char c) {
float f = 3;
}
The python code:
def main():
    """Parse ``sample.c`` with the TatSu grammar in ``cbnf_tatsu.txt`` and print the AST.

    The resulting AST is printed twice: first with ``pprint`` and then as
    indented JSON (after conversion with ``tatsu.util.asjson``).
    """
    import pprint
    import json
    from tatsu import parse
    from tatsu.util import asjson

    # Context managers make sure both file handles are closed, even if
    # reading fails; the original left them open until garbage collection.
    with open('sample.c') as source_file:
        csample = source_file.read()
    with open('cbnf_tatsu.txt') as grammar_file:
        gram = grammar_file.read()

    # parse() compiles the grammar text and applies it to the C source.
    ast = parse(gram, csample)

    print('PPRINT')
    pprint.pprint(ast, indent=2, width=20)
    print()
    print('JSON')
    print(json.dumps(asjson(ast), indent=2))
    print()


if __name__ == '__main__':
    main()

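TatSu doesn't accept `{rulename}?`; an optional element is written `[rulename]` instead. For example, the rule flagged in the error message should read `declarator = [pointer] direct_declarator ;`, and the same change applies to every other `{...}?` in the grammar. (`{rulename}*` and `{rulename}+` are valid TatSu syntax and can stay as they are.)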

Related

Antlr3 - Non Greedy Double Quoted String with Escaped Double Quote

The following Antlr3 Grammar file doesn't cater for escaped double quotes as part of the STRING lexer rule. Any ideas why?
Expressions working:
\"hello\"
ref(\"hello\",\"hello\")
Expressions NOT working:
\"h\"e\"l\"l\"o\"
ref(\"hello\", \"hel\"lo\")
Antlr3 grammar file runnable in AntlrWorks:
grammar Grammar;
options
{
output=AST;
ASTLabelType=CommonTree;
language=CSharp3;
}
public oaExpression
: exponentiationExpression EOF!
;
exponentiationExpression
: equalityExpression ( '^' equalityExpression )*
;
equalityExpression
: relationalExpression ( ( ('==' | '=' ) | ('!=' | '<>' ) ) relationalExpression )*
;
relationalExpression
: additiveExpression ( ( '>' | '>=' | '<' | '<=' ) additiveExpression )*
;
additiveExpression
: multiplicativeExpression ( ( '+' | '-' ) multiplicativeExpression )*
;
multiplicativeExpression
: primaryExpression ( ( '*' | '/' ) primaryExpression )*
;
primaryExpression
: '(' exponentiationExpression ')' | value | identifier (arguments )?
;
value
: STRING
;
identifier
: ID
;
expressionList
: exponentiationExpression ( ',' exponentiationExpression )*
;
arguments
: '(' ( expressionList )? ')'
;
/*
* Lexer rules
*/
ID
: LETTER (LETTER | DIGIT)*
;
STRING
: '"' ( options { greedy=false; } : ~'"' )* '"'
;
WS
: (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=Hidden;}
;
/*
* Fragment Lexer rules
*/
fragment
LETTER
: 'a'..'z'
| 'A'..'Z'
| '_'
;
fragment
EXPONENT
: ('e'|'E') ('+'|'-')? ( DIGIT )+
;
fragment
HEX_DIGIT
: ( DIGIT |'a'..'f'|'A'..'F')
;
fragment
DIGIT
: '0'..'9'
;
Try this:
STRING
: '"' // a opening quote
( // start group
'\\' ~('\r' | '\n') // an escaped char other than a line break char
| // OR
~('\\' | '"'| '\r' | '\n') // any char other than '"', '\' and line breaks
)* // end group and repeat zero or more times
'"' // the closing quote
;
When I test the 4 different test cases from your comment:
"\"hello\""
"ref(\"hello\",\"hello\")"
"\"h\"e\"l\"l\"o\""
"ref(\"hello\", \"hel\"lo\")"
with the lexer rule I suggested:
grammar T;
parse
: string+ EOF
;
string
: STRING
;
STRING
: '"' ('\\' ~('\r' | '\n') | ~('\\' | '"'| '\r' | '\n'))* '"'
;
SPACE
: (' ' | '\t' | '\r' | '\n')+ {skip();}
;
ANTLRWorks' debugger produces the following parse tree:
In other words: it works just fine (on my machine :)).
EDIT II
And I've also used your grammar (making some small changes to make it Java compatible), replacing the incorrect STRING rule with the one I suggested:
oaExpression
: STRING+ EOF!
//: exponentiationExpression EOF!
;
exponentiationExpression
: equalityExpression ( '^' equalityExpression )*
;
equalityExpression
: relationalExpression ( ( ('==' | '=' ) | ('!=' | '<>' ) ) relationalExpression )*
;
relationalExpression
: additiveExpression ( ( '>' | '>=' | '<' | '<=' ) additiveExpression )*
;
additiveExpression
: multiplicativeExpression ( ( '+' | '-' ) multiplicativeExpression )*
;
multiplicativeExpression
: primaryExpression ( ( '*' | '/' ) primaryExpression )*
;
primaryExpression
: '(' exponentiationExpression ')' | value | identifier (arguments )?
;
value
: STRING
;
identifier
: ID
;
expressionList
: exponentiationExpression ( ',' exponentiationExpression )*
;
arguments
: '(' ( expressionList )? ')'
;
/*
* Lexer rules
*/
ID
: LETTER (LETTER | DIGIT)*
;
//STRING
// : '"' ( options { greedy=false; } : ~'"' )* '"'
// ;
STRING
: '"' ('\\' ~('\r' | '\n') | ~('\\' | '"'| '\r' | '\n'))* '"'
;
WS
: (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} /*{$channel=Hidden;}*/
;
/*
* Fragment Lexer rules
*/
fragment
LETTER
: 'a'..'z'
| 'A'..'Z'
| '_'
;
fragment
EXPONENT
: ('e'|'E') ('+'|'-')? ( DIGIT )+
;
fragment
HEX_DIGIT
: ( DIGIT |'a'..'f'|'A'..'F')
;
fragment
DIGIT
: '0'..'9'
;
which parses the input from my previous example into an identical parse tree.
This is how I do this with strings that can contain escape sequences (not just \" but any):
// Lexer rule for a double-quoted string that may contain escapes.
// escape_count tallies each escape seen while matching; the final action
// stores that tally in the emitted token's user1 field.
// NOTE(review): DOUBLE_QUOTE and ESCAPE_OPERATOR are defined elsewhere in
// the grammar; presumably '"' and '\\' respectively - confirm.
DOUBLE_QUOTED_TEXT
@init { int escape_count = 0; }:
    DOUBLE_QUOTE
    (
        DOUBLE_QUOTE DOUBLE_QUOTE { escape_count++; }   // a doubled quote counts as one escape
        | ESCAPE_OPERATOR . { escape_count++; }         // escape operator followed by any single character
        | ~(DOUBLE_QUOTE | ESCAPE_OPERATOR)             // any character that is neither quote nor escape operator
    )*
    DOUBLE_QUOTE
    { EMIT(); LTOKEN->user1 = escape_count; }           // emit the token, then record the escape count on it
;
The rule additionally counts the escapes and stores the count in the token. This allows the receiver to quickly see whether it needs to do anything with the string (if user1 > 0). If you don't need that, remove the init section (the line that declares escape_count) and the actions.

Antlr wrong rule invocation

I'm trying to implement a grammar for parsing Lucene queries. So far everything went smoothly until I tried to add support for range queries. Lucene details aside, my grammar looks like this:
grammar ModifiedParser;
TERM_RANGE : '[' ('*' | TERM_TEXT) 'TO' ('*' | TERM_TEXT) ']'
| '{' ('*' | TERM_TEXT) 'TO' ('*' | TERM_TEXT) '}'
;
query : not (booleanOperator? not)* ;
booleanOperator : andClause
| orClause
;
andClause : 'AND' ;
notClause : 'NOT' ;
orClause : 'OR' ;
not : notClause? MODIFIER? clause;
clause : unqualified
| qualified
;
unqualified : TERM_RANGE # termRange
| TERM_PHRASE # termPhrase
| TERM_PHRASE_ANYTHING # termTruncatedPhrase
| '(' query ')' # queryUnqualified
| TERM_TEXT_TRUNCATED # termTruncatedText
| TERM_NORMAL # termText
;
qualified : TERM_NORMAL ':' unqualified
;
fragment TERM_CHAR : (~(' ' | '\t' | '\n' | '\r' | '\u3000'
| '\'' | '\"' | '(' | ')' | '[' | ']' | '{' | '}'
| '+' | '-' | '!' | ':' | '~' | '^'
| '?' | '*' | '\\' ))
;
fragment TERM_START_CHAR : TERM_CHAR
| ESCAPE
;
fragment ESCAPE : '\\' ~[];
MODIFIER : '-'
| '+'
;
AND : 'AND';
OR : 'OR';
NOT : 'NOT';
TERM_PHRASE_ANYTHING : '"' (ESCAPE|~('\"'|'\\'))+ '"' ;
TERM_PHRASE : '"' (ESCAPE|~('\"'|'\\'|'?'|'*'))+ '"' ;
TERM_TEXT_TRUNCATED : ('*'|'?')(TERM_CHAR+ ('*'|'?'))+ TERM_CHAR*
| TERM_START_CHAR (TERM_CHAR* ('?'|'*'))+ TERM_CHAR+
| ('?'|'*') TERM_CHAR+
;
TERM_NORMAL : TERM_TEXT;
fragment TERM_TEXT : TERM_START_CHAR TERM_CHAR* ;
WS : [ \t\r\n] -> skip ;
When I write a visitor and work with the tokens, parsing `asd [ 10 TO 100 ] { 1 TO 1000 } 100..1000` apparently throws token recognition errors for `[`, `]`, `}` and `{`, and only tries to visit the termRange rule on the third range. Do you know what I'm missing here? Thanks in advance.
Since you made TERM_RANGE a lexer rule, you must account for everything at a character level. In particular, you forgot to allow whitespace characters in your input.
You would likely be in a much better position if you instead created termRange, a parser rule.

The following sets of rules are mutually left-recursive TREE GRAMMAR

I have a complete parser grammar that generates an AST, which I believe is correct, using the rewrite rules and tree operators. At the moment I am stuck at the phase of creating a tree grammar. I get this error:
The following sets of rules are mutually left-recursive [direct_declarator, declarator] and [abstract_declarator, direct_abstract_declarator]
rewrite syntax or operator with no output option; setting output=AST
Here is my Tree Grammar.
tree grammar walker;
options {
language = Java;
tokenVocab = c2p;
ASTLabelType = CommonTree;
backtrack = true;
}
// Named action copied to the top of the generated walker source; here it
// declares the Java package of the generated class (the options block
// above selects language = Java).
@header
{
package com.frankdaniel.compiler;
}
translation_unit
: ^(PROGRAM (^(FUNCTION external_declaration))+)
;
external_declaration
options {k=1;}
: (declaration_specifiers? declarator declaration*)=> function_definition
| declaration
;
function_definition
: declaration_specifiers? declarator (declaration+ compound_statement|compound_statement)
;
declaration
: 'typedef' declaration_specifiers? init_declarator_list
| declaration_specifiers init_declarator_list?
;
declaration_specifiers
: ( type_specifier|type_qualifier)+
;
init_declarator_list
: ^(INIT_DECLARATOR_LIST init_declarator+)
;
init_declarator
: declarator (ASSIGN^ initializer)?
;
type_specifier : (CONST)? (VOID | CHAR | INT | FLOAT );
type_id
: IDENTIFIER
//{System.out.println($IDENTIFIER.text+" is a type");}
;
type_qualifier
: CONST
;
declarator
: pointer? direct_declarator
| pointer
;
direct_declarator
: (IDENTIFIER|declarator) declarator_suffix*
;
declarator_suffix
: constant_expression
| parameter_type_list
| identifier_list
;
pointer
: TIMES type_qualifier+ pointer?
| TIMES pointer
| TIMES
;
parameter_type_list
: parameter_list
;
parameter_list
: ^(PARAMETER_LIST parameter_declaration)
;
parameter_declaration
: declaration_specifiers (declarator|abstract_declarator)*
;
identifier_list
: ^(IDENTIFIER_LIST IDENTIFIER+)
;
type_name
: specifier_qualifier_list abstract_declarator?
;
specifier_qualifier_list
: ( type_qualifier | type_specifier )+
;
abstract_declarator
: pointer direct_abstract_declarator?
| direct_abstract_declarator
;
direct_abstract_declarator
: (abstract_declarator | abstract_declarator_suffix ) abstract_declarator_suffix*
;
abstract_declarator_suffix
: constant_expression
| parameter_type_list
;
initializer
: assignment_expression
| initializer_list?
;
initializer_list
: ^(INITIALIZER_LIST initializer+)
;
// EXPRESSIONS
argument_expression_list
: ^(EXPRESSION_LIST assignment_expression+)
;
multiplicative_expression
: ^((TIMES|DIV|MOD) cast_expression cast_expression);
additive_expression
: ^((PLUS|MINUS) multiplicative_expression multiplicative_expression);
cast_expression
: ^(CAST_EXPRESSION type_name cast_expression)
| unary_expression
;
unary_expression
: postfix_expression
| PPLUS unary_expression
| MMINUS unary_expression
| unary_operator cast_expression
;
postfix_expression
: primary_expression
( expression
| argument_expression_list
| IDENTIFIER
| IDENTIFIER
| PPLUS
| MMINUS
)*
;
unary_operator
: TIMES
| PLUS
| MINUS
| NOT
;
primary_expression
: IDENTIFIER
| constant
| expression
;
constant
: HEX_LITERAL
| OCTAL_LITERAL
| DECIMAL_LITERAL
| CHARACTER_LITERAL
| STRING_LITERAL
| FLOATING_POINT_LITERAL
;
////////////////////////////////////////////////////////
expression
: ^(EXPRESSION assignment_expression+)
;
constant_expression
: conditional_expression
;
assignment_expression
: ^(assignment_operator lvalue assignment_expression)
| conditional_expression
;
lvalue
: unary_expression
;
assignment_operator
: ASSIGN
;
conditional_expression : (logical_or_expression) (QUESTIONMARK expression COLON conditional_expression)?;
logical_or_expression : ^(OR logical_and_expression logical_and_expression);
logical_and_expression : ^(AND equality_expression equality_expression);
//equality_expression : (a=relational_expression) ((e=EQUAL|e=NONEQUAL)^ b=relational_expression)?;
equality_expression : ^((EQUAL|NONEQUAL) relational_expression relational_expression);
//relational_expression : additive_expression ((ST|GT|STEQ|GTEQ)^ additive_expression)* ;
relational_expression : ^((ST|GT|STEQ|GTEQ) additive_expression additive_expression);
// STATEMENTS
statement
: compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
compound_statement
: ^(STATEMENT declaration* statement_list? )
;
statement_list
: statement+
;
expression_statement
:expression
;
selection_statement
:^(IF expression statement (^(ELSE statement))? )
|^(SWITCH expression statement)
;
iteration_statement
: ^(WHILE expression statement)
| ^(DO statement ^(WHILE expression))
| ^(FOR expression_statement expression_statement expression? statement)
;
jump_statement
: CONTINUE
| BREAK
| RETURN
| ^(RETURN expression)
;
It seems obvious that the following two rules are mutually left-recursive:
{code}
declarator
: pointer? direct_declarator
| pointer
;
direct_declarator
: (IDENTIFIER|declarator) declarator_suffix*
;
{code}
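Rule "declarator" refers to "direct_declarator", and "direct_declarator" refers back to "declarator". Because `pointer?` is optional, each rule can begin with the other without consuming any token in between, and there are no predicates to steer the rule evaluation, so ANTLR reports the pair as mutually left-recursive.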

Unusual ANTLR error when attempting to reorganize grammar into two files

I am reorganizing my grammar into two files in order to accommodate a tree grammar: Lua.g and LuaGrammar.g. Lua.g will have all of my lexer rules, and LuaGrammar.g will have all of my tree grammar and parser rules. However, when I try to compile LuaGrammar.g I get the following error:
[00:28:37] error(10): internal error: C:\Users\RCIX\Desktop\AguaLua\Project\trunk\AguaLua\AguaLua\ANTLR Data\LuaGrammar.g : java.lang.IllegalArgumentException: Can't find template ruleRefBang.st; group hierarchy is [CSharp2]
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:507)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:484)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.stringtemplate.StringTemplateGroup.lookupTemplate(StringTemplateGroup.java:484)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:392)
org.antlr.stringtemplate.StringTemplateGroup.getInstanceOf(StringTemplateGroup.java:404)
org.antlr.grammar.v2.CodeGenTreeWalker.getRuleElementST(CodeGenTreeWalker.java:152)
org.antlr.grammar.v2.CodeGenTreeWalker.atom(CodeGenTreeWalker.java:1986)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1708)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1556)
org.antlr.grammar.v2.CodeGenTreeWalker.alternative(CodeGenTreeWalker.java:1306)
org.antlr.grammar.v2.CodeGenTreeWalker.block(CodeGenTreeWalker.java:1081)
org.antlr.grammar.v2.CodeGenTreeWalker.ebnf(CodeGenTreeWalker.java:1871)
org.antlr.grammar.v2.CodeGenTreeWalker.element(CodeGenTreeWalker.java:1704)
org.antlr.grammar.v2.CodeGenTreeWalker.alternative(CodeGenTreeWalker.java:1306)
org.antlr.grammar.v2.CodeGenTreeWalker.block(CodeGenTreeWalker.java:1081)
org.antlr.grammar.v2.CodeGenTreeWalker.rule(CodeGenTreeWalker.java:797)
org.antlr.grammar.v2.CodeGenTreeWalker.rules(CodeGenTreeWalker.java:588)
org.antlr.grammar.v2.CodeGenTreeWalker.grammarSpec(CodeGenTreeWalker.java:530)
org.antlr.grammar.v2.CodeGenTreeWalker.grammar(CodeGenTreeWalker.java:336)
org.antlr.codegen.CodeGenerator.genRecognizer(CodeGenerator.java:432)
org.antlr.Tool.generateRecognizer(Tool.java:641)
org.antlr.Tool.process(Tool.java:454)
org.antlr.works.generate.CodeGenerate.generate(CodeGenerate.java:104)
org.antlr.works.generate.CodeGenerate.run(CodeGenerate.java:185)
java.lang.Thread.run(Unknown Source)
And I'm getting the following error:
[00:34:58] error(100): C:\Users\RCIX\Desktop\AguaLua\Project\trunk\AguaLua\AguaLua\ANTLR Data\Lua.g:0:0: syntax error: codegen: <AST>:0:0: unexpected end of subtree
when attempting to generate Lua.g. Why am I getting these errors, and how can I fix them? (I'm using ANTLR v3 and am able to provide the grammar files.)
Update: here is the grammar file I am trying to compile.
tree grammar LuaGrammar;
options {
backtrack=true;
language=CSharp2;
output=AST;
tokenVocab=Lua;
filter=true;
ASTLabelType=CommonTree;
}
assignment
:
^('=' left=NAME right=NAME) {Ast. };
/*
chunk : (stat (';'!)?)* (laststat (';'!)?)?;
block : chunk;
stat : varlist1 '='^ explist1 |
functioncall |
doblock |
'while'^ exp doblock |
'repeat'^ block untilrule |
'if'^ exp thenchunk elseifchunk* elsechunk? 'end'! |
'for'^ forinitializer doblock |
'for'^ namelist inlist doblock |
'function'^ funcname funcbody |
'local' 'function' NAME funcbody |
'local'^ namelist localstat? ;
localstat
: '='^ explist1;
untilrule
: 'until'^ exp;
elseifchunk
: 'elseif'^ exp thenchunk;
thenchunk
: 'then'^ block;
elsechunk
: 'else'^ block;
forinitializer
: NAME '='^ exp ','! exp (','! exp)?;
doblock
: 'do'^ block 'end'!;
inlist
: 'in'^ explist1;
laststat : 'return'^ (explist1)? | 'break';
dotname : '.'! funcname;
colonname
: ':' NAME;
funcname : NAME^ (dotname | colonname)?;
varlist1 : var (','! var)*;
namelist : NAME (','! NAME)*;
explist1 : (exp ','!)* exp;
*/
/*
exp : expelement (binop^ exp)* ;
expelement
: ('nil' | 'false' | 'true' | number | stringrule | '...' | /*function |*\ prefixexp | tableconstructor | unop exp);
var: (namevar | dotvar | expvar | arrayvar)?;
namevar
: NAME^ var;
dotvar
: '.'! var;
expvar
: '('^ exp ')'! var;
arrayvar
: '['^ var ']'! var;
varSuffix: nameAndArgs* ('[' exp ']' | '.' NAME);
prefixexp: varOrExp nameAndArgs*;
functioncall: varOrExp nameAndArgs+;
varOrExp: var | '('! exp ')'!;
nameAndArgs: (':' NAME)? argsrule;
argsrule : '(' (explist1)? ')' | tableconstructor | stringrule ;
function : 'function' funcbody;
funcbody : funcparams funcblock;
funcblock
: ')'^ block 'end'!;
funcparams
: '('^ parlist1? ;
parlist1 : namelist (','! '...')? | '...';
tableconstructor : '{'^ (fieldlist)? '}'!;
fieldlist : field (fieldsep! field)* (fieldsep!)?;
field : '['! exp ']'! '='^ exp | NAME '='^ exp | exp;
*/
fieldsep : ',' | ';';
binop : '+' | '-' | '*' | '/' | '^' | '%' | '..' |
'<' | '<=' | '>' | '>=' | '==' | '~=' |
'and' | 'or';
unop : '-' | 'not' | '#';
number : INT | FLOAT | EXP | HEX;
stringrule : NORMALSTRING | CHARSTRING | LONGSTRING;
Lua.g:
/*
* Lua 5.1 grammar
*
* Nicolai Mainiero
* May 2007
*
* This is a Lua (http://www.lua.org) grammar for the version 5.1 for ANTLR 3.
* I tested it with basic and extended examples and it worked fine. It is also used
* for LunarEclipse (http://lunareclipse.sf.net) a Lua editor based on Eclipse.
*
* Thanks to Johannes Luber and Gavin Lambert who helped me with some mutually left recursion.
*
*/
grammar Lua;
options {
backtrack=true;
language=CSharp2;
//output=AST;
//ASTLabelType=CommonTree;
}
// Named action for the C# target: places the generated lexer class in the
// AguaLua namespace.
@lexer::namespace{AguaLua}
chunk : (stat (';'!)?)* (laststat (';'!)?)?;
block : chunk;
stat : varlist1 '='^ explist1 |
functioncall |
doblock |
'while'^ exp doblock |
'repeat'^ block untilrule |
'if'^ exp thenchunk elseifchunk* elsechunk? 'end'! |
'for'^ forinitializer doblock |
'for'^ namelist inlist doblock |
'function'^ funcname funcbody |
'local' 'function' NAME funcbody |
'local'^ namelist localstat? ;
localstat
: '='^ explist1;
untilrule
: 'until'^ exp;
elseifchunk
: 'elseif'^ exp thenchunk;
thenchunk
: 'then'^ block;
elsechunk
: 'else'^ block;
forinitializer
: NAME '='^ exp ','! exp (','! exp)?;
doblock
: 'do'^ block 'end'!;
inlist
: 'in'^ explist1;
laststat : 'return'^ (explist1)? | 'break';
dotname : '.'! funcname;
colonname
: ':' NAME;
funcname : NAME^ (dotname | colonname)?;
varlist1 : var (','! var)*;
namelist : NAME (','! NAME)*;
explist1 : (exp ','!)* exp;
exp : expelement (binop^ exp)* ;
expelement
: ('nil' | 'false' | 'true' | number | stringrule | '...' | function | prefixexp | tableconstructor | unop exp);
var: (namevar | dotvar | expvar | arrayvar)?;
namevar
: NAME^ var;
dotvar
: '.'! var;
expvar
: '('^ exp ')'! var;
arrayvar
: '['^ var ']'! var;
varSuffix: nameAndArgs* ('[' exp ']' | '.' NAME);
prefixexp: varOrExp nameAndArgs*;
functioncall: varOrExp nameAndArgs+;
varOrExp: var | '('! exp ')'!;
nameAndArgs: (':' NAME)? argsrule;
argsrule : '(' (explist1)? ')' | tableconstructor | stringrule ;
function : 'function' funcbody;
funcbody : funcparams funcblock;
funcblock
: ')'^ block 'end'!;
funcparams
: '('^ parlist1? ;
parlist1 : namelist (','! '...')? | '...';
tableconstructor : '{'^ (fieldlist)? '}'!;
fieldlist : field (fieldsep! field)* (fieldsep!)?;
field : '['! exp ']'! '='^ exp | NAME '='^ exp | exp;
fieldsep : ',' | ';';
binop : '+' | '-' | '*' | '/' | '^' | '%' | '..' |
'<' | '<=' | '>' | '>=' | '==' | '~=' |
'and' | 'or';
unop : '-' | 'not' | '#';
number : INT | FLOAT | EXP | HEX;
stringrule : NORMALSTRING | CHARSTRING | LONGSTRING;
// LEXER
NAME :('a'..'z'|'A'..'Z'|'_')(options{greedy=true;}: 'a'..'z'|'A'..'Z'|'_'|'0'..'9')*
;
INT : ('0'..'9')+;
FLOAT :INT '.' INT ;
EXP : (INT| FLOAT) ('E'|'e') ('-')? INT;
HEX :'0x' ('0'..'9'| 'a'..'f')+ ;
NORMALSTRING
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
CHARSTRING
: '\'' ( EscapeSequence | ~('\''|'\\') )* '\''
;
LONGSTRING
: '['('=')*'[' ( EscapeSequence | ~('\\'|']') )* ']'('=')*']'
;
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UnicodeEscape
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
COMMENT
: '--[[' ( options {greedy=false;} : . )* ']]' {Skip();}
;
LINE_COMMENT : '--' (~ NEWLINE)* {Skip();};
fragment NEWLINE : '\r'|'\n' | '\r\n' ;
WS : (' '|'\t'|'\u000C') {Skip();};
(Both are based on a grammar produced by Nicolai Mainiero and available at ANTLR's site: Lua 5.1 grammar.)
If I uncomment any more than this, it comes up with the error above.
Okay, a 'Can't find template ruleRefBang.st' has something to do with the illegal use of a "tree exclude" operator: !. Usually, it is a contradicting rewrite rule: somewhere you have a ! and then rewrite it using -> but use that ignored token anyway. Since I cannot see a -> in your grammar, that can't be the case (unless you simplified the tree grammar to post here and removed some rewrite rules?).
Anyway, I'd start by removing all ! operators in your tree grammar and if your grammar then works put them, one by one, back in again. Then you should be able to pin point the place in your grammar that houses the illegal !.
Good luck!

How to turn this into a parser

If I just add on to the following yacc file, will it turn into a parser?
/* C-Minus BNF Grammar */
%token ELSE
%token IF
%token INT
%token RETURN
%token VOID
%token WHILE
%token ID
%token NUM
%token LTE
%token GTE
%token EQUAL
%token NOTEQUAL
%%
program : declaration_list ;
declaration_list : declaration_list declaration | declaration ;
declaration : var_declaration | fun_declaration ;
var_declaration : type_specifier ID ';'
| type_specifier ID '[' NUM ']' ';' ;
type_specifier : INT | VOID ;
fun_declaration : type_specifier ID '(' params ')' compound_stmt ;
params : param_list | VOID ;
param_list : param_list ',' param
| param ;
param : type_specifier ID | type_specifier ID '[' ']' ;
compound_stmt : '{' local_declarations statement_list '}' ;
local_declarations : local_declarations var_declaration
| /* empty */ ;
statement_list : statement_list statement
| /* empty */ ;
statement : expression_stmt
| compound_stmt
| selection_stmt
| iteration_stmt
| return_stmt ;
expression_stmt : expression ';'
| ';' ;
selection_stmt : IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement ;
iteration_stmt : WHILE '(' expression ')' statement ;
return_stmt : RETURN ';' | RETURN expression ';' ;
expression : var '=' expression | simple_expression ;
var : ID | ID '[' expression ']' ;
simple_expression : additive_expression relop additive_expression
| additive_expression ;
relop : LTE | '<' | '>' | GTE | EQUAL | NOTEQUAL ;
additive_expression : additive_expression addop term | term ;
addop : '+' | '-' ;
term : term mulop factor | factor ;
mulop : '*' | '/' ;
factor : '(' expression ')' | var | call | NUM ;
call : ID '(' args ')' ;
args : arg_list | /* empty */ ;
arg_list : arg_list ',' expression | expression ;
Heh.
It's only the grammar of a programming language (PL).
To make it a parser you need to add some code to it,
as described at http://dinosaur.compilertools.net/yacc/index.html
Look at chapter 2, "Actions".
You'd also need a lexical analyzer; see chapter 3, "Lexical Analysis".

Resources