OCaml: parse minus floating-point number as a calculator - parsing

What I would like to do
I would like to correctly parse minus floating-point numbers.
How should I fix my code?
What is not working
When I try to interpret - 5 as -5.000000, it shows me this error.
Fatal error: exception Stdlib.Parsing.Parse_error
1c1
< error: parse error at char=0, near token '-'
---
> - 5 = -5.000000
My source code
calc_ast.ml
(* abstract syntax tree *)
type expr =
Num of float
| Plus of expr * expr
| Times of expr * expr
| Div of expr * expr
| Minus of expr * expr
;;
calc_lex.ml
{
open Calc_parse
;;
}
rule lex = parse
| [' ' '\t' '\n' ] { lex lexbuf }
| '-'? ['0' - '9']+ as s { NUM(float_of_string s) }
| '-'? ['0' - '9']+ ('.' digit*)? as s { NUM(float_of_string s) }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIV }
| '(' { LPAREN }
| ')' { RPAREN }
| eof { EOF }
calc_parse.mly
%{
%}
%token <float> NUM
%token PLUS TIMES EOF MINUS DIV LPAREN RPAREN
%start program
%type <Calc_ast.expr> program
%%
program :
| compound_expr EOF { $1 }
compound_expr :
| expr { $1 }
| LPAREN expr RPAREN { $2 }
expr :
| mul { $1 }
| expr PLUS mul { Calc_ast.Plus($1, $3) }
| expr MINUS mul { Calc_ast.Minus($1, $3) }
mul :
| NUM { Calc_ast.Num $1 }
| mul TIMES NUM { Calc_ast.Times($1, Calc_ast.Num $3) }
| mul DIV NUM { Calc_ast.Div($1, Calc_ast.Num $3) }
%%
calc.ml
open Calc_parse
(* token -> string *)
let string_of_token t =
match t with
NUM(s) -> Printf.sprintf "NUM(%f)" s
| PLUS -> "PLUS"
| TIMES -> "TIMES"
| MINUS -> "MINUS"
| DIV -> "DIV"
| LPAREN -> "LPAREN"
| RPAREN -> "RPAREN"
| EOF -> "EOF"
;;
(* print token t and return it *)
let print_token t =
Printf.printf "%s\n" (string_of_token t);
t
;;
(* apply lexer to string s *)
let lex_string s =
let rec loop b =
match print_token (Calc_lex.lex b) with
EOF -> ()
| _ -> loop b
in
loop (Lexing.from_string s)
;;
(* apply parser to string s;
show some info when a parse error happens *)
let parse_string s =
let b = Lexing.from_string s in
try
program Calc_lex.lex b (* main work *)
with Parsing.Parse_error as exn ->
(* handle parse error *)
let c0 = Lexing.lexeme_start b in
let c1 = Lexing.lexeme_end b in
Printf.fprintf stdout
"error: parse error at char=%d, near token '%s'\n"
c0 (String.sub s c0 (c1 - c0));
raise exn
;;
(* evaluate expression (AST tree) *)
let rec eval_expr e =
match e with
Calc_ast.Num(c) -> c
| Calc_ast.Plus(e0, e1)
-> (eval_expr e0) +. (eval_expr e1)
| Calc_ast.Minus(e0, e1)
-> (eval_expr e0) -. (eval_expr e1)
| Calc_ast.Times(e0, e1)
-> (eval_expr e0) *. (eval_expr e1)
| Calc_ast.Div(e0, e1)
-> (eval_expr e0) /. (eval_expr e1)
;;
(* evaluate string *)
let eval_string s =
let e = parse_string s in
eval_expr e
;;
(* evaluate string and print it *)
let eval_print_string s =
let y = eval_string s in
Printf.printf "%s = %f\n" s y
;;
let eval_print_stdin () =
let ch = stdin in
let s = input_line ch in
eval_print_string (String.trim s)
;;
let main argv =
eval_print_stdin ()
;;
if not !Sys.interactive then
main Sys.argv
;;

As indicated in the comments, it's almost never a good idea for the lexical analyser to try to recognise the - as part of a numeric literal:
Since the lexical token must be a contiguous string, - 5 will not match. Instead, you'll get two tokens. So you need to handle that in the parser anyway.
On the other hand, if you don't put a space after the -, then 3-4 will be analysed as the two tokens 3 and -4, which is also going to lead to a syntax error.
A simple solution is to add term to recognise the unary negation operator:
mul :
| term { Calc_ast.Num $1 }
| mul TIMES term { Calc_ast.Times($1, Calc_ast.Num $3) }
| mul DIV term { Calc_ast.Div($1, Calc_ast.Num $3) }
term :
| NUM { $1 }
| MINUS term { Calc_ast.Minus(0, $2) }
| LPAREN expr RPAREN { $2 }
In the above, I also moved the handling of parentheses from the bottom to the top of the hierarchy, in order to make 4*(5+3) possible. With that change, you will no longer require compound_expr.

Related

ocaml parser "values do not match" error

IHello!
I'm trying to code a lexer and parser for computer algebric system.
When I'm compiling the code with my makefile, I have problems on the value of some functions.
Here is the code of function.ml :
(****************************************
* Type definitions
****************************************)
type operator = Plus | Minus | Times | Div | Power;;
type var = string;; (* e.g. "x", "y", etc. *)
type power = var * float;; (* var, raised_to, e.g. ("x", 3.) -> x^3. *)
type monomial = float * power list;; (* coefficient, list of power terms, e.g. (2., [("x", 1.); ("y", 3.)]) -> 2xy^3 *)
type polynomial = monomial list;; (* sum of monomials, e.g. [(2., [("x", 2.); ("y", 3.)]); (-1., [])] -> 2x^2y^3 - 1 *)
type frac = polynomial * polynomial;; (* numerator, denominator *)
type exp = frac * frac;; (* base, exponent *)
type term = Poly of polynomial
| Frac of frac
| Exp of exp;;
type expr = Leaf of term
| Node of operator * expr list;;
type eqn = expr * expr;;
(****************************************
* Lexer/Parser definitions
****************************************)
type token = PLUS | MINUS | TIMES | DIV | POWER | LPAREN | RPAREN | EQUALS
| FLOAT of float
| VAR of var
| EOF;;
Here is the code of lexer.mll :
{
open Function
}
let numeric = ['0' - '9']
let letter = ['a' - 'z' 'A' - 'Z']
rule main = parse
| [' ' '\t' '\n'] { main lexbuf } (* skip over whitespace *)
| "+" { PLUS }
| "-" { MINUS }
| "*" { TIMES }
| "/" { DIV }
| "^" { POWER }
| "(" { LPAREN }
| ")" { RPAREN }
| "=" { EQUALS }
| ((numeric*) '.' (numeric+)) as n
{ FLOAT (float_of_string n) }
| (numeric+) as n
{ FLOAT (float_of_string n) }
| (letter numeric*) as n
{ VAR n }
| eof { EOF }
{
let lexer_main = main;;
let token_iterator_of_string s =
let rec lbuf = Lexing.from_string s
in fun () -> lexer_main lbuf;;
let token_list_of_string s =
let rec lbuf = Lexing.from_string s
and token_list_aux () =
let token = lexer_main lbuf in
if token = EOF then
[]
else
token :: token_list_aux ()
in token_list_aux ();;
}
Here is the code of parser.mly :
%{
open Function
%}
%token PLUS MINUS TIMES DIV POWER LPAREN RPAREN EQUALS EOF
%token <float> FLOAT
%token <Function.var> VAR
%start yacc_eqn
%start yacc_expr
%type <Function.eqn> yacc_eqn
%type <Function.expr> yacc_expr
%%
yacc_eqn:
exp EQUALS exp EOF { ($1, $3) }
yacc_expr:
exp EOF { $1 }
exp:
op1 { $1 }
op1:
op2 { $1 }
| op1 PLUS op2 { Node(Plus, [$1; $3]) }
| op1 MINUS op2 { Node(Minus, [$1; $3]) }
op2:
op3 { $1 }
| op2 TIMES op3 { Node(Times, [$1; $3]) }
| op2 DIV op3 { Node(Div, [$1; $3]) }
op3:
op4 { $1 }
| op3 op4 { Node(Times, [$1; $2]) }
op4:
leaf { $1 }
| op4 POWER leaf { Node(Power, [$1; $3]) }
leaf:
atom { $1 }
| LPAREN exp RPAREN { $2 }
atom:
VAR { Leaf(Poly(poly_of_var $1)) }
| FLOAT { Leaf(Poly(poly_of_float $1)) }
%%
let eqn_of_string s = yacc_eqn Lexer.lexer_main (Lexing.from_string s);;
let expr_of_string s = yacc_expr Lexer.lexer_main (Lexing.from_string s);;
let parse_eqn = eqn_of_string;;
let parse_expr = expr_of_string;;
The problem is that in the parser.mli which is create with the makefile, the value of yacc_eqn and yacc_expr are :
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
val yacc_expr :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.expr
And I have the following error :
The implementation parser.ml does not match the interface parser.cmi:
Values do not match:
val yacc_eqn :
(Lexing.lexbuf -> Function.token) -> Lexing.lexbuf -> Function.eqn
is not included in
val yacc_eqn :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> Function.eqn
I think the solution might be to something like a cast, but I have stricly no idea how to do that ... Anyone help ?
Thanks in advance !

Pattern Matching in dypgen

I want handle some ambiguities in dypgen. I found something in the manual, that I want to know, how I can use that.
In the manual point 5.2 "Pattern matching on Symbols" there is an example:
expr:
| expr OP<"+"> expr { $1 + $2 }
| expr OP<"*"> expr { $1 * $2 }
OP is matched with "+" or "*", as I understand. I also find there:
The patterns can be any Caml patterns (but without the keyword when).
For instance this is possible:
expr: expr<(Function([arg1;arg2],f_body)) as f> expr
{ some action }
So I tried to put there some other expressions, but I dont understand, what happens. If I put in there printf it outputs the value of the matched string. But if I put in there (fun x -> printf x), that seems to me the same as printf, dypgen complains about a syntax error and points to the end of the expression. If I put Printf.printf in there, it complains about Syntax error: operator expected. And if I put there (fun x -> Printf.printf x) it says: Lexing failed with message: lexing: empty token
What do these different error-messages mean?
In the end I would like to look up something in a hashtable, if the value is in there, but I don't know, if it is possible this way. Is it or isn't it possible?
EDIT: A minimal example derived from the forest-example from the dypgen-demos.
The grammarfile forest_parser.dyp contains:
{
open Parse_tree
let dyp_merge = Dyp.keep_all
}
%start main
%layout [' ' '\t']
%%
main : np "." "\n" { $1 }
np:
| sg {Noun($1)}
| pl {Noun($1)}
sg: word <Word("sheep"|"fish")> {Sg($1)}
sg: word <Word("cat"|"dog")> {Sg($1)}
pl: word <Word("sheep"|"fish")> {Pl($1)}
pl: word <Word("cats"|"dogs")> {Pl($1)}
/* OR try:
sg: word <printf> {Sg($1)}
pl: word <printf> {Pl($1)}
*/
word:
| (['A'-'Z' 'a'-'z']+) {Word($1)}
The forest.ml has the following print_forest-function now:
let print_forest forest =
let rec aux1 t = match t with
| Word x
-> print_string x
| Noun (x) -> (
print_string "N [";
aux1 x;
print_string " ]")
| Sg (x) -> (
print_string "Sg [";
aux1 x;
print_string " ]")
| Pl (x) -> (
print_string "Pl [";
aux1 x;
print_string " ]")
in
let aux2 t = aux1 t; print_newline () in
List.iter aux2 forest;
print_newline ()
And the parser_tree.mli contains:
type tree =
| Word of string
| Noun of tree
| Sg of tree
| Pl of tree
And then you can determine, what numeri fish, sheep, cat(s) etc. are.
sheep or fish can be singular and plural. cats and dogs cannot.
fish.
N [Sg [fish ] ]
N [Pl [fish ] ]
I know nothing about Dypgen so I tried to figure it out.
Let's see what I found out.
In the parser.dyp file you can define the lexer and the parser or you can use an external lexer. Here's what I did :
My ast looks like this :
parse_prog.mli
type f =
| Print of string
| Function of string list * string * string
type program = f list
prog_parser.dyp
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pr
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] }
| function_call ";" function_calls
{ $1 :: $3 }
function_call:
| printf STRING
{ Print $2 } pr
| "(" ANONYMFUNCTION lident "->" printf lident ")" STRING
{ Print $6 } pf
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) } pf
| FUNCTION STRING
{ Function ([], $1, $2) } pf
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) } pf
printf:
| FUNCTION<"printf">
{ () }
| MODULE<"Printf"> "." FUNCTION<"printf">
{ () }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
This file is the most important. As you can see, if I have a function printf "Test" my grammar is ambiguous and this can be reduced to either Print "Test" or Function ([], "printf", "Test") but !, as I realized, I can give priorities to my rules so if one as a higher priority it will be the one chosen for the first parsing. (try to uncomment let dyp_merge = Dyp.keep_all and you'll see all the possible combinations).
And in my main :
main.ml
open Parse_prog
let print_stlist fmt sl =
match sl with
| [] -> ()
| _ -> List.iter (Format.fprintf fmt "%s.") sl
let print_program tl =
let aux1 t = match t with
| Function (ml, f, p) ->
Format.printf "I can't do anything with %a%s(\"%s\")#." print_stlist ml f p
| Print s -> Format.printf "You want to print : %s#." s
in
let aux2 t = List.iter (fun (tl, _) ->
List.iter aux1 tl; Format.eprintf "------------#.") tl in
List.iter aux2 tl
let input_file = Sys.argv.(1)
let lexbuf = Dyp.from_channel (Forest_parser.pp ()) (Pervasives.open_in input_file)
let result = Parser_prog.main lexbuf
let () = print_program result
And, for example, for the following file :
test
printf "first print";
Printf.printf "nested print";
Format.eprintf "nothing possible";
(fun x -> printf x) "Anonymous print";
If I execute ./myexec test I will get the following prompt
You want to print : first print
You want to print : nested print
I can't do anything with Format.eprintf("nothing possible")
You want to print : x
------------
So, TL;DR, the manual example was just here to show you that you can play with your defined tokens (I never defined the token PRINT, just FUNCTION) and match on them to get new rules.
I hope it's clear, I learned a lot with your question ;-)
[EDIT] So, I changed the parser to match what you wanted to watch :
{
open Parse_prog
(* let dyp_merge = Dyp.keep_all *)
let string_buf = Buffer.create 10
}
%start main
%relation pf<pp
%lexer
let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule string = parse
| '"' { () }
| _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
string lexbuf }
main lexer =
newline | space + -> { () }
"fun" -> ANONYMFUNCTION { () }
lident -> FUNCTION { Dyp.lexeme lexbuf }
uident -> MODULE { Dyp.lexeme lexbuf }
'"' -> STRING { Buffer.clear string_buf;
string lexbuf;
Buffer.contents string_buf }
%parser
main : function_calls eof
{ $1 }
function_calls:
|
{ [] } pf
| function_call <Function((["Printf"] | []), "printf", st)> ";" function_calls
{ (Print st) :: $3 } pp
| function_call ";" function_calls
{ $1 :: $3 } pf
function_call:
| nested_modules "." FUNCTION STRING
{ Function ($1, $3, $4) }
| FUNCTION STRING
{ Function ([], $1, $2) }
| "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
{ Function ([], $5, $8) }
nested_modules:
| MODULE
{ [$1] }
| MODULE "." nested_modules
{ $1 :: $3 }
Here, as you can see, I don't handle the fact that my function is print when I parse it but when I put it in my functions list. So, I match on the algebraic type that was built by my parser. I hope this example is ok for you ;-) (but be warned, this is extremely ambiguous ! :-D)

Haskell - Happy - "No instance ..." error

I'm trying to get familiar with Happy parser generator for Haskell. Currently, I have an example from the documentation but when I compile the program, I get an error.
This is the code:
{
module Main where
import Data.Char
}
%name calc
%tokentype { Token }
%error { parseError }
%token
let { TokenLet }
in { TokenIn }
int { TokenInt $$ }
var { TokenVar $$ }
'=' { TokenEq }
'+' { TokenPlus }
'-' { TokenMinus }
'*' { TokenTimes }
'/' { TokenDiv }
'(' { TokenOB }
')' { TokenCB }
%%
Exp : let var '=' Exp in Exp { \p -> $6 (($2,$4 p):p) }
| Exp1 { $1 }
Exp1 : Exp1 '+' Term { \p -> $1 p + $3 p }
| Exp1 '-' Term { \p -> $1 p - $3 p }
| Term { $1 }
Term : Term '*' Factor { \p -> $1 p * $3 p }
| Term '/' Factor { \p -> $1 p `div` $3 p }
| Factor { $1 }
Factor
: int { \p -> $1 }
| var { \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
| '(' Exp ')' { $2 }
{
parseError :: [Token] -> a
parseError _ = error "Parse error"
data Token
= TokenLet
| TokenIn
| TokenInt Int
| TokenVar String
| TokenEq
| TokenPlus
| TokenMinus
| TokenTimes
| TokenDiv
| TokenOB
| TokenCB
deriving Show
lexer :: String -> [Token]
lexer [] = []
lexer (c:cs)
| isSpace c = lexer cs
| isAlpha c = lexVar (c:cs)
| isDigit c = lexNum (c:cs)
lexer ('=':cs) = TokenEq : lexer cs
lexer ('+':cs) = TokenPlus : lexer cs
lexer ('-':cs) = TokenMinus : lexer cs
lexer ('*':cs) = TokenTimes : lexer cs
lexer ('/':cs) = TokenDiv : lexer cs
lexer ('(':cs) = TokenOB : lexer cs
lexer (')':cs) = TokenCB : lexer cs
lexNum cs = TokenInt (read num) : lexer rest
where (num,rest) = span isDigit cs
lexVar cs =
case span isAlpha cs of
("let",rest) -> TokenLet : lexer rest
("in",rest) -> TokenIn : lexer rest
(var,rest) -> TokenVar var : lexer rest
main = getContents >>= print . calc . lexer
}
I'm getting this error:
[1 of 1] Compiling Main ( gr.hs, gr.o )
gr.hs:310:24:
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
Possible fix:
add an instance declaration for (Show ([(String, Int)] -> Int))
In the first argument of `(.)', namely `print'
In the second argument of `(>>=)', namely `print . calc . lexer'
In the expression: getContents >>= print . calc . lexer
Do you know why and how can I solve it?
If you examine the error message
No instance for (Show ([(String, Int)] -> Int))
arising from a use of `print'
it's clear that the problem is that you are trying to print a function. And indeed, the value produced by the parser function calc is supposed to be a function which takes a lookup table of variable bindings and gives back a result. See for example the rule for variables:
{ \p -> case lookup $1 p of
Nothing -> error "no var"
Just i -> i }
So in main, we need to pass in a list for the p argument, for example an empty list. (Or you could add some pre-defined global variables if you wanted). I've expanded the point-free code to a do block so it's easier to see what's going on:
main = do
input <- getContents
let fn = calc $ lexer input
print $ fn [] -- or e.g. [("foo", 42)] if you wanted it pre-defined
Now it works:
$ happy Calc.y
$ runghc Calc.hs <<< "let x = 1337 in x * 2"
2674

Yacc and Lex error in parsing expressions which use binary operators

I am new to Lex and Yacc and I am trying to create a parser for a simple language which allows for basic arithmetic and equality expressions. Though I have some of it working, I am encountering errors when trying to parse expressions involving binary operations. Here is my .y file:
%{
#include <stdlib.h>
#include <stdio.h>
%}
%token NUMBER
%token HOME
%token PU
%token PD
%token FD
%token BK
%token RT
%token LT
%left '+' '-'
%left '=' '<' '>'
%nonassoc UMINUS
%%
S : statement S { printf("S -> stmt S\n"); }
| { printf("S -> \n"); }
;
statement : HOME { printf("stmt -> HOME\n"); }
| PD { printf("stmt -> PD\n"); }
| PU { printf("stmt -> PU\n"); }
| FD expression { printf("stmt -> FD expr\n"); }
| BK expression { printf("stmt -> BK expr\n"); }
| RT expression { printf("stmt -> RT expr\n"); }
| LT expression { printf("stmt -> LT expr\n"); }
;
expression : expression '+' expression { printf("expr -> expr + expr\n"); }
| expression '-' expression { printf("expr -> expr - expr\n"); }
| expression '>' expression { printf("expr -> expr > expr\n"); }
| expression '<' expression { printf("expr -> expr < expr\n"); }
| expression '=' expression { printf("expr -> expr = expr\n"); }
| '(' expression ')' { printf("expr -> (expr)\n"); }
| '-' expression %prec UMINUS { printf("expr -> -expr\n"); }
| NUMBER { printf("expr -> number\n"); }
;
%%
int yyerror(char *s)
{
fprintf (stderr, "%s\n", s);
return 0;
}
int main()
{
yyparse();
}
And here is my .l file for Lex:
%{
#include "testYacc.h"
%}
number [0-9]+
%%
[ ] { /* skip blanks */ }
{number} { sscanf(yytext, "%d", &yylval); return NUMBER; }
home { return HOME; }
pu { return PU; }
pd { return PD; }
fd { return FD; }
bk { return BK; }
rt { return RT; }
lt { return LT; }
%%
When I try to enter an arithmetic expression on the command-line for evaluation, it results in the following error:
home
stmt -> HOME
pu
stmt -> PU
fd 10
expr -> number
fd 10
stmt -> FD expr
expr -> number
fd (10 + 10)
stmt -> FD expr
(expr -> number
+stmt -> FD expr
S ->
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
S -> stmt S
syntax error
Your lexer lacks rules to match and return tokens such as '+' and '*', so if there are any in your input, it will just echo them and discard them. This is what happens when you enter fd (10 + 10) -- the lexer returns the tokens FD NUMBER NUMBER while + and ( get echoed to stdout. The parser then gives a syntax error.
You want to add a rule to return these single character tokens. The easiest is to just add a single rule to your .l file at the end:
. { return *yytext; }
which matches any single character.
Note that this does NOT match a \n (newline), so newlines in your input will still be echoed and ignored. You might want to add them (and tabs and carriage returns) to your skip blanks rule:
[ \t\r\n] { /* skip blanks */ }

Assignment as expression in Antlr grammar

I'm trying to extend the grammar of the Tiny Language to treat assignment as expression. Thus it would be valid to write
a = b = 1; // -> a = (b = 1)
a = 2 * (b = 1); // contrived but valid
a = 1 = 2; // invalid
Assignment differs from other operators in two aspects. It's right associative (not a big deal), and its left-hand side is has to be a variable. So I changed the grammar like this
statement: assignmentExpr | functionCall ...;
assignmentExpr: Identifier indexes? '=' expression;
expression: assignmentExpr | condExpr;
It doesn't work, because it contains a non-LL(*) decision. I also tried this variant:
assignmentExpr: Identifier indexes? '=' (expression | condExpr);
but I got the same error. I am interested in
This specific question
Given a grammar with a non-LL(*) decision, how to find the two paths that cause the problem
How to fix it
I think you can change your grammar like this to achieve the same, without using syntactic predicates:
statement: Expr ';' | functionCall ';'...;
Expr: Identifier indexes? '=' Expr | condExpr ;
condExpr: .... and so on;
I altered Bart's example with this idea in mind:
grammar TL;
options {
output=AST;
}
tokens {
ROOT;
}
parse
: stat+ EOF -> ^(ROOT stat+)
;
stat
: expr ';'
;
expr
: Id Assign expr -> ^(Assign Id expr)
| add
;
add
: mult (('+' | '-')^ mult)*
;
mult
: atom (('*' | '/')^ atom)*
;
atom
: Id
| Num
| '('! expr ')' !
;
Assign : '=' ;
Comment : '//' ~('\r' | '\n')* {skip();};
Id : 'a'..'z'+;
Num : '0'..'9'+;
Space : (' ' | '\t' | '\r' | '\n')+ {skip();};
And for the input:
a=b=4;
a = 2 * (b = 1);
you get following parse tree:
The key here is that you need to "assure" the parser that inside an expression, there is something ahead that satisfies the expression. This can be done using a syntactic predicate (the ( ... )=> parts in the add and mult rules).
A quick demo:
grammar TL;
options {
output=AST;
}
tokens {
ROOT;
ASSIGN;
}
parse
: stat* EOF -> ^(ROOT stat+)
;
stat
: expr ';' -> expr
;
expr
: add
;
add
: mult ((('+' | '-') mult)=> ('+' | '-')^ mult)*
;
mult
: atom ((('*' | '/') atom)=> ('*' | '/')^ atom)*
;
atom
: (Id -> Id) ('=' expr -> ^(ASSIGN Id expr))?
| Num
| '(' expr ')' -> expr
;
Comment : '//' ~('\r' | '\n')* {skip();};
Id : 'a'..'z'+;
Num : '0'..'9'+;
Space : (' ' | '\t' | '\r' | '\n')+ {skip();};
which will parse the input:
a = b = 1; // -> a = (b = 1)
a = 2 * (b = 1); // contrived but valid
into the following AST:

Resources