I am coding a parser (for learning pourpuses).
I want it to parse constructions like
let myVar be 40 plus 2
and
let myVar be (40 plus 2)
With no problems... but my parser does not "understand" the former. It sees the 40 and thinks "well, it's a Literal Numeric 40".
When I put parentheses, my parser works great.
I am having a hard time to understand why.
Parser:
type value =
| Boolean of bool
| Numeric of float
| String of string
type arithmetic = Sum | Sub | Mul | Div | Pow
type logic = And | Or | Equal | NotEqual | Greater | Smaller
type identifier =
| Identifier of string
type expression =
| Literal of value
| Arithmetic of expression * arithmetic * expression
| Negative of expression
| Negation of expression
| Logic of expression * logic * expression
| Variable of identifier
type statement =
| Assignment of identifier * expression
| Print of expression
| Read of identifier
let private ws = spaces
let private str s = pstring s .>> ws
let private pnumeric =
pfloat
.>> ws
|>> fun n -> Literal (Numeric n)
let private pboolean =
choice [
(stringReturn "true" (Literal (Boolean true)))
(stringReturn "false" (Literal (Boolean false)))
]
.>> ws
let private pstringliteral =
choice [
between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
between (pstring "'") (pstring "'") (manyChars (satisfy (fun c -> c <> ''')))
]
|>> fun s -> Literal (String s)
let private pidentifier =
many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun s -> Identifier s
let private betweenParentheses p =
between (str "(") (str ")") p
let private pvalue =
choice [
pnumeric
pboolean
]
let private prefixOperator (p: OperatorPrecedenceParser<_,_,_>) op prec map =
p.AddOperator(PrefixOperator (op, ws, prec, true, map))
let private infixOperator (p: OperatorPrecedenceParser<_,_,_>) op prec map =
p.AddOperator(InfixOperator (op, ws, prec, Associativity.Left, map))
let private oppNegation = new OperatorPrecedenceParser<_,_,_>()
let private oppLogic = new OperatorPrecedenceParser<_,_,_>()
let private oppArithmetic = new OperatorPrecedenceParser<_,_,_>()
let private oppNegative = new OperatorPrecedenceParser<_,_,_>()
prefixOperator oppNegation "not" 1 (fun x -> Negation x)
infixOperator oppLogic "is" 1 (fun x y -> Logic (x, Equal, y))
infixOperator oppLogic "isnt" 1 (fun x y -> Logic (x, NotEqual, y))
infixOperator oppLogic "and" 2 (fun x y -> Logic (x, And, y))
infixOperator oppLogic "or" 3 (fun x y -> Logic (x, Or, y))
prefixOperator oppNegative "-" 1 (fun x -> Negative x)
infixOperator oppArithmetic ">" 1 (fun x y -> Logic (x, Greater, y))
infixOperator oppArithmetic "<" 1 (fun x y -> Logic (x, Smaller, y))
infixOperator oppArithmetic "is" 2 (fun x y -> Logic (x, Equal, y))
infixOperator oppArithmetic "isnt" 2 (fun x y -> Logic (x, NotEqual, y))
infixOperator oppArithmetic "plus" 3 (fun x y -> Arithmetic (x, Sum, y))
infixOperator oppArithmetic "minus" 3 (fun x y -> Arithmetic (x, Sub, y))
infixOperator oppArithmetic "times" 4 (fun x y -> Arithmetic (x, Mul, y))
infixOperator oppArithmetic "divided by" 4 (fun x y -> Arithmetic (x, Div, y))
infixOperator oppArithmetic "power" 5 (fun x y -> Arithmetic (x, Pow, y))
let private negationExprParser = oppNegation.ExpressionParser
let private logicExprParser = oppLogic.ExpressionParser
let private arithmeticExprParser = oppArithmetic.ExpressionParser
let private negativeExprParser = oppNegative.ExpressionParser
oppNegation.TermParser <- choice [
betweenParentheses negationExprParser
pboolean
]
oppLogic.TermParser <- choice [
betweenParentheses logicExprParser
pboolean
]
oppNegative.TermParser <- choice [
betweenParentheses negativeExprParser
pnumeric
]
oppArithmetic.TermParser <- choice [
betweenParentheses arithmeticExprParser
pnumeric
]
let private pexpression =
choice [
attempt <| pstringliteral
attempt <| negationExprParser
attempt <| logicExprParser
attempt <| negativeExprParser
attempt <| arithmeticExprParser
attempt <| (pidentifier |>> fun id -> Variable id)
]
let private passignment =
pipe2 (str "let" .>> ws >>. pidentifier) (ws >>. str "be" >>. ws >>. pexpression) (fun id exp -> Assignment (id, exp))
let private pprint =
str "print"
>>. pexpression
|>> fun exp -> Print exp
let private pread =
str "read"
>>. pidentifier
|>> fun id -> Read id
let private pstatement =
choice [
passignment
pprint
pread
]
let private pline =
skipMany (satisfy (fun c -> c = '\n' || c = ' '))
>>. pstatement
.>> ws
let private pcode =
many pline
let generateAST code =
match run pcode code with
| Success (ast, _, _) -> sprintf "%A" ast
| Failure (msg, _, _) -> msg
Usage:
[<EntryPoint>]
let main argv =
printfn "%s\n" (generateAST "let b be 5 plus 7")
// [Assignment (Identifier "b",Literal (Numeric 5.0))]
printfn "%s\n" (generateAST "let b be (5 plus 7)")
// [Assignment
// (Identifier "b",Arithmetic (Literal (Numeric 5.0),Sum,Literal (Numeric 7.0)))]
0
Take a look at FParsec - Tracing a parser
If you add the recommended FParsec tracing function to the top of your code
let (<!>) (p: Parser<_,_>) label : Parser<_,_> =
fun stream ->
printfn "%A: Entering %s" stream.Position label
let reply = p stream
printfn "%A: Leaving %s (%A)" stream.Position label reply.Status
reply
then modify the parsers to use the trace function
let private pnumeric =
(pfloat
.>> ws
|>> fun n -> Literal (Numeric n)) <!> "pnumeric"
let private pboolean =
(choice [
(stringReturn "true" (Literal (Boolean true)))
(stringReturn "false" (Literal (Boolean false)))
]
.>> ws) <!> "pboolean"
let private pstringliteral =
(choice [
between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
between (pstring "'") (pstring "'") (manyChars (satisfy (fun c -> c <> ''')))
]
|>> fun s -> Literal (String s)) <!> "pstringliteral"
let private pidentifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun s -> Identifier s) <!> "pidentifier"
let private betweenParentheses p =
(between (str "(") (str ")") p) <!> "betweenParentheses"
let private pvalue =
(choice [
pnumeric
pboolean
]) <!> "pvalue"
let private negationExprParser = oppNegation.ExpressionParser <!> "negationExprParser"
let private logicExprParser = oppLogic.ExpressionParser <!> "logicExprParser"
let private arithmeticExprParser = oppArithmetic.ExpressionParser <!> "arithmeticExprParser"
let private negativeExprParser = oppNegative.ExpressionParser <!> "negativeExprParser "
let private pexpression =
choice [
attempt <| pstringliteral
attempt <| negationExprParser
attempt <| logicExprParser
attempt <| negativeExprParser
attempt <| arithmeticExprParser
attempt <| (pidentifier |>> fun id -> Variable id)
] <!> "pexpression"
let private passignment =
pipe2 (str "let" .>> ws >>. pidentifier) (ws >>. str "be" >>. ws >>. pexpression) (fun id exp -> Assignment (id, exp)) <!> "passignment"
let private pprint =
(str "print"
>>. pexpression
|>> fun exp -> Print exp) <!> "pprint"
let private pread =
(str "read"
>>. pidentifier
|>> fun id -> Read id) <!> "pread"
let private pstatement =
(choice [
passignment
pprint
pread
]) <!> "pstatement"
let private pline =
(skipMany (satisfy (fun c -> c = '\n' || c = ' '))
>>. pstatement
.>> ws) <!> "pline"
let private pcode =
many pline <!> "pcode"
and run the code you will get
(Ln: 1, Col: 1): Entering pcode
(Ln: 1, Col: 1): Entering pline
(Ln: 1, Col: 1): Entering pstatement
(Ln: 1, Col: 1): Entering passignment
(Ln: 1, Col: 5): Entering pidentifier
(Ln: 1, Col: 6): Leaving pidentifier (Ok)
(Ln: 1, Col: 10): Entering pexpression
(Ln: 1, Col: 10): Entering pstringliteral
(Ln: 1, Col: 10): Leaving pstringliteral (Error)
(Ln: 1, Col: 10): Entering negationExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 10): Leaving betweenParentheses (Error)
(Ln: 1, Col: 10): Entering pboolean
(Ln: 1, Col: 10): Leaving pboolean (Error)
(Ln: 1, Col: 10): Leaving negationExprParser (Error)
(Ln: 1, Col: 10): Entering logicExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 10): Leaving betweenParentheses (Error)
(Ln: 1, Col: 10): Entering pboolean
(Ln: 1, Col: 10): Leaving pboolean (Error)
(Ln: 1, Col: 10): Leaving logicExprParser (Error)
(Ln: 1, Col: 10): Entering negativeExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 10): Leaving betweenParentheses (Error)
(Ln: 1, Col: 10): Entering pnumeric
(Ln: 1, Col: 12): Leaving pnumeric (Ok)
(Ln: 1, Col: 12): Leaving negativeExprParser (Ok)
(Ln: 1, Col: 12): Leaving pexpression (Ok)
(Ln: 1, Col: 12): Leaving passignment (Ok)
(Ln: 1, Col: 12): Leaving pstatement (Ok)
(Ln: 1, Col: 12): Leaving pline (Ok)
(Ln: 1, Col: 12): Entering pline
(Ln: 1, Col: 12): Entering pstatement
(Ln: 1, Col: 12): Entering passignment
(Ln: 1, Col: 12): Leaving passignment (Error)
(Ln: 1, Col: 12): Entering pprint
(Ln: 1, Col: 12): Leaving pprint (Error)
(Ln: 1, Col: 12): Entering pread
(Ln: 1, Col: 12): Leaving pread (Error)
(Ln: 1, Col: 12): Leaving pstatement (Error)
(Ln: 1, Col: 12): Leaving pline (Error)
(Ln: 1, Col: 12): Leaving pcode (Ok)
[Assignment (Identifier "b",Literal (Numeric 5.0))]
(Ln: 1, Col: 1): Entering pcode
(Ln: 1, Col: 1): Entering pline
(Ln: 1, Col: 1): Entering pstatement
(Ln: 1, Col: 1): Entering passignment
(Ln: 1, Col: 5): Entering pidentifier
(Ln: 1, Col: 6): Leaving pidentifier (Ok)
(Ln: 1, Col: 10): Entering pexpression
(Ln: 1, Col: 10): Entering pstringliteral
(Ln: 1, Col: 10): Leaving pstringliteral (Error)
(Ln: 1, Col: 10): Entering negationExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 11): Entering negationExprParser
(Ln: 1, Col: 11): Entering betweenParentheses
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Entering pboolean
(Ln: 1, Col: 11): Leaving pboolean (Error)
(Ln: 1, Col: 11): Leaving negationExprParser (Error)
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Leaving negationExprParser (Error)
(Ln: 1, Col: 10): Entering logicExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 11): Entering logicExprParser
(Ln: 1, Col: 11): Entering betweenParentheses
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Entering pboolean
(Ln: 1, Col: 11): Leaving pboolean (Error)
(Ln: 1, Col: 11): Leaving logicExprParser (Error)
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Leaving logicExprParser (Error)
(Ln: 1, Col: 10): Entering negativeExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 11): Entering negativeExprParser
(Ln: 1, Col: 11): Entering betweenParentheses
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Entering pnumeric
(Ln: 1, Col: 13): Leaving pnumeric (Ok)
(Ln: 1, Col: 13): Leaving negativeExprParser (Ok)
(Ln: 1, Col: 13): Leaving betweenParentheses (Error)
(Ln: 1, Col: 13): Leaving negativeExprParser (Error)
(Ln: 1, Col: 10): Entering arithmeticExprParser
(Ln: 1, Col: 10): Entering betweenParentheses
(Ln: 1, Col: 11): Entering arithmeticExprParser
(Ln: 1, Col: 11): Entering betweenParentheses
(Ln: 1, Col: 11): Leaving betweenParentheses (Error)
(Ln: 1, Col: 11): Entering pnumeric
(Ln: 1, Col: 13): Leaving pnumeric (Ok)
(Ln: 1, Col: 18): Entering betweenParentheses
(Ln: 1, Col: 18): Leaving betweenParentheses (Error)
(Ln: 1, Col: 18): Entering pnumeric
(Ln: 1, Col: 19): Leaving pnumeric (Ok)
(Ln: 1, Col: 19): Leaving arithmeticExprParser (Ok)
(Ln: 1, Col: 20): Leaving betweenParentheses (Ok)
(Ln: 1, Col: 20): Leaving arithmeticExprParser (Ok)
(Ln: 1, Col: 20): Leaving pexpression (Ok)
(Ln: 1, Col: 20): Leaving passignment (Ok)
(Ln: 1, Col: 20): Leaving pstatement (Ok)
(Ln: 1, Col: 20): Leaving pline (Ok)
(Ln: 1, Col: 20): Entering pline
(Ln: 1, Col: 20): Entering pstatement
(Ln: 1, Col: 20): Entering passignment
(Ln: 1, Col: 20): Leaving passignment (Error)
(Ln: 1, Col: 20): Entering pprint
(Ln: 1, Col: 20): Leaving pprint (Error)
(Ln: 1, Col: 20): Entering pread
(Ln: 1, Col: 20): Leaving pread (Error)
(Ln: 1, Col: 20): Leaving pstatement (Error)
(Ln: 1, Col: 20): Leaving pline (Error)
(Ln: 1, Col: 20): Leaving pcode (Ok)
[Assignment
(Identifier "b",Arithmetic (Literal (Numeric 5.0),Sum,Literal (Numeric 7.0)))]
This should help you figure out your problem, but more importantly how to solve future problems with FParsec.
Related
I try to parse the call of a function, here are the variants:
add 8 2
add x y
add (inc x) (dec y)
funcWithoutArgs
Depending on how I distribute my analyzers in the code, and perhaps also how they are coded, I get errors, as well as successful but unwanted analyses.
For example, this:
add 4 7
returns the following AST:
[Call ("foo",[Number 4]);
Number 7]
He therefore only takes the first parameter.
When I do that:
foo x y
He sends me back this AST:
[Call ("foo",[Call ("x",[Call ("y",[])])])]
And that's not what I want, since here, each parameter calls the next one as a parameter.
Another example, when I do this:
foo x y
inc x
I get:
[Call ("foo",[Call ("x",[Call ("y",[Call ("inc",[Call ("x",[])])])])])]
It does the same as above, but also calls the code that follows the line. When I ask my analyzer for a new line (see code), it sends me this:
[Call ("foo",[]); Call ("x",[]); Call ("y",[]); Call ("inc",[]); Call ("x",[])]
Even in brackets it doesn't work:
foo (x) (y)
Give:
[Call ("foo",[]); Call ("x",[]); Call ("y",[])]
And:
add (inc x) (dec y)
Give:
Error in Ln: 1 Col: 1
Note: The error occurred on an empty line.
The parser backtracked after:
Error in Ln: 2 Col: 5
add (inc x) (dec y)
^
Expecting: end of input or integer number (32-bit, signed)
The parser backtracked after:
Error in Ln: 2 Col: 10
add (inc x) (dec y)
^
Expecting: ')'
[]
In short, my function call analyzer does not work properly. Every time I change something, like a new line, an attempt, or a different hierarchy, something doesn't work...
Do you have any idea how to solve this very annoying problem?
Here is the minimum functional code that was used:
open FParsec
// Ast
type Expression =
| Number of int
| Call of string * Expression list
type Program = Expression list
// Tools
let private bws p =
spaces >>? p .>>? spaces
let private suiteOf p =
sepEndBy p spaces1
let inline private betweenParentheses p label =
between (pstring "(") (pstring ")") p
<?> (label + " between parentheses")
let private identifier =
many1Satisfy2 isLetter (fun c -> isLetter c)
// Expressions
let rec private call = parse {
let! call = pipe2 (spaces >>? identifier) (spaces >>? parameters)
(fun id parameters -> Call(id, parameters)) // .>>? newline
return call
}
and private parameters = suiteOf expression
and private callFuncWithoutArgs =
identifier |>> fun id -> Call(id, [])
and private number = pint32 |>> Number
and private betweenParenthesesExpression =
parse { let! ex = betweenParentheses expression "expression"
return ex }
and private expression =
bws (attempt betweenParenthesesExpression <|>
attempt number <|>
attempt call <|>
callFuncWithoutArgs)
// -------------------------------
let parse code =
let parser = many expression .>>? eof
match run parser code with
| Success(result, _, _) -> result
| Failure(msg, _, _) ->
printfn "%s" msg
[]
System.Console.Clear()
parse #"
add 4 7
foo x y
inc x
foo (x) (y)
add (inc x) (dec y)
" |> printfn "%A"
Your main problem is that you have the wrong high-level design for your parser.
Your current design is that an expression can be:
An expression (a "sub-expression", so to speak) between parentheses (no problem here)
A number (no problem here)
A call with parameters, which is an identifier followed by a space-separated list of expressions (this is the main part of the problem)
A call without parameters, which is a single identifier (this contributes to the problem)
Looking at the expression foo x y, let's apply those rules in order as a parser would. There are no parentheses and foo isn't a number, so it's either 3 or 4. First we try 3. foo is followed by x y: does x y parse as an expression? Why, yes, it does: it parses as a call with parameters, where x is the function and y is the parameter. Since x y matches 3, it parses according to rule 3 without checking rule 4, and so foo x y matches like foo (x y) would: a call to foo with a single parameter, which is a call to x with parameter y.
How to fix this? Well, you could try swapping the order of 3 and 4, so that a function call without parameters is checked before a call with parameters (which would make x y parse as just x. But that would fail, because foo x y would match as just foo. So putting rule 4 before rule 3 doesn't work here.
The real solution is to split the rules for an expression apart into two levels. The "inner" level, which I'll call a "value", could be:
An expression between parentheses
A number
A function call without parameters
And the "outer" level, the parse rules for expressions, would be:
A function call with parameters, all of which are values, not expressions
A value
Note that these parsing levels are mutually recursive, so you'll need to use createParserForwardedToRef in your implementation. Let's look at how foo x y would be parsed with this design:
First, foo parses as an identifier, so check if it could be a function call with parameters. Does x parse as a value? Yes, under rule 3 of values. And does y parse as a value? Yes, under rule 3 of values. So foo x y parses as a function call.
Now what about funcWithoutParameters? It would fail rule 1 of expressions because it's not followed by a list of parameters. So it would be checked for rule 2 of expressions, and then it would match under rule 3 of values.
Okay, a basic sanity check of the pseudocode works, so let's turn this into code. But first, I'll mention the other problem in your parser which I haven't mentioned yet, which is that you don't realize that the FParsec spaces parser also matches newlines. So when you wrap your expression parser in bws ("between whitespace"), it will also consume newlines after the text it parses. So when you're parsing something like:
foo a b
inc c
The suiteOf expression sees the list a b inc c and turns all of those into parameters for foo. In my code below I've distinguished between FParsec's spaces parser (which includes newlines) and a parser that parses only horizontal whitespace (space and tab but not newline), using each in the appropriate place. The following code implements the design I mentioned in this answer and its output looks right to me for all the test expressions you wrote:
open FParsec
// Ast
type Expression =
| Number of int
| Call of string * Expression list
type Program = Expression list
// Tools
let private justSpaces = skipMany (pchar ' ' <|> pchar '\t')
let private justSpaces1 = skipMany1 (pchar ' ' <|> pchar '\t')
let private bws p =
spaces >>? p .>>? spaces
let private suiteOf p =
sepEndBy1 p (justSpaces1)
let inline private betweenParentheses p label =
between (pstring "(") (pstring ")") p
<?> (label + " between parentheses")
let private identifier =
many1Satisfy2 isLetter (fun c -> isLetter c)
// Expressions
let private expression, expressionImpl = createParserForwardedToRef()
let private betweenParenthesesExpression =
parse { let! ex = betweenParentheses expression "expression"
return ex }
let private callFuncWithoutArgs =
(identifier |>> fun id -> Call(id, []))
let private number = pint32 |>> Number
let private value =
justSpaces >>? (attempt betweenParenthesesExpression <|>
attempt number <|>
callFuncWithoutArgs)
let private parameters = suiteOf value
let rec private callImpl = parse {
let! call = pipe2 (justSpaces >>? identifier) (justSpaces >>? parameters)
(fun id parameters -> Call(id, parameters))
return call }
let call = callImpl
expressionImpl.Value <-
bws (attempt call <|>
value)
// -------------------------------
let parse code =
let parser = many expression .>>? (spaces >>. eof)
match run parser code with
| Success(result, _, _) -> result
| Failure(msg, _, _) ->
printfn "%s" msg
[]
System.Console.Clear()
parse #"
add 4 7
foo x y
inc x
foo (x) (y)
add (inc x) (dec y)
" |> printfn "%A"
P.S. I used the following operator suggested by http://www.quanttec.com/fparsec/users-guide/debugging-a-parser.html to greatly help me in tracing the problem:
let (<!>) (p: Parser<_,_>) label : Parser<_,_> =
fun stream ->
printfn "%A: Entering %s" stream.Position label
let reply = p stream
printfn "%A: Leaving %s (%A)" stream.Position label reply.Status
reply
Usage: turn let parseFoo = ... into let parseFoo = ... <!> "foo". Then you'll get a stream of debugging output in your console that looks something like this:
(Ln: 2, Col: 20): Entering expression
(Ln: 3, Col: 1): Entering call
(Ln: 3, Col: 5): Entering parameters
(Ln: 3, Col: 5): Entering bwParens
(Ln: 3, Col: 5): Leaving bwParens (Error)
(Ln: 3, Col: 5): Entering number
(Ln: 3, Col: 6): Leaving number (Ok)
(Ln: 3, Col: 7): Entering bwParens
(Ln: 3, Col: 7): Leaving bwParens (Error)
(Ln: 3, Col: 7): Entering number
(Ln: 3, Col: 8): Leaving number (Ok)
(Ln: 3, Col: 8): Leaving parameters (Ok)
(Ln: 3, Col: 8): Leaving call (Ok)
(Ln: 3, Col: 8): Leaving expression (Ok)
That helps greatly when you're trying to figure out why your parser isn't doing what you expect.
I noticed that the error messages sent by FParsec were quite "ambiguous", except for the last message sent for an instruction.
Here is an example:
Code to parse:
if (2 + 2 == 4)
Here, normally, there should be an instruction block (so in brackets).
And what I get:
Failure: Error in Ln: 1 Col: 1 if (2 + 2 == 4) ^ Expecting: [some instructions]
The parser backtracked after: Error in Ln: 1 Col: 3 if (2 + 2 ==
4)
^ Bad identifier: 'if' is a reserved keyword
The parser backtracked after: Error in Ln: 1 Col: 16 if (2 + 2 ==
4)
^ Note: The error occurred at the end of the input stream. Expecting: start block
As you can see, only the last error message is relevant. So I would like to know if there is not a way to display only this one, and therefore the last one without going through the others.
I guess it's not easy, since it's a feature of FParsec, but you never know...
I don't think I need to post F# code, since it's usually in the use of the library.
Edit
Here is the code of my analyzer to parse the example above:
type Statement =
| If of Expr * Block option
// And others...
and Block = Block of Statement list
let ws = pspaces >>. many pspaces |>> (fun _ -> ())
let str_ws s = pstring s .>> ws
let pexpr, pexprimpl = createParserForwardedToRef ()
// With their implementations (addition, subtraction, ...)
let pstatement, pstatementimpl = createParserForwardedToRef ()
// With their implementations, like "pif" below
let psinglestatement = pstatement |>> fun statement -> [statement]
let pstatementblock =
psinglestatement <|>
between (ws >>. str_ws "{") (ws >>. str_ws "}") (many pstatement)
let pif =
pipe2
(str_ws "if" >>. pexpr)
(pstatementblock)
(fun cnd block -> If(cnd, Some (Block(block))))
pstatementimpl :=
attempt (pif) <|>
// And others...
Edit II:
Here is the code of identifier analyse:
let reserved = [
"if"; "else" // And other...
]
let pidentifierraw =
let inline isIdentifierFirstChar c = isLetter c
let inline isIdentifierChar c = isLetter c || isDigit c
many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier"
let pidentifier =
pidentifierraw
>>= fun s ->
if reserved |> List.exists ((=) s) then fail ("Bad identifier: '" + s + "' is a reserved keyword")
else preturn s
type Literal =
| Identifier of string
// And other...
let pid = pidentifier |>> Literal.Identifier
pexpr is a set of values, including identifiers, literals, and their operations:
let pexpr, pexprimpl = createParserForwardedToRef ()
type Assoc = Associativity
let opp = OperatorPrecedenceParser<Expr, unit, unit> ()
pexprimpl := opp.ExpressionParser <?> "expression"
let term = pvalue .>> ws <|> between (str_ws "(") (str_ws ")") pexpr
opp.TermParser <- term
let inops = [ "+"; "-"; "*"; "/"; "=="; "!="; "<="; ">="; "<"; ">" ]
for op in inops do opp.AddOperator(InfixOperator(op, ws, 1, Assoc.Left, fun x y -> InfixOp(x, op, y)))
pvalue defines literals, including identifiers with pidentifier. I don't think I need to put their definitions, since they all follow this pattern (for example):
let ptrue = str_ws "true" |>> fun _ -> Bool(true)
let pfalse = str_ws "false" |>> fun _ -> Bool(false)
let pbool = ptrue <|> pfalse
This code will read this input "(WEEKEND-SUNDAY)" and then return "SATURDAY"
but input "WEEKEND-SUNDAY)" still return "SATURDAY" => this parser ignore last ')'
let pDayOfWeekKeyWords = choice [
pstring "MONDAY" ;
pstring "TUESDAY" ;
pstring "WEDNESDAY" ;
pstring "THURSDAY" ;
pstring "FRIDAY" ;
pstring "SATURDAY" ;
pstring "SUNDAY" ;
pstring "WEEKEND" ;
pstring "WEEKDAY" ;
pstring "ALL" ]
let betweenParentheses p =
between (pstring "(") (pstring ")") p
let opp = new OperatorPrecedenceParser<Set<DayOfWeek>,unit,unit>()
let pExpr = opp.ExpressionParser
let term = (betweenParentheses pExpr) <|> (pDayOfWeekKeyWords |>> ( fun x ->
match x with
| "MONDAY" -> Set.ofList [DayOfWeek.Monday]
| "TUESDAY" -> Set.ofList [DayOfWeek.Tuesday]
| "WEDNESDAY" -> Set.ofList [DayOfWeek.Wednesday]
| "THURSDAY" -> Set.ofList [DayOfWeek.Thursday]
| "FRIDAY" -> Set.ofList [DayOfWeek.Friday]
| "SATURDAY" -> Set.ofList [DayOfWeek.Saturday]
| "SUNDAY" -> Set.ofList [DayOfWeek.Sunday]
| "WEEKDAY" -> Set.ofList [DayOfWeek.Monday ; DayOfWeek.Tuesday ; DayOfWeek.Wednesday;DayOfWeek.Thursday;DayOfWeek.Friday]
| "WEEKEND" -> Set.ofList [DayOfWeek.Saturday;DayOfWeek.Sunday]
| "ALL"-> Set.ofList [DayOfWeek.Monday ; DayOfWeek.Tuesday ; DayOfWeek.Wednesday;DayOfWeek.Thursday;DayOfWeek.Friday;DayOfWeek.Saturday;DayOfWeek.Sunday]
| _ -> failwith "ERROR MESSAGE") )
opp.TermParser <- term
opp.AddOperator(InfixOperator<Set<DayOfWeek>,unit,unit>("+", skipString "", 1, Associativity.Left, fun x y -> x + y))
opp.AddOperator(InfixOperator<Set<DayOfWeek>,unit,unit>("-", skipString "" , 1, Associativity.Left, fun x y -> x - y))
run
run pExpr "MONDAY+(WEEKEND-SUNDAY)"
output
Success: set [Monday; Saturday]
problem is
run pExpr "MONDAY+WEEKEND-SUNDAY)" or run pExpr "MONDAY)+WEEKEND-SUNDAY"
it still return
Success: set [Monday; Saturday]
I want it to show Failure: something..
did I miss something? thank you
In your latter two examples pexpr returns after having successfully parsed the input stream up to the unmatched closing parenthesis. So, in the last example the result is actually Success: set [Monday] not Success: set [Monday; Saturday].
You can use the eof parser to force an error if the input stream hasn't been consumed completely:
> run (pExpr .>> eof) "MONDAY)+WEEKEND-SUNDAY"
Error in Ln: 1 Col: 7
MONDAY)+WEEKEND-SUNDAY
^
Expecting: end of input or infix operator
I'm coding my first parser. It's in F# and I'm using with FParsec.
My parser parses things like true and false, (true and false or true), true, (((true and false or true))) etc, which is correct.
But it doesn't parses when it's like (true and false) or true. It fails when there are parentheses in the middle of the text.
How can I solve it?
Sample code:
let private infixOperator (opp: OperatorPrecedenceParser<_,_,_>) op prec map =
opp.AddOperator(InfixOperator (op, ws, prec, Associativity.Left, map))
let private oppLogic = new OperatorPrecedenceParser<_,_,_>()
infixOperator oppLogic "is" 1 (fun x y -> Comparison (x, Equal, y))
infixOperator oppLogic "isnt" 1 (fun x y -> Comparison (x, NotEqual, y))
infixOperator oppLogic "or" 2 (fun x y -> Logic (x, Or, y))
infixOperator oppLogic "and" 3 (fun x y -> Logic (x, And, y))
let private exprParserLogic = oppLogic.ExpressionParser
let private betweenParentheses p =
between (str "(") (str ")") p
oppLogic.TermParser <- choice [
betweenParentheses exprParserLogic
pboolean
]
let pexpression =
choice [
attempt <| betweenParentheses exprParserLogic
exprParserLogic
]
let private pline =
ws
>>. pexpression
.>> eof
What happens for an input like "(true and false) or true" is that pline applies, which pexpression tries to apply betweenParentheses exprParserLogic. This succeeds and parses "(true and false)". So since the parse was successful, it never tries the second option exprParserLogic and simply returns to pline. pline then applies eof, which fails because "or true" is still left in the input.
Since betweenParentheses exprParserLogic is already part of the operator parser's term parser, there's no reason for you to try to parse it in its own rule. You can just have pline invoke exprParserLogic and remove pexpression altogether (or define let pexpression = oppLogic.ExpressionParser and remove exprParserLogic). This will correctly parse "(true and false) or true".
I have to write a parser which parses key-value pairs in a file that looks like this:
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
kjlkjlkjlkj Pairs:A=a2|B=b2|C=c2
Note that the lines contain some garbage, the label and then the key-value pairs.
The F# code that I wrote is the following:
#r"FParsec.dll"
open FParsec
let parse keys label =
let pkey = keys |> Seq.map pstring |> choice
let pvalue = manyCharsTill anyChar (anyOf "|\n")
let ppair = pkey .>> (skipChar '=') .>>. pvalue
let ppairSeq = many ppair
let pline = skipManyTill anyChar (pstring label)
>>. ppairSeq .>> newline
let pfile = many (opt pline) |>> Seq.choose id
run pfile
>> function
| Success (result, _, _) -> result
| Failure (errorMsg, _, _) -> failwith errorMsg
"""
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
lkjlkjlkjlkj Pairs:A=a2|B=b2|C=c2
"""
|> parse ["A";"B";"C"] "Pairs:"
|> List.ofSeq
|> printfn "%A"
The expected result is:
[[("A","a1"); "B","b1"; "C","c1"]
[("A","a2"); "B","b2"; "C","c2"]]
...but instead I get the following error:
System.Exception: Error: Error in Ln: 8 Col: 1
Note: The error occurred at the end of the input stream.
Expecting: any char or 'Pairs:'
Any ideas about how I can fix this parser?
Thanks!
UPDATE: after Stephan's comment I tried to fix it but without success. This is one of my last attempts which I was expecting to work but it doesn't.
let pkey = keys |> Seq.map pstring |> choice
let pvalue = manyCharsTill anyChar (anyOf "|\n")
let ppair = pkey .>> (skipChar '=') .>>. pvalue
let ppairSeq = manyTill ppair newline
let pnonEmptyLine =
skipManyTill anyChar (pstring label)
>>. ppairSeq
|>> Some
let pemptyLine = spaces >>. newline >>% None
let pline = pemptyLine <|> pnonEmptyLine
let pfile = manyTill pline eof |>> Seq.choose id
Now the error message is:
Error in Ln: 2 Col: 5
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
^
Expecting: newline
A colleague of mine found the solution and I'm posting here for others who have similar issues. Also the parser is even better because it doesn't need the key set. I uses the left side of '=' as key and the right side as value:
let parse label str =
let poperand = manyChars (noneOf "=|\n")
let ppair = poperand .>> skipChar '=' .>>. poperand
let ppairSeq = sepBy ppair (pchar '|')
let pLineWithPairs = skipManyTill anyChar (pstring label) >>. ppairSeq |>> Some
let pLineWithoutPairs = (restOfLine false) >>% None
let pLogLine = (attempt pLineWithPairs) <|> pLineWithoutPairs
let pfile = sepBy pLogLine newline |>> Seq.choose id
match run pfile str with
| Success (result, _, _) -> result
| Failure (errorMsg, _, _) -> sprintf "Error: %s" errorMsg |> failwith