I want to parse the following text:
WHERE
( AND
ApplicationGroup.REFSTR = 5
BV_1.Year = 2009
BV_1.MonetaryCodeId = 'Commited'
BV_3.Year = 2009
BV_3.MonetaryCodeId = 'Commited'
BV_4.Year = 2009
BV_4.MonetaryCodeId = 'Commited
)
I started with a combinator for the list of conditions:
let multiConditionWhereList : Parser<WhereCondition list, unit> =
sepEndBy1 (ws >>. whereCondition) (newline)
<?> "where condition list"
When I give hand over the condition list of the where-statement (every line with an =) I get back a Reply with seven WhereConditions in its Result. The Status is "Ok". But the Error-list contains a "Expected newline" ErrorMessage.
But whenever I try to parse this kind of list wrapped in round braces with an oparator at the beginning with a combinator of the following shape:
let multiConditionWhereClause : Parser<WhereStatement, unit> =
pstringCI "where"
.>> spaces
>>. between (pchar '(') (pchar ')')
( ws >>. whereChainOperator .>> spaces1
.>>. multiConditionWhereList )
|>> (fun (chainOp, conds) -> { Operator = chainOp;
SearchConditions = conds } )
I get an Reply with Status "Error". But the Error-List is empty as well as the result.
So I'm kind of stuck at this point. First I don't understand, why the sepByEnd1 combinator in my multiConditionWhereList produces a non-empty error list and expects a newline at the end. And more important, I don't get why the list is not captured, when I wrap it in a between statement.
As a reference, I include the whole set of rules as well as an invocation of the rule which causes the problems:
#light
#r "System.Xml.Linq.dll"
#r #"..\packages\FParsec.1.0.1\lib\net40-client\FParsecCS.dll"
#r #"..\packages\FParsec.1.0.1\lib\net40-client\FParsec.dll"
module Ast =
open System
open System.Xml.Linq
type AlfabetParseError (msg: string) =
inherit Exception (msg)
type FindStatement =
{ TableReferences: TableReferences;}
and TableReferences =
{ PrimaryTableReference: TableReferenceWithAlias; JoinTableReferences: JoinTableReference list; }
and TableReferenceWithAlias =
{ Name: string; Alias: string }
and JoinTableReference =
{ JoinType:JoinType; TableReference: TableReferenceWithAlias; JoinCondition: JoinCondition; }
and JoinType =
| InnerJoin
| OuterJoin
| LeftJoin
| RightJoin
and JoinCondition =
{ LeftHandSide: FieldReference; RightHandSide: FieldReference; }
and WhereStatement =
{ Operator: WhereOperator; SearchConditions: WhereCondition list }
and WhereOperator =
| And
| Or
| Equal
| Is
| IsNot
| Contains
| Like
| NoOp
and WhereLeftHandSide =
| FieldReferenceLH of FieldReference
and WhereRightHandSide =
| FieldReferenceRH of FieldReference
| VariableReferenceRH of VariableReference
| LiteralRH of Literal
and WhereCondition =
{ LeftHandSide: WhereLeftHandSide; Operator: WhereOperator; RightHandSide: WhereRightHandSide; }
and FieldReference =
{ FieldName: Identifier; TableName: Identifier }
and VariableReference =
{ VariableName : Identifier; }
and Literal =
| Str of string
| Int of int
| Hex of int
| Bin of int
| Float of float
| Null
and Identifier =
Identifier of string
and QueryXml =
{ Doc : XDocument }
module AlfabetQueryParser =
open Ast
open FParsec
open System
open System.Xml.Linq
module Parsers =
(* Utilities *)
let toJoinType (str:string) =
match str.ToLowerInvariant() with
| "innerjoin" -> InnerJoin
| "outerjoin" -> OuterJoin
| "leftjoin" -> LeftJoin
| "rightjoin" -> RightJoin
| _ -> raise <| AlfabetParseError "Invalid join type"
let toWhereOperator (str:string) =
match str.ToLowerInvariant() with
| "and" -> And
| "or" -> Or
| "=" -> Equal
| "is" -> Is
| "is not" -> IsNot
| "contains" -> Contains
| "like" -> Like
| _ -> raise <| AlfabetParseError "Invalid where operator type"
(* Parsers *)
let ws : Parser<string, unit> =
manyChars (satisfy (fun c -> c = ' '))
let ws1 : Parser<string, unit> =
many1Chars (satisfy (fun c -> c = ' '))
let identifier : Parser<string, unit> =
many1Chars (satisfy (fun(c) -> isDigit(c) || isAsciiLetter(c) || c.Equals('_')))
let fieldReference : Parser<FieldReference, unit> =
identifier
.>> pstring "."
.>>. identifier
|>> (fun (tname, fname) -> {FieldName = Identifier(fname);
TableName = Identifier(tname) })
let variableReference : Parser<VariableReference, unit> =
pstring ":"
>>. identifier
|>> (fun vname -> { VariableName = Identifier(vname) })
let numeralOrDecimal : Parser<Literal, unit> =
numberLiteral NumberLiteralOptions.AllowFraction "number"
|>> fun num ->
if num.IsInteger then Int(int num.String)
else Float(float num.String)
let hexNumber : Parser<Literal, unit> =
pstring "#x" >>. many1SatisfyL isHex "hex digit"
|>> fun hexStr ->
Hex(System.Convert.ToInt32(hexStr, 16))
let binaryNumber : Parser<Literal, unit> =
pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit"
|>> fun hexStr ->
Bin(System.Convert.ToInt32(hexStr, 2))
let numberLiteral : Parser<Literal, unit> =
choiceL [numeralOrDecimal
hexNumber
binaryNumber]
"number literal"
let strEscape =
pchar '\\' >>. pchar '\''
let strInnard =
strEscape <|> noneOf "\'"
let strInnards =
manyChars strInnard
let strLiteral =
between (pchar '\'') (pchar '\'') strInnards
|>> Str
let literal : Parser<Literal, unit> =
(pstringCI "null" |>> (fun str -> Null))
<|> numberLiteral
<|> strLiteral
let joinCondition : Parser<JoinCondition, unit> =
spaces .>> pstring "ON" .>> spaces
>>. fieldReference
.>> spaces .>> pstring "=" .>> spaces
.>>. fieldReference
|>> (fun(lhs, rhs) -> { LeftHandSide = lhs; RightHandSide = rhs })
let tableReferenceWithoutAlias : Parser<TableReferenceWithAlias, unit> =
identifier
|>> (fun (name) -> { Name = name; Alias = ""})
let tableReferenceWithAlias : Parser<TableReferenceWithAlias, unit> =
identifier
.>> spaces .>> pstringCI "as" .>> spaces
.>>. identifier
|>> (fun (name, alias) -> { Name = name; Alias = alias})
let primaryTableReference : Parser<TableReferenceWithAlias, unit> =
attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias
let joinTableReference : Parser<JoinTableReference, unit> =
identifier
.>> spaces
.>>. (attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias)
.>> spaces
.>>. joinCondition
|>> (fun ((joinTypeStr, tableRef), condition) -> { JoinType = toJoinType(joinTypeStr);
TableReference = tableRef;
JoinCondition = condition } )
let tableReferences : Parser<TableReferences, unit> =
primaryTableReference
.>> spaces
.>>. many (joinTableReference .>> spaces)
|>> (fun (pri, joinTables) -> { PrimaryTableReference = pri;
JoinTableReferences = joinTables; } )
let whereConditionOperator : Parser<WhereOperator, unit> =
choice [
pstringCI "="
; pstringCI "is not"
; pstringCI "is"
; pstringCI "contains"
; pstringCI "like"
]
|>> toWhereOperator
let whereChainOperator : Parser<WhereOperator, unit> =
choice [
pstringCI "and"
; pstringCI "or"
]
|>> toWhereOperator
let whereCondition : Parser<WhereCondition, unit> =
let leftHandSide : Parser<WhereLeftHandSide, unit> =
fieldReference |>> FieldReferenceLH
let rightHandSide : Parser<WhereRightHandSide, unit> =
(attempt fieldReference |>> FieldReferenceRH)
<|> (attempt variableReference |>> VariableReferenceRH)
<|> (literal |>> LiteralRH)
leftHandSide
.>> ws1 .>>. whereConditionOperator .>> ws1
.>>. rightHandSide
|>> (fun((lhs, op), rhs) -> { LeftHandSide = lhs;
Operator = op;
RightHandSide = rhs })
let singleConditionWhereClause : Parser<WhereStatement, unit> =
pstringCI "where" .>> spaces
>>. whereCondition
|>> (fun (cond) -> { Operator = NoOp;
SearchConditions = [ cond ] } );
let multiConditionChainOperator : Parser<WhereOperator, unit> =
pstring "(" .>> spaces >>. whereChainOperator .>> spaces
<?> "where multi-condition operator"
let multiConditionWhereList : Parser<WhereCondition list, unit> =
sepEndBy1 (ws >>. whereCondition) (newline)
<?> "where condition list"
let multiConditionWhereClause : Parser<WhereStatement, unit> =
pstringCI "where"
.>> spaces
>>. between (pchar '(') (pchar ')')
( ws >>. whereChainOperator .>> spaces1
.>>. multiConditionWhereList )
|>> (fun (chainOp, conds) -> { Operator = chainOp;
SearchConditions = conds } )
let whereClause : Parser<WhereStatement, unit> =
(attempt multiConditionWhereClause)
<|> singleConditionWhereClause
let findStatement : Parser<FindStatement, unit> =
spaces .>> pstringCI "find" .>> spaces
>>. tableReferences
|>> (fun (tableRef) -> { TableReferences = tableRef; } )
let queryXml : Parser<QueryXml, unit> =
pstringCI "QUERY_XML" .>> newline
>>. manyCharsTill anyChar eof
|>> (fun (xmlStr) -> { Doc = XDocument.Parse(xmlStr) } )
let parse input =
match run Parsers.findStatement input with
| Success (x, _, _) -> x
| Failure (x, _, _) -> raise <| AlfabetParseError x
open FParsec
let input = #"WHERE
( AND
ApplicationGroup.REFSTR CONTAINS :BASE
BV_1.Year = 2009
BV_1.MonetaryCodeId = 'Commited'
BV_3.Year = 2009
BV_3.MonetaryCodeId = 'Commited'
BV_4.Year = 2009
BV_4.MonetaryCodeId = 'Commited'
)"
let r = run AlfabetQueryParser.Parsers.multiConditionWhereClause input
The reason FParsec can't generate more useful error messages for your example is that you've defined the ws and id parsers using the satisfy primitive. Since you only specified a predicate function, FParsec doesn't know how to describe the expected input. The User's Guide explains this issues and how to avoid it. In your code, you could e.g. use satisfyL or many1SatisfyL for the definitions.
After fixing the ws and id parsers you'll quickly discover that your code doesn't properly parse the list because the whitespace parsing is messed up. Where possible, you should always parse whitespace as trailing whitespace, not as leading whitespace, because this avoids the need for backtracking. To fix your parser for the input you gave above, you could e.g. replace
sepEndBy1 (ws >>. whereCondition) (newline)
with
sepEndBy1 (whereCondition .>> ws) (newline >>. ws)
in the definition of multiConditionWhereList.
Note that a non-empty error message list doesn't necessarily imply an error, as FParsec will generally collect the error messages of all parsers that were applied at the current position in the stream, even if the parser is "optional". This is probably the reason you were seeing the "expected newline", since a newline would have been accepted at that position.
Related
NOTE: Not long ago, I had already asked a similar question. This is not a duplication, but the clarifications to be requested did not fall within the scope of the subject itself. I therefore allow myself to open another position dealing with the analysis of an ML-like syntax based on indentation, and considering everything as an instruction / expression.
For example:
"Hello" is an expression,
let foo = 2 + 1 is an instruction using an expression (2 + 1),
print foo is an instruction, ...
In short, a syntax and semantics that is quite modular and dynamic. Like F#, or OCaml.
To do this, I use F#, with the API (available on nuget) FParsec. The FParsec wiki provides an example of a syntax based on indentation, so I have taken it up again. The module in the code below used is IndentationParserWithoutBacktracking.
The example code to be parsed uses an elementary indentation, not mixing "literal" and "instructions/expressions":
loop i 1 10
loop k 1 10
print k
print i
print j
A simple code, and without context (but this is not important at the moment).
My implementation allows codes such as:
let foo = a + b
let foo =
let a = 9
let b = 1
a + b
let foo = 7
let foo =
loop i 1 10
print i
For example. (The loop and print are there just for the tests...)
The problem I have been having for a long week now, and that I can't solve, is the fact that the indentation module asks me every time an instruction is expected in a parser for a new line... Here is a screenshot:
This applies to all the examples mentioned above. I don't really understand the problem, and therefore don't know how to solve it.
Here is the code tested for this question, it meets the minimum and functional code criteria, however, FParsec must be used:
open FParsec
// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'
module IndentationParserWithoutBacktracking =
let tabStopDistance = 8
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
let lastParsedIndentation = stream.UserState.LastParsedIndentation
if lastParsedIndentation.EndIndex = stream.Index then
lastParsedIndentation.Value
else
let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
lastParsedIndentation.EndIndex <- stream.Index
lastParsedIndentation.Value <- indentation
indentation
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
if indentation <= oldIndentation then
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
// A value is just a literal or a data name, called here "Variable"
type Value =
| Int of int | Float of float
| Bool of bool | String of string
| Char of char | Variable of Identifier
// All is an instruction, but there are some differences:
type Instr =
// Arithmetic
| Literal of Value | Infix of Instr * InfixOp * Instr
// Statements (instructions needing another instructions)
| Let of Identifier * Instr list
| Loop of Identifier * Instr * Instr * Instr list
// Other - the "print" function, from the link seen above
| Print of Identifier
and InfixOp =
// Arithmetic
| Sum | Sub | Mul | Div
// Logic
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> Identifier) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable)
// Instressions and statements
let pInstrs, pInstrimpl = createParserForwardedToRef()
// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pInstrs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
pInstrs |>> fun ex -> ex.Head
let term =
(ws >>. singlepInstr .>> ws) <|>
(betweenParentheses (ws >>. singlepInstr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum | "-" -> Sub
| "*" -> Mul | "/" -> Div
// Logical operators
| "&&" -> And | "||" -> Or
| "==" -> Equal | "!=" -> NotEqual
| ">" -> Greater | "<" -> Smaller
| ">=" -> GreaterEqual | "<=" -> SmallerEqual
| _ -> failwith ("Unknown operator: " + op)
let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
// Statements
(*
- let:
let <identifier> = <instruction(s) / value>
- print:
print <identifier>
- loop:
loop <identifier> <literal> <literal> <indented statements>
*)
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pInstrs)
(fun id exp -> Let(id, exp))
let print =
keyword "print" >>. ws1 >>. identifier
|>> Print
let instruction =
print <|> ploop <|> plet <|>
opParser.ExpressionParser <|>
literal
pInstrimpl := indentedMany1 instruction "instruction"
let document = pInstrs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
let code = test #"
let foo = a + b
"
I would like to understand first of all why it doesn't work, but also to be able to find a solution to my problem, and that this solution can be extended to the potential syntax additions of the parser.
Awaiting a salutary answer, thank you.
In order to understand why your parser doesn't work, you need to isolate the issues.
If I understand you correctly, you want your let parser to support either a single instruction on the same line or indented instructions on subsequent lines, e.g:
let x = instruction
let b =
instruction
instruction
If you can't get your existing implementation to work, I'd recommend going back to the implementation on the Wiki and trying to just add support for the let statement.
For example, I made the Wiki parser accept simple let statements with the following modifications:
type Statement = Loop of Identifier * int * int * Statement list
| Print of Identifier
| Let of Identifier * Statement list
let ws = skipManySatisfy isBlank
let str s = pstring s .>> ws
let statement, statementRef = createParserForwardedToRef()
let indentedStatements = indentedMany1 statement "statement"
let plet = keyword "let" >>. pipe2 (ws1 >>. identifier)
(ws >>. str "=" >>. ws
>>. (indentedStatements
<|> (statement |>> fun s -> [s])))
(fun id exp -> Let(id, exp))
statementRef := print <|> loop <|> plet
Note that in the modified version statement is now the parser forwarded to a ref cell, not indentedStatements.
Note also that ws is not implemented with spaces, like in your parser. This is important because spaces also consumes newlines, which would prevent the indentedMany1 from seeing the newline and properly calculating the indentation.
The reason your parser produced an "Expecting: newline" error is that indentedMany1 needs a newline at the beginning of the indented sequence in order to be able to calculate the indentation. You would have to modify the implementation of indentedMany1 if you wanted to support e.g. the following indentation pattern:
let x = instruction
instruction
instruction
I test indentation with FParsec, according to this implementation, but when I make it a little more complex by adding expressions (literals, lists, tuples and arithmetic operations), allowing expressions to top-level, and adding a variable creation statement; I first get a StackOverflowException error . In my opinion, this is because the expression parser is solicited in such a way as to make an infinite loop in the program. I see no other reason, however, I don't know how to fix this problem.
If I remove the attempt pexpression from my parser data statement, there is no more StackOverflowException, nevertheless the module IndentationParserWithoutBacktracking (therefore managing indentation) tells me that the code to be parsed is missing a "newline":
Failure: Error in Ln: 2 Col: 1
loop i 0 10
^
Expecting: let or print
The parser backtracked after:
Error in Ln: 3 Col: 5
let myVar = 2 + 1
^
Expecting: loop or print
The parser backtracked after:
Error in Ln: 3 Col: 17
let myVar = 2 + 1
^
Expecting: newline
All this according to the following text to be parsed:
loop i 0 10
let myVar = 2 + 1
print myVar
Here is my code:
open FParsec
// module IndentationParserWithoutBacktracking // see the link
// Utils
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
type InfixOp =
| Sum | Sub | Mul | Div | Pow | Mod
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
type Value =
| Int of int
| Float of float
| Bool of bool
| String of string
| Char of char
| Variable of Identifier
type Expr =
| Literal of Value
| Infix of Expr * InfixOp * Expr
| List of Expr list
| Tuple of Expr list
type Statement =
| Expression of Expr
| Let of Identifier * Statement list
| Loop of Identifier * Expr * Expr * Statement list
| Print of Identifier
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary ||| NumberLiteralOptions.AllowPlusSign
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun i -> Identifier i) <?> "valid identifier"
let betweenParentheses p =
(between (str "(") (str ")") p)
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable) <?> "literal"
// Expressions
let pexpr, pexprimpl = createParserForwardedToRef()
let term =
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. pexpr)) <|>
(ws >>. pexpr .>> ws)
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum
| "-" -> Sub
| "*" -> Mul
| "/" -> Div
| "%" -> Mod
// Logical operators
| "&&" -> And
| "||" -> Or
| "==" -> Equal
| "!=" -> NotEqual
| ">" -> Greater
| "<" -> Smaller
| ">=" -> GreaterEqual
| "<=" -> SmallerEqual
let opParser = new OperatorPrecedenceParser<_, _, _>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
let list = between (str "[") (str "]") (sepBy pexpr (str ",")) |>> List
let tuple = between (str "(") (str ")") (sepBy pexpr (str ",")) |>> Tuple
let expression =
opParser.ExpressionParser <|> // I removed this line to don't have the mistake again.
list <|>
tuple <|>
literal
pexprimpl := attempt expression
// Statements
let statements, statementsRef = createParserForwardedToRef()
let pexpression = expression |>> Expression
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. statements)
(fun id gtt exp -> Let(id, gtt, exp))
// From the link, but "revisited"
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal) // If I put 'pexpr', it doesn't work too...
(ws1 >>. literal)
(statements)
(fun id min max stmts -> Loop(id, min, max, stmts))
let print = keyword "print" >>. (ws1 >>. identifier |>> Print)
let statement =
attempt plet <|>
attempt print <|>
attempt ploop <|>
attempt pexpression
statementsRef := indentedMany1 statement "statement"
let document = statements .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "Success: %A" result
| Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg
System.Console.Clear()
test #"
loop i 0 10
let myVar = 2 + 1
print myVar
"
I know I ask several questions at the same time, and the site doesn't really allow it, but they're all a little linked together, so I might as well kill two birds with one stone...
I would really like to understand my mistakes, in order to design a parser for a very small ML-like language.
Thank you.
Edit
Here is my current code, which has been modified to respond to the first problems encountered with indentation:
open IndentationParserWithoutBacktracking // So from the link
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
type Value =
| Int of int
| Float of float
| Bool of bool
| String of string
| Char of char
| Variable of Identifier
// In FP, "all" is an expression, so:
type Expr =
// Arithmetic + lists and tuple
| Literal of Value
| Infix of Expr * InfixOp * Expr
| List of Expr list
| Tuple of Expr list
// Statements
| Return of Expr
| Loop of Identifier * Expr * Expr * Expr list
| Print of Identifier
and InfixOp =
| Sum | Sub | Mul | Div | Pow | Mod
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun i -> Identifier i) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable) <?> "literal"
// Expressions and statements
let pexprs, pexprimpl = createParserForwardedToRef()
// `ploop` is located here to force `pexprs` to be of the type `Expr list`, `ploop` requesting an expression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pexprs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepexpr` allows to use only one expression.
let singlepexpr =
pexprs |>> fun ex -> ex.Head
let term =
(ws >>. singlepexpr .>> ws) <|>
(betweenParentheses (ws >>. singlepexpr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum
| "-" -> Sub
| "*" -> Mul
| "/" -> Div
| "%" -> Mod
// Logical operators
| "&&" -> And
| "||" -> Or
| "==" -> Equal
| "!=" -> NotEqual
| ">" -> Greater
| "<" -> Smaller
| ">=" -> GreaterEqual
| "<=" -> SmallerEqual
let opParser = new OperatorPrecedenceParser<Expr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
let list = (between (str "[") (str "]") (sepBy singlepexpr (str ",")) |>> List) <?> "list"
let tuple = (between (str "(") (str ")") (sepBy singlepexpr (str ",")) |>> Tuple) <?> "tuple"
// Statements
// A commented `let` expression, commented for tests with the `return` instruction.
//let plet =
// pipe3
// (keyword "let" >>. ws1 >>. identifier)
// (ws >>. gtt ":")
// (ws >>. str "=" >>. ws >>. pexprs)
// (fun id gtt exp -> Let(id, gtt, exp))
let preturn =
keyword "return" >>. ws >>. singlepexpr
|>> fun ex -> Return ex
let print = keyword "print" >>. (ws1 >>. identifier |>> Print)
let instruction =
print <|>
ploop <|>
preturn <|>
opParser.ExpressionParser <|> // So we add the arithmetic, like x + y or 21 * 32 - 12 for example
list <|>
tuple <|>
literal
pexprimpl := indentedMany1 instruction "instruction"
let document = pexprs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
// The test code that give an error of "newline" expecting
let code = test #"
return 2 + 1
"
And here some screenshots about error:
The reason why indentedMany1 tells you it's expecting a newline in your example code is because that's what it's looking for: an indented block. Not an expression on one line. So your let myVar = 2 + 1 line is confusing it. If you wrote it as:
let myVar =
2 + 1
then I bet it would work.
What you need, I believe, is to change your let parser to allow one of two things: either an expression on a single line, or a block of statements (your statements parser). I.e., something like:
let pLetValue = expression <|> statements
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pLetValue)
(fun id gtt exp -> Let(id, gtt, exp))
Note that I haven't tested this, as I don't have much time today. It's possible that instead of expression above, you'd want attempt expression (or pexpr, which is the same thing). Experiment a little and see what happens; and if you're completely lost as you try to figure out how FParsec is handling a given expression, remember the advice given in http://www.quanttec.com/fparsec/users-guide/debugging-a-parser.html.
I noticed that the error messages sent by FParsec were quite "ambiguous", except for the last message sent for an instruction.
Here is an example:
Code to parse:
if (2 + 2 == 4)
Here, normally, there should be an instruction block (so in brackets).
And what I get:
Failure: Error in Ln: 1 Col: 1 if (2 + 2 == 4) ^ Expecting: [some instructions]
The parser backtracked after: Error in Ln: 1 Col: 3 if (2 + 2 ==
4)
^ Bad identifier: 'if' is a reserved keyword
The parser backtracked after: Error in Ln: 1 Col: 16 if (2 + 2 ==
4)
^ Note: The error occurred at the end of the input stream. Expecting: start block
As you can see, only the last error message is relevant. So I would like to know if there is not a way to display only this one, and therefore the last one without going through the others.
I guess it's not easy, since it's a feature of FParsec, but you never know...
I don't think I need to post F# code, since it's usually in the use of the library.
Edit
Here is the code of my analyzer to parse the example above:
type Statement =
| If of Expr * Block option
// And others...
and Block = Block of Statement list
let ws = pspaces >>. many pspaces |>> (fun _ -> ())
let str_ws s = pstring s .>> ws
let pexpr, pexprimpl = createParserForwardedToRef ()
// With their implementations (addition, subtraction, ...)
let pstatement, pstatementimpl = createParserForwardedToRef ()
// With their implementations, like "pif" below
let psinglestatement = pstatement |>> fun statement -> [statement]
let pstatementblock =
psinglestatement <|>
between (ws >>. str_ws "{") (ws >>. str_ws "}") (many pstatement)
let pif =
pipe2
(str_ws "if" >>. pexpr)
(pstatementblock)
(fun cnd block -> If(cnd, Some (Block(block))))
pstatementimpl :=
attempt (pif) <|>
// And others...
Edit II:
Here is the code of identifier analyse:
let reserved = [
"if"; "else" // And other...
]
let pidentifierraw =
let inline isIdentifierFirstChar c = isLetter c
let inline isIdentifierChar c = isLetter c || isDigit c
many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier"
let pidentifier =
pidentifierraw
>>= fun s ->
if reserved |> List.exists ((=) s) then fail ("Bad identifier: '" + s + "' is a reserved keyword")
else preturn s
type Literal =
| Identifier of string
// And other...
let pid = pidentifier |>> Literal.Identifier
pexpr is a set of values, including identifiers, literals, and their operations:
let pexpr, pexprimpl = createParserForwardedToRef ()
type Assoc = Associativity
let opp = OperatorPrecedenceParser<Expr, unit, unit> ()
pexprimpl := opp.ExpressionParser <?> "expression"
let term = pvalue .>> ws <|> between (str_ws "(") (str_ws ")") pexpr
opp.TermParser <- term
let inops = [ "+"; "-"; "*"; "/"; "=="; "!="; "<="; ">="; "<"; ">" ]
for op in inops do opp.AddOperator(InfixOperator(op, ws, 1, Assoc.Left, fun x y -> InfixOp(x, op, y)))
pvalue defines literals, including identifiers with pidentifier. I don't think I need to put their definitions, since they all follow this pattern (for example):
let ptrue = str_ws "true" |>> fun _ -> Bool(true)
let pfalse = str_ws "false" |>> fun _ -> Bool(false)
let pbool = ptrue <|> pfalse
I have to write a parser which parses key-value pairs in a file that looks like this:
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
kjlkjlkjlkj Pairs:A=a2|B=b2|C=c2
Note that the lines contain some garbage, the label and then the key-value pairs.
The F# code that I wrote is the following:
#r"FParsec.dll"
open FParsec
let parse keys label =
let pkey = keys |> Seq.map pstring |> choice
let pvalue = manyCharsTill anyChar (anyOf "|\n")
let ppair = pkey .>> (skipChar '=') .>>. pvalue
let ppairSeq = many ppair
let pline = skipManyTill anyChar (pstring label)
>>. ppairSeq .>> newline
let pfile = many (opt pline) |>> Seq.choose id
run pfile
>> function
| Success (result, _, _) -> result
| Failure (errorMsg, _, _) -> failwith errorMsg
"""
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
lkjlkjlkjlkj Pairs:A=a2|B=b2|C=c2
"""
|> parse ["A";"B";"C"] "Pairs:"
|> List.ofSeq
|> printfn "%A"
The expected result is:
[[("A","a1"); "B","b1"; "C","c1"]
[("A","a2"); "B","b2"; "C","c2"]]
...but instead I get the following error:
System.Exception: Error: Error in Ln: 8 Col: 1
Note: The error occurred at the end of the input stream.
Expecting: any char or 'Pairs:'
Any ideas about how I can fix this parser?
Thanks!
UPDATE: after Stephan's comment I tried to fix it but without success. This is one of my last attempts which I was expecting to work but it doesn't.
let pkey = keys |> Seq.map pstring |> choice
let pvalue = manyCharsTill anyChar (anyOf "|\n")
let ppair = pkey .>> (skipChar '=') .>>. pvalue
let ppairSeq = manyTill ppair newline
let pnonEmptyLine =
skipManyTill anyChar (pstring label)
>>. ppairSeq
|>> Some
let pemptyLine = spaces >>. newline >>% None
let pline = pemptyLine <|> pnonEmptyLine
let pfile = manyTill pline eof |>> Seq.choose id
Now the error message is:
Error in Ln: 2 Col: 5
as235 242kj25klj Pairs:A=a1|B=b1|C=c1
^
Expecting: newline
A colleague of mine found the solution and I'm posting here for others who have similar issues. Also the parser is even better because it doesn't need the key set. I uses the left side of '=' as key and the right side as value:
let parse label str =
let poperand = manyChars (noneOf "=|\n")
let ppair = poperand .>> skipChar '=' .>>. poperand
let ppairSeq = sepBy ppair (pchar '|')
let pLineWithPairs = skipManyTill anyChar (pstring label) >>. ppairSeq |>> Some
let pLineWithoutPairs = (restOfLine false) >>% None
let pLogLine = (attempt pLineWithPairs) <|> pLineWithoutPairs
let pfile = sepBy pLogLine newline |>> Seq.choose id
match run pfile str with
| Success (result, _, _) -> result
| Failure (errorMsg, _, _) -> sprintf "Error: %s" errorMsg |> failwith
As far as I understand the choice combinator implicitly appends pzero parser to my parser list and when fparsec fails to parse next part of input stream, it should search for brackets.
Here is minimal complete code:
open System
open System.Collections.Generic
open FParsec
type IDL =
|Library of string * IDL list
|ImportLib of string
|ImportAlias of string
let comment : Parser<unit,unit> = pstring "//" >>. skipRestOfLine true >>. spaces
let ws = spaces >>. (opt comment)
let str s = pstring s >>. ws
let identifierString = ws >>. many1Satisfy isLetter .>> ws // [A-z]+
let identifierPath = ws >>. many1Satisfy (fun c -> isLetter c || isDigit c || c = '.' || c = '\\' || c = '/') .>> ws // valid path letters
let keywords = ["importlib"; "IMPORTLIB"; "interface"; "typedef"; "coclass"]
let keywordsSet = new HashSet<string>(keywords)
let isKeyword (set : HashSet<string>) str = set.Contains(str)
let pidentifier set (f : Parser<string, unit>) : Parser<string, unit> =
let expectedIdentifier = expected "identifier"
fun stream ->
let state = stream.State
let reply = f stream
if reply.Status <> Ok || not (isKeyword set reply.Result) then
printf "got id %s\n" reply.Result
ws stream |> ignore
reply
else // result is keyword, so backtrack to before the string
stream.BacktrackTo(state)
Reply(Error, expectedIdentifier)
let identifier = pidentifier keywordsSet
let stmt, stmtRef = createParserForwardedToRef()
let stmtList = sepBy1 stmt (str ";")
let importlib =
str "importlib" >>.
between (str "(" .>> str "\"") (str "\"" >>. str ")")
(identifier identifierPath) |>> ImportLib
let importalias =
str "IMPORTLIB" >>.
between (str "(") (str ")")
(identifier identifierString) |>> ImportAlias
let library =
pipe2
(str "library" >>. identifier identifierString)
(between (str "{") (str "}") stmtList)
(fun lib slist -> Library(lib, slist))
do stmtRef:= choice [importlib; importalias]
let prog =
ws >>. library .>> ws .>> eof
let s = #"
library ModExpress
{
importlib(""stdole2.tlb"");
importlib(""msxml6.dll"");
}"
let test p str =
match run p str with
| Success(result, _, _) -> printfn "Success: %A" result
| Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg
test prog s
System.Console.Read() |> ignore
but for the input string
library ModExpress
{
importlib(""stdole2.tlb"");
importlib(""msxml6.dll"");
}
I got following error:
Failure: Error in Ln: 6 Col: 1
}
^
Expecting: '//', 'IMPORTLIB' or 'importlib'
It seems that the problem here is that the stmtList parser is implemented with the sepBy1 combinator. sepBy1 stmt sep parses one or more occurrences of p separated (but not ended) by sep, i.e. in EBNF: p (sep p)*. When the parser sees the semicolon after importlib(""msxml6.dll""), it expects another statement after the whitespace.
If you want to make the semicolon at the end of a statement list optional, you could simply use sepEndBy1 instead of sepBy1, or if you always want to require a semicolon, you could use
let stmtList = many1 stmt
do stmtRef:= choice [importlib; importalias] .>> str ";"