This code will read this input "(WEEKEND-SUNDAY)" and then return "SATURDAY"
but input "WEEKEND-SUNDAY)" still return "SATURDAY" => this parser ignore last ')'
let pDayOfWeekKeyWords = choice [
pstring "MONDAY" ;
pstring "TUESDAY" ;
pstring "WEDNESDAY" ;
pstring "THURSDAY" ;
pstring "FRIDAY" ;
pstring "SATURDAY" ;
pstring "SUNDAY" ;
pstring "WEEKEND" ;
pstring "WEEKDAY" ;
pstring "ALL" ]
let betweenParentheses p =
between (pstring "(") (pstring ")") p
let opp = new OperatorPrecedenceParser<Set<DayOfWeek>,unit,unit>()
let pExpr = opp.ExpressionParser
let term = (betweenParentheses pExpr) <|> (pDayOfWeekKeyWords |>> ( fun x ->
match x with
| "MONDAY" -> Set.ofList [DayOfWeek.Monday]
| "TUESDAY" -> Set.ofList [DayOfWeek.Tuesday]
| "WEDNESDAY" -> Set.ofList [DayOfWeek.Wednesday]
| "THURSDAY" -> Set.ofList [DayOfWeek.Thursday]
| "FRIDAY" -> Set.ofList [DayOfWeek.Friday]
| "SATURDAY" -> Set.ofList [DayOfWeek.Saturday]
| "SUNDAY" -> Set.ofList [DayOfWeek.Sunday]
| "WEEKDAY" -> Set.ofList [DayOfWeek.Monday ; DayOfWeek.Tuesday ; DayOfWeek.Wednesday;DayOfWeek.Thursday;DayOfWeek.Friday]
| "WEEKEND" -> Set.ofList [DayOfWeek.Saturday;DayOfWeek.Sunday]
| "ALL"-> Set.ofList [DayOfWeek.Monday ; DayOfWeek.Tuesday ; DayOfWeek.Wednesday;DayOfWeek.Thursday;DayOfWeek.Friday;DayOfWeek.Saturday;DayOfWeek.Sunday]
| _ -> failwith "ERROR MESSAGE") )
opp.TermParser <- term
opp.AddOperator(InfixOperator<Set<DayOfWeek>,unit,unit>("+", skipString "", 1, Associativity.Left, fun x y -> x + y))
opp.AddOperator(InfixOperator<Set<DayOfWeek>,unit,unit>("-", skipString "" , 1, Associativity.Left, fun x y -> x - y))
run
run pExpr "MONDAY+(WEEKEND-SUNDAY)"
output
Success: set [Monday; Saturday]
problem is
run pExpr "MONDAY+WEEKEND-SUNDAY)" or run pExpr "MONDAY)+WEEKEND-SUNDAY"
it still return
Success: set [Monday; Saturday]
I want it to show Failure: something..
did I miss something? thank you
In your latter two examples pexpr returns after having successfully parsed the input stream up to the unmatched closing parenthesis. So, in the last example the result is actually Success: set [Monday] not Success: set [Monday; Saturday].
You can use the eof parser to force an error if the input stream hasn't been consumed completely:
> run (pExpr .>> eof) "MONDAY)+WEEKEND-SUNDAY"
Error in Ln: 1 Col: 7
MONDAY)+WEEKEND-SUNDAY
^
Expecting: end of input or infix operator
Related
I have a small indentation management module with FParsec (found here); it works wonderfully well, but the only concern is that, when an error is encountered in the stream to be parsed, most of the time, FParsec returns the error message from the indentation manager, i.e. the UserState (correct me if I'm wrong on this point); which is problematic because it makes the errors very blurry, and all the same... How can I display indentation errors only when they are necessary?
Here is the module used for indentation:
module IndentParser
open FParsec
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
with
member this.Position = match this with
| Any | Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {Indent = i} -> Reply i
let getUserState : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {UserState = u} -> Reply u
let putIndentation newi : IndentParser<unit, _> =
fun stream ->
stream.UserState <- {stream.UserState with Indent = newi}
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
let acceptable i (pos : Position) =
match i with
| Any _ -> true
| Fail -> false
| Greater bp -> bp.Column < pos.Column
| Exact ep -> ep.Column = pos.Column
| AtLeast ap -> ap.Column <= pos.Column
| StartIndent _ -> true
let nestableIn i o =
match i, o with
| Greater i, Greater o -> o.Column < i.Column
| Greater i, Exact o -> o.Column < i.Column
| Exact i, Exact o -> o.Column = i.Column
| Exact i, Greater o -> o.Column <= i.Column
| _, _ -> true
let tokeniser p = parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos then return! p
else return! fail "incorrect indentation"
}
let nestP i o p = parse {
do! putIndentation i
let! x = p
do! notFollowedBy (tokeniser anyChar) <?> (sprintf "unterminated %A" i)
do! putIndentation o
return x
}
let nest indentor p = parse {
let! outerI = getIndentation
let! curPos = getPosition
let innerI = indentor curPos
if nestableIn innerI outerI
then return! nestP innerI outerI p
else return! nestP Fail outerI p
}
let nestWithPos indentor pos p = parse {
let! outerI = getIndentation
let innerI = indentor pos
if nestableIn innerI outerI
then return! nestP innerI outerI p
else return! nestP Fail outerI p
}
let neglectIndent p = parse {
let! o = getIndentation
do! putIndentation Any
let! x = p
do! putIndentation o
return x
}
let checkIndent<'u> : IndentParser<unit, 'u> = tokeniser (preturn ())
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same line as the defined position (`pos`).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is further away than the defined position (`pos`).
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same OR line further than the defined position (`pos`).
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Simply check if the parser (`p`) exists, regardless of its position in the text to be analyzed.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
and here is an example of the problem encountered:
open FParsec
open IndentParser
// ---------- AST ----------
type Statement
= Let of string * Expr
and Expr
= Tuple of Expr list
| Literal of Literal
and Literal
= Int of int
| Float of float
| Char of char
// ---------- Parser ----------
let inline pstr's s = stringReturn s s <?> sprintf "`%s`" s
let inline pstr'u s = stringReturn s () <?> sprintf "`%s`" s
let identifier = manySatisfy (fun c -> isLetter c || c = ''')
let comment = pstr'u "//" >>. skipRestOfLine true <?> ""
let numberFormat =
NumberLiteralOptions.AllowBinary
||| NumberLiteralOptions.AllowMinusSign
||| NumberLiteralOptions.AllowHexadecimal
||| NumberLiteralOptions.AllowOctal
||| NumberLiteralOptions.AllowPlusSign
||| NumberLiteralOptions.AllowFraction
let number<'u> : IndentParser<Literal, 'u> =
(numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Int(int nl.String)
else Float(float nl.String))
let char<'u> : IndentParser<Literal, 'u> =
((between (pstr'u "'") (pstr'u "'")
(satisfy (fun c -> c <> '\'')) <?> "char literal") |>> Char)
let rec let'parser =
parse { let! pos = getPosition
do! exact pos (pstr'u "let" <?> "let statement")
let! name = greater pos identifier <?> "identifier"
do! greater pos (pstr'u "=" <?> "value assignment")
let! value = greater pos expression
return Let(name, value) }
and tuple'parser =
parse { let! pos = getPosition
do! exact pos (pstr'u "(" <?> "tuple")
let! uplets = greater pos (sepBy1 expression (pstr'u ","))
do! greater pos (pstr'u ")" <?> "right parenthese")
return Tuple uplets }
and literal'parser = attempt number <|> char |>> Literal
and expression =
spaces >>? (attempt tuple'parser <|> literal'parser)
and statement = spaces >>? let'parser .>>? spaces .>>? (attempt comment <|> (spaces >>% ()))
// ---------- Test ----------
System.Console.Clear()
let res = runParser (spaces >>? blockOf statement .>>? (spaces .>>? eof)) () #"
let foo = (0, 1) // it works well
let bar = 887 // it works well
let oof = 'x' // it works well
let rab = // it fail with 'incorrect indentation' (without this comment)
let ofo = (0, 2, // it fail with 'incorrect indentation' (without this comment)
"
printfn "%A" res
It's really annoying...
Would someone explain to me how to solve this problem?
NOTE: Not long ago, I had already asked a similar question. This is not a duplication, but the clarifications to be requested did not fall within the scope of the subject itself. I therefore allow myself to open another position dealing with the analysis of an ML-like syntax based on indentation, and considering everything as an instruction / expression.
For example:
"Hello" is an expression,
let foo = 2 + 1 is an instruction using an expression (2 + 1),
print foo is an instruction, ...
In short, a syntax and semantics that is quite modular and dynamic. Like F#, or OCaml.
To do this, I use F#, with the API (available on nuget) FParsec. The FParsec wiki provides an example of a syntax based on indentation, so I have taken it up again. The module in the code below used is IndentationParserWithoutBacktracking.
The example code to be parsed uses an elementary indentation, not mixing "literal" and "instructions/expressions":
loop i 1 10
loop k 1 10
print k
print i
print j
A simple code, and without context (but this is not important at the moment).
My implementation allows codes such as:
let foo = a + b
let foo =
let a = 9
let b = 1
a + b
let foo = 7
let foo =
loop i 1 10
print i
For example. (The loop and print are there just for the tests...)
The problem I have been having for a long week now, and that I can't solve, is the fact that the indentation module asks me every time an instruction is expected in a parser for a new line... Here is a screenshot:
This applies to all the examples mentioned above. I don't really understand the problem, and therefore don't know how to solve it.
Here is the code tested for this question, it meets the minimum and functional code criteria, however, FParsec must be used:
open FParsec
// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'
module IndentationParserWithoutBacktracking =
let tabStopDistance = 8
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
let lastParsedIndentation = stream.UserState.LastParsedIndentation
if lastParsedIndentation.EndIndex = stream.Index then
lastParsedIndentation.Value
else
let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
lastParsedIndentation.EndIndex <- stream.Index
lastParsedIndentation.Value <- indentation
indentation
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
if indentation <= oldIndentation then
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
// A value is just a literal or a data name, called here "Variable"
type Value =
| Int of int | Float of float
| Bool of bool | String of string
| Char of char | Variable of Identifier
// All is an instruction, but there are some differences:
type Instr =
// Arithmetic
| Literal of Value | Infix of Instr * InfixOp * Instr
// Statements (instructions needing another instructions)
| Let of Identifier * Instr list
| Loop of Identifier * Instr * Instr * Instr list
// Other - the "print" function, from the link seen above
| Print of Identifier
and InfixOp =
// Arithmetic
| Sum | Sub | Mul | Div
// Logic
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> Identifier) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable)
// Instressions and statements
let pInstrs, pInstrimpl = createParserForwardedToRef()
// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pInstrs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
pInstrs |>> fun ex -> ex.Head
let term =
(ws >>. singlepInstr .>> ws) <|>
(betweenParentheses (ws >>. singlepInstr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum | "-" -> Sub
| "*" -> Mul | "/" -> Div
// Logical operators
| "&&" -> And | "||" -> Or
| "==" -> Equal | "!=" -> NotEqual
| ">" -> Greater | "<" -> Smaller
| ">=" -> GreaterEqual | "<=" -> SmallerEqual
| _ -> failwith ("Unknown operator: " + op)
let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
// Statements
(*
- let:
let <identifier> = <instruction(s) / value>
- print:
print <identifier>
- loop:
loop <identifier> <literal> <literal> <indented statements>
*)
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pInstrs)
(fun id exp -> Let(id, exp))
let print =
keyword "print" >>. ws1 >>. identifier
|>> Print
let instruction =
print <|> ploop <|> plet <|>
opParser.ExpressionParser <|>
literal
pInstrimpl := indentedMany1 instruction "instruction"
let document = pInstrs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
let code = test #"
let foo = a + b
"
I would like to understand first of all why it doesn't work, but also to be able to find a solution to my problem, and that this solution can be extended to the potential syntax additions of the parser.
Awaiting a salutary answer, thank you.
In order to understand why your parser doesn't work, you need to isolate the issues.
If I understand you correctly, you want your let parser to support either a single instruction on the same line or indented instructions on subsequent lines, e.g:
let x = instruction
let b =
instruction
instruction
If you can't get your existing implementation to work, I'd recommend going back to the implementation on the Wiki and trying to just add support for the let statement.
For example, I made the Wiki parser accept simple let statements with the following modifications:
type Statement = Loop of Identifier * int * int * Statement list
| Print of Identifier
| Let of Identifier * Statement list
let ws = skipManySatisfy isBlank
let str s = pstring s .>> ws
let statement, statementRef = createParserForwardedToRef()
let indentedStatements = indentedMany1 statement "statement"
let plet = keyword "let" >>. pipe2 (ws1 >>. identifier)
(ws >>. str "=" >>. ws
>>. (indentedStatements
<|> (statement |>> fun s -> [s])))
(fun id exp -> Let(id, exp))
statementRef := print <|> loop <|> plet
Note that in the modified version statement is now the parser forwarded to a ref cell, not indentedStatements.
Note also that ws is not implemented with spaces, like in your parser. This is important because spaces also consumes newlines, which would prevent the indentedMany1 from seeing the newline and properly calculating the indentation.
The reason your parser produced an "Expecting: newline" error is that indentedMany1 needs a newline at the beginning of the indented sequence in order to be able to calculate the indentation. You would have to modify the implementation of indentedMany1 if you wanted to support e.g. the following indentation pattern:
let x = instruction
instruction
instruction
I test indentation with FParsec, according to this implementation, but when I make it a little more complex by adding expressions (literals, lists, tuples and arithmetic operations), allowing expressions to top-level, and adding a variable creation statement; I first get a StackOverflowException error . In my opinion, this is because the expression parser is solicited in such a way as to make an infinite loop in the program. I see no other reason, however, I don't know how to fix this problem.
If I remove the attempt pexpression from my parser data statement, there is no more StackOverflowException, nevertheless the module IndentationParserWithoutBacktracking (therefore managing indentation) tells me that the code to be parsed is missing a "newline":
Failure: Error in Ln: 2 Col: 1
loop i 0 10
^
Expecting: let or print
The parser backtracked after:
Error in Ln: 3 Col: 5
let myVar = 2 + 1
^
Expecting: loop or print
The parser backtracked after:
Error in Ln: 3 Col: 17
let myVar = 2 + 1
^
Expecting: newline
All this according to the following text to be parsed:
loop i 0 10
let myVar = 2 + 1
print myVar
Here is my code:
open FParsec
// module IndentationParserWithoutBacktracking // see the link
// Utils
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
type InfixOp =
| Sum | Sub | Mul | Div | Pow | Mod
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
type Value =
| Int of int
| Float of float
| Bool of bool
| String of string
| Char of char
| Variable of Identifier
type Expr =
| Literal of Value
| Infix of Expr * InfixOp * Expr
| List of Expr list
| Tuple of Expr list
type Statement =
| Expression of Expr
| Let of Identifier * Statement list
| Loop of Identifier * Expr * Expr * Statement list
| Print of Identifier
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary ||| NumberLiteralOptions.AllowPlusSign
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun i -> Identifier i) <?> "valid identifier"
let betweenParentheses p =
(between (str "(") (str ")") p)
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable) <?> "literal"
// Expressions
let pexpr, pexprimpl = createParserForwardedToRef()
let term =
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. pexpr)) <|>
(ws >>. pexpr .>> ws)
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum
| "-" -> Sub
| "*" -> Mul
| "/" -> Div
| "%" -> Mod
// Logical operators
| "&&" -> And
| "||" -> Or
| "==" -> Equal
| "!=" -> NotEqual
| ">" -> Greater
| "<" -> Smaller
| ">=" -> GreaterEqual
| "<=" -> SmallerEqual
let opParser = new OperatorPrecedenceParser<_, _, _>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
let list = between (str "[") (str "]") (sepBy pexpr (str ",")) |>> List
let tuple = between (str "(") (str ")") (sepBy pexpr (str ",")) |>> Tuple
let expression =
opParser.ExpressionParser <|> // I removed this line to don't have the mistake again.
list <|>
tuple <|>
literal
pexprimpl := attempt expression
// Statements
let statements, statementsRef = createParserForwardedToRef()
let pexpression = expression |>> Expression
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. statements)
(fun id gtt exp -> Let(id, gtt, exp))
// From the link, but "revisited"
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal) // If I put 'pexpr', it doesn't work too...
(ws1 >>. literal)
(statements)
(fun id min max stmts -> Loop(id, min, max, stmts))
let print = keyword "print" >>. (ws1 >>. identifier |>> Print)
let statement =
attempt plet <|>
attempt print <|>
attempt ploop <|>
attempt pexpression
statementsRef := indentedMany1 statement "statement"
let document = statements .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "Success: %A" result
| Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg
System.Console.Clear()
test #"
loop i 0 10
let myVar = 2 + 1
print myVar
"
I know I ask several questions at the same time, and the site doesn't really allow it, but they're all a little linked together, so I might as well kill two birds with one stone...
I would really like to understand my mistakes, in order to design a parser for a very small ML-like language.
Thank you.
Edit
Here is my current code, which has been modified to respond to the first problems encountered with indentation:
open IndentationParserWithoutBacktracking // So from the link
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
type Value =
| Int of int
| Float of float
| Bool of bool
| String of string
| Char of char
| Variable of Identifier
// In FP, "all" is an expression, so:
type Expr =
// Arithmetic + lists and tuple
| Literal of Value
| Infix of Expr * InfixOp * Expr
| List of Expr list
| Tuple of Expr list
// Statements
| Return of Expr
| Loop of Identifier * Expr * Expr * Expr list
| Print of Identifier
and InfixOp =
| Sum | Sub | Mul | Div | Pow | Mod
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> fun i -> Identifier i) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable) <?> "literal"
// Expressions and statements
let pexprs, pexprimpl = createParserForwardedToRef()
// `ploop` is located here to force `pexprs` to be of the type `Expr list`, `ploop` requesting an expression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pexprs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepexpr` allows to use only one expression.
let singlepexpr =
pexprs |>> fun ex -> ex.Head
let term =
(ws >>. singlepexpr .>> ws) <|>
(betweenParentheses (ws >>. singlepexpr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] #
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum
| "-" -> Sub
| "*" -> Mul
| "/" -> Div
| "%" -> Mod
// Logical operators
| "&&" -> And
| "||" -> Or
| "==" -> Equal
| "!=" -> NotEqual
| ">" -> Greater
| "<" -> Smaller
| ">=" -> GreaterEqual
| "<=" -> SmallerEqual
let opParser = new OperatorPrecedenceParser<Expr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
let list = (between (str "[") (str "]") (sepBy singlepexpr (str ",")) |>> List) <?> "list"
let tuple = (between (str "(") (str ")") (sepBy singlepexpr (str ",")) |>> Tuple) <?> "tuple"
// Statements
// A commented `let` expression, commented for tests with the `return` instruction.
//let plet =
// pipe3
// (keyword "let" >>. ws1 >>. identifier)
// (ws >>. gtt ":")
// (ws >>. str "=" >>. ws >>. pexprs)
// (fun id gtt exp -> Let(id, gtt, exp))
let preturn =
keyword "return" >>. ws >>. singlepexpr
|>> fun ex -> Return ex
let print = keyword "print" >>. (ws1 >>. identifier |>> Print)
let instruction =
print <|>
ploop <|>
preturn <|>
opParser.ExpressionParser <|> // So we add the arithmetic, like x + y or 21 * 32 - 12 for example
list <|>
tuple <|>
literal
pexprimpl := indentedMany1 instruction "instruction"
let document = pexprs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
// The test code that give an error of "newline" expecting
let code = test #"
return 2 + 1
"
And here some screenshots about error:
The reason why indentedMany1 tells you it's expecting a newline in your example code is because that's what it's looking for: an indented block. Not an expression on one line. So your let myVar = 2 + 1 line is confusing it. If you wrote it as:
let myVar =
2 + 1
then I bet it would work.
What you need, I believe, is to change your let parser to allow one of two things: either an expression on a single line, or a block of statements (your statements parser). I.e., something like:
let pLetValue = expression <|> statements
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pLetValue)
(fun id gtt exp -> Let(id, gtt, exp))
Note that I haven't tested this, as I don't have much time today. It's possible that instead of expression above, you'd want attempt expression (or pexpr, which is the same thing). Experiment a little and see what happens; and if you're completely lost as you try to figure out how FParsec is handling a given expression, remember the advice given in http://www.quanttec.com/fparsec/users-guide/debugging-a-parser.html.
Stuck trying to parse a string into an Abstract Syntax Tree
This is the grammar that im using, in BNF
<Block> ::= <Expr>;
<Expr> ::= <Number> | <App>
<App> ::= (<Expr>, <Expr>) <Func>
<Func> ::= + | - | * | \
<Number> ::= <Digit> | <Digit><Number>
<Digit> ::= 0 | 1 | .... | 9
I have to use this:
data Ast = Number Integer | Func String | App Ast [Ast] | Block [Ast]
I wrote a tokenize method (that perhaps is not completely finished, but gives the right idea)
tokenize :: String -> [String]
tokenize [] = []
tokenize xs # (x : xs')
| x `elem` t = [x] : tokenize xs'
| isDigit x = [y | y <- takeWhile isDigit xs] : (tokenize (dropWhile isDigit xs))
| otherwise = tokenize xs'
where t = ['+', '-', '*', '/', '(', ')', ';']
I'm stuck on the parseApp function. It's not obvious to me how I could use pattern matching, and I don't see any other way to do it (with my very limited haskell experience)
parse :: String -> Ast
parse xs = parseBlock (tokenize xs)
parseBlock :: [String] -> (Ast, [String])
parseBlock xss = let (a, b) = parseExpr (init xss) in (Block [a], b)
parseExpr :: [String] -> (Ast, [String])
parseExpr xss # (xs : xss')
| all isDigit xs = (Number (read xs :: Integer), xss')
| otherwise = parseApp xss
parseApp :: [String] -> (Ast, [String])
parseApp xss = -- ...
If the input string is
"((10, 2)- , (0, 2)-)+;"
it should tokenize to
["(", "(", "10", ",", "2", ")", "-", ",", "(", "0", ",", "2", ")", "-", ")", "+", ";"]
and that would become
(Block [ App (Name "+") [App (Name "-") [Number 10, Number 2], App (Name "-") [Number 0, Number 2]]], [])
It's an assignment so I can't change the types and so on. I'm supposed to assume that the given input has correct syntax. I have read similiar threads here, but the answers seem to assume a higher level of understanding
Within a simple query language I'd like to recognize date and time literals, preferably without using delimiters. For example,
CreationDate = 2013-05-13 5:30 PM
I could use a combinator to detect the basic syntax (e.g., yyyy-MM-dd hh:mm tt), but then it needs to be passed to DateTime.TryParse for full validation.
A few questions:
Is there a combinator for "post processing" a parser result, e.g., pstring "1/2/2000" |> (fun s -> try OK(DateTime.Parse s) with _ -> Fail("not a date"))
Is it possible to apply a predicate to a string (as satisfy does to char)?
Is there a better approach for parsing date/time?
UPDATE
Using Guvante's and Stephan's examples, I came up with this:
let dateTimeLiteral =
let date sep = pipe5 pint32 sep pint32 sep pint32 (fun a _ b _ c -> a, b, c)
let time =
(pint32 .>>. (skipChar ':' >>. pint32)) .>>.
(opt (stringCIReturn " am" false <|> stringCIReturn " pm" true))
(date (pstring "/") <|> date (pstring "-")) .>>.
(opt (skipChar ' ' >>. time)) .>> ws
>>=? (fun ((a, b, c), tt) ->
let y, m, d = if a > 12 then a, b, c else c, a, b
let h, n =
match tt with
| Some((h, n), tt) ->
match tt with
| Some true -> (match h with 12 -> h | _ -> h + 12), n
| Some false -> (match h with 12 -> h - 12 | _ -> h), n
| None -> h, n
| None -> 0, 0
try preturn (System.DateTime(y, m, d, h, n, 0)) |>> DateTime
with _ -> fail "Invalid date/time format")
You can easily build a custom combinator or parser that validates parsed input.
If you only want to use combinators ("Haskell-style"), you could use
let pDateString = pstring "1/2/2000"
let pDate1 =
pDateString
>>= fun str ->
try preturn (System.DateTime.Parse(str))
with _ -> fail "Date format error"
as Guvante just proposed.
If you want to avoid construction temporary parsers (see preturn ... and pfail ... above), you can just let the function accept a second parameter and directly return Reply values:
let pDate2 =
pDateString
>>= fun str stream ->
try Reply(System.DateTime.Parse(str))
with _ -> Reply(Error, messageError "Date format error")
If you want the error location to be at the beginning of the malformed date string, you could replace >>= with >>=?. Note that this also has consequences for error recovery.
If you want to have full control, you can write the parser only using the lower level API, starting with a basic version like the following:
let pDate3 =
fun stream ->
let reply = pDateString stream
if reply.Status = Ok then
try Reply(System.DateTime.Parse(reply.Result))
with _ -> Reply(Error, messageError "Date format error")
else
Reply(reply.Status, reply.Error)
This last version would also allow you to replace the pDateString parser with code that directly accesses the CharStream interface, which could give you some additional flexibility or performance.
Is there a combinator for "post processing" a parser result
It depends on what you want to do if you fail. You can always do |>> to get your DateTime out. Failing it is equally interesting, I think your example could be (given a parser sp that gets the correct string, note it would be of type Parser<string,'u>)
sp >>= (fun s -> match DateTime.TryParse s with
| true,result -> preturn result
| false,_ -> fail)
Here I am taking in the resultant string and calling the TryParse method, and returning either a preturn or a fail depending on whether it succeeds. I couldn't find any of the methods that worked exactly like that.
Note that >>=? would cause a backtrack if it failed.
Is it possible to apply a predicate to a string (as satisfy does for char)?
You would have to call the predicate for every character (2, 20, 201) which is usually not ideal. I am pretty sure you could whip up something like this if you wanted, but I don't think it is ideal for that reason, not to mention handling partial matches becomes harder.
Is there a better approach for parsing date/time?
The biggest factor is "What do you know about the date/time?" If you know it is in that syntax exactly then you should be able to use a post process and be fine (since hopefully the error case will be rare)
If you aren't sure, for instance if PM is optional, but would be unambiguously detailed, then you will probably want to break up the definition and combine it at the end. Note that here I have relaxed the character counts a little, you could add some opt to get even more relaxed, or replace the pint32 with digit and manual conversions.
let pipe6 = //Implementation left as an exercise
let dash = skipChar '-'
let space = skipChar ' '
let colon = skipChar ':'
pipe6 (pint32 .>> dash) //Year
(pint32 .>> dash) //Month
(pint32 .>> space) //Day
(pint32 .>> colon) //Hour
(pint32 .>> space) //Minute
(anyString) //AM/PM
(fun year month day hour minute amPm ->
DateTime(year, month, day,
hour + (if amPm.Equals("PM", StringComparison.InvariantCultureIgnoreCase)
then 12 else 0),
minute, 0)) //No seconds
Writing all that out I am not sure if you are better or worse off...
I've used next code to parse given date string into DataTime object.
2000-01-01 12:34:56,789
let pipe7 p1 p2 p3 p4 p5 p6 p7 f =
p1 >>= fun x1 ->
p2 >>= fun x2 ->
p3 >>= fun x3 ->
p4 >>= fun x4 ->
p5 >>= fun x5 ->
p6 >>= fun x6 ->
p7 >>= fun x7 -> preturn (f x1 x2 x3 x4 x5 x6 x7)
let int_ac = pint32 .>> anyChar
let pDateStr : Parser<DateTime, unit> = pipe7 int_ac int_ac int_ac int_ac int_ac int_ac int_ac (fun y m d h mi s mil -> new DateTime(y,m,d,h,mi,s,mil))