I have a small indentation-management module built on FParsec (found here). It works wonderfully well; my only concern is that, when an error is encountered in the stream being parsed, FParsec most of the time returns the error message produced by the indentation manager, i.e. the one tied to the UserState (correct me if I'm wrong on this point). This is problematic because it makes the errors very vague, and they all look the same. How can I display indentation errors only when they are actually relevant?
Here is the module used for indentation:
module IndentParser
open FParsec
/// Indentation constraint kept in the `Indent` field of `IndentState`.
type Indentation =
    | Fail
    | Any
    | Greater of Position
    | Exact of Position
    | AtLeast of Position
    | StartIndent of Position

    /// The position carried by the constraint, or `None` for `Any` and `Fail`.
    member indent.Position =
        match indent with
        | Fail | Any -> None
        | Greater p | Exact p | AtLeast p | StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
/// Parser that reads the `Indent` field of the current user state and returns it.
let getIndentation : IndentParser<_,_> =
    fun stream -> Reply stream.UserState.Indent
/// Parser that returns the inner `UserState` field wrapped by `IndentState`.
let getUserState : IndentParser<_,_> =
    fun stream -> Reply stream.UserState.UserState
/// Parser that replaces the `Indent` field of the user state with `newi` and returns unit.
let putIndentation newi : IndentParser<unit, _> =
    fun stream ->
        let updated = { stream.UserState with Indent = newi }
        stream.UserState <- updated
        Reply(())
let failf fmt = fail << sprintf fmt
/// Decides whether position `pos` satisfies the indentation constraint `i`.
/// Only the `Column` of each position is compared.
let acceptable i (pos : Position) =
    match i with
    | Any -> true // field-less case, so no wildcard argument
    | Fail -> false
    | Greater bp -> bp.Column < pos.Column
    | Exact ep -> ep.Column = pos.Column
    | AtLeast ap -> ap.Column <= pos.Column
    | StartIndent _ -> true
/// Tells whether the inner constraint `i` may be nested inside the outer constraint `o`.
/// Only `Greater`/`Exact` combinations are restricted (by column); every other
/// combination is accepted.
let nestableIn i o =
    match i, o with
    | Greater inner, (Greater outer | Exact outer) -> outer.Column < inner.Column
    | Exact inner, Exact outer -> outer.Column = inner.Column
    | Exact inner, Greater outer -> outer.Column <= inner.Column
    | _ -> true
/// Runs `p` only if the current position satisfies the current indentation
/// constraint (see `acceptable`); otherwise fails with "incorrect indentation".
let tokeniser p =
    getPosition >>= fun pos ->
        getIndentation >>= fun indentation ->
            if acceptable indentation pos then p
            else fail "incorrect indentation"
/// Runs `p` with the indentation constraint set to `i`, then sets the constraint to `o`
/// and returns the result of `p`.
/// After `p`, the parser fails (labelled "unterminated <i>") if another character
/// could still be read at a position acceptable for `i`.
let nestP i o p = parse {
do! putIndentation i
let! x = p
// `tokeniser anyChar` succeeds only when a character is available at an acceptable position.
do! notFollowedBy (tokeniser anyChar) <?> (sprintf "unterminated %A" i)
// Reached only if the steps above succeeded.
do! putIndentation o
return x
}
/// Builds an inner constraint from the current position with `indentor` and runs `p`
/// under it via `nestP`. If that constraint cannot be nested in the current one
/// (`nestableIn`), `Fail` is used as the inner constraint instead.
let nest indentor p = parse {
    let! outer = getIndentation
    let! here = getPosition
    let requested = indentor here
    let inner = if nestableIn requested outer then requested else Fail
    return! nestP inner outer p
}
/// Same as `nest`, but the inner constraint is built from the supplied `pos`
/// instead of the current stream position.
let nestWithPos indentor pos p = parse {
    let! outer = getIndentation
    let requested = indentor pos
    let inner = if nestableIn requested outer then requested else Fail
    return! nestP inner outer p
}
/// Runs `p` with the indentation constraint set to `Any`, then puts back the constraint
/// that was active before and returns the result of `p`.
let neglectIndent p = parse {
// Remember the constraint currently in force.
let! o = getIndentation
do! putIndentation Any
let! x = p
// Restored only if `p` succeeded.
do! putIndentation o
return x
}
let checkIndent<'u> : IndentParser<unit, 'u> = tokeniser (preturn ())
/// Sets the indentation constraint to `i`, skips whitespace with `spaces`, then runs `p`
/// through `tokeniser`, so the position reached after the whitespace is checked against `i`.
/// This parser does not put the previous constraint back itself.
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Runs `p` via `indented (Exact pos)`: after whitespace is skipped, `p` must start
/// in the same column as `pos` (`acceptable` compares columns, not lines).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Runs `p` via `indented (Greater pos)`: after whitespace is skipped, `p` must start
/// in a column strictly greater than the column of `pos`.
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Runs `p` via `indented (AtLeast pos)`: after whitespace is skipped, `p` must start
/// in a column greater than or equal to the column of `pos`.
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Runs `p` via `indented Any`, i.e. without any column restriction.
/// The `pos` argument is accepted but not used.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
/// Parses one or more occurrences of `p` and returns them as a list.
/// The position reached after the leading whitespace is taken as the reference: the
/// first item and the recursive call for the remaining items are both run through
/// `exact pos`, so they must start in that column.
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
// If parsing further items fails, `attempt` backtracks and the block ends with this item.
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
And here is an example of the problem encountered:
open FParsec
open IndentParser
// ---------- AST ----------
type Statement
= Let of string * Expr
and Expr
= Tuple of Expr list
| Literal of Literal
and Literal
= Int of int
| Float of float
| Char of char
// ---------- Parser ----------
let inline pstr's s = stringReturn s s <?> sprintf "`%s`" s
let inline pstr'u s = stringReturn s () <?> sprintf "`%s`" s
/// Parses an identifier made of one or more letters or apostrophes.
/// `many1Satisfy` is used instead of `manySatisfy` so that an empty identifier is
/// rejected rather than accepted without consuming any input.
let identifier = many1Satisfy (fun c -> isLetter c || c = ''')
let comment = pstr'u "//" >>. skipRestOfLine true <?> ""
let numberFormat =
NumberLiteralOptions.AllowBinary
||| NumberLiteralOptions.AllowMinusSign
||| NumberLiteralOptions.AllowHexadecimal
||| NumberLiteralOptions.AllowOctal
||| NumberLiteralOptions.AllowPlusSign
||| NumberLiteralOptions.AllowFraction
let number<'u> : IndentParser<Literal, 'u> =
(numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Int(int nl.String)
else Float(float nl.String))
let char<'u> : IndentParser<Literal, 'u> =
((between (pstr'u "'") (pstr'u "'")
(satisfy (fun c -> c <> '\'')) <?> "char literal") |>> Char)
let rec let'parser =
parse { let! pos = getPosition
do! exact pos (pstr'u "let" <?> "let statement")
let! name = greater pos identifier <?> "identifier"
do! greater pos (pstr'u "=" <?> "value assignment")
let! value = greater pos expression
return Let(name, value) }
and tuple'parser =
parse { let! pos = getPosition
do! exact pos (pstr'u "(" <?> "tuple")
let! uplets = greater pos (sepBy1 expression (pstr'u ","))
do! greater pos (pstr'u ")" <?> "right parenthese")
return Tuple uplets }
and literal'parser = attempt number <|> char |>> Literal
and expression =
spaces >>? (attempt tuple'parser <|> literal'parser)
and statement = spaces >>? let'parser .>>? spaces .>>? (attempt comment <|> (spaces >>% ()))
// ---------- Test ----------
System.Console.Clear()
// Runs the block parser on a sample program held in a verbatim string literal (`@"..."`).
let res = runParser (spaces >>? blockOf statement .>>? (spaces .>>? eof)) () @"
let foo = (0, 1) // it works well
let bar = 887 // it works well
let oof = 'x' // it works well
let rab = // it fail with 'incorrect indentation' (without this comment)
let ofo = (0, 2, // it fail with 'incorrect indentation' (without this comment)
"
printfn "%A" res
It's really annoying...
Would someone explain to me how to solve this problem?
Related
I had already asked a question about how to parse the arrow type, this is not a duplicate, but rather an adaptation with the indentation based syntax.
Indeed, I would like to be able to analyze a syntax close to that of the ML family languages. I also introduced the syntax of the type signature of a function in Haskell, so this:
myFunction :: atype
My parser works very well for all kinds of signature types, except the arrow type when it is "alone":
foo :: a // ok
foo :: [a] // ok
foo :: (a, a) // ok
foo :: [a -> a] // ok
foo :: (a -> a, a) // ok
foo :: a -> a // error
Same for the creation of functions (to make it simple, I just expected a number as a value):
foo: a = 0 // ok
foo: [a] = 0 // ok
foo: (a, a) = 0 // ok
foo: [a -> a] = 0 // ok
foo: (a -> a, a) = 0 // ok
foo: a -> a = 0 // error
Without the indentation, all these cases work a priori.
I tried a module to parse the indentation other than the FParsec wiki, just to try and evaluate a little. It comes from there, and here is the necessary and sufficient module code for the question:
module IndentParser =
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
with
member this.Position = match this with
| Any | Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {Indent = i} -> Reply i
let getUserState : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {UserState = u} -> Reply u
let putIndentation newi : IndentParser<unit, _> =
fun stream ->
stream.UserState <- {stream.UserState with Indent = newi}
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
/// Decides whether position `pos` satisfies the indentation constraint `i`.
/// Only the `Column` of each position is compared.
let acceptable i (pos : Position) =
    match i with
    | Any -> true // field-less case, so no wildcard argument
    | Fail -> false
    | Greater bp -> bp.Column < pos.Column
    | Exact ep -> ep.Column = pos.Column
    | AtLeast ap -> ap.Column <= pos.Column
    | StartIndent _ -> true
let tokeniser p = parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos then return! p
else return! failf "incorrect indentation at %A" pos
}
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Runs `p` via `indented (Exact pos)`: after whitespace is skipped, `p` must start
/// in the same column as `pos` (`acceptable` compares columns, not lines).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Runs `p` via `indented (Greater pos)`: after whitespace is skipped, `p` must start
/// in a column strictly greater than the column of `pos`.
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Runs `p` via `indented (AtLeast pos)`: after whitespace is skipped, `p` must start
/// in a column greater than or equal to the column of `pos`.
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Runs `p` via `indented Any`, i.e. without any column restriction.
/// The `pos` argument is accepted but not used.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
Now, here is the code I'm trying to fix for the problem I encountered:
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
// NOTE(review): the list contained '#' twice; the first one is assumed to have been '@'
// (confirm). The second, redundant '+' entry has been dropped.
let private allowedSymbols =
    ['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
/// The operator text `str` is matched through `tokeniser` and must not be directly
/// followed by one of `allowedSymbols`, a double quote or an apostrophe.
// Note: the leading `ws >>.` consumes whitespace before `str` is tried, so this parser
// can fail after having consumed input.
let inline private opws str =
    ws >>.
    (tokeniser (pstring str >>?
                (nextCharSatisfiesNot
                    (isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
attempt typename <|>
attempt tuple_type <|>
attempt list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let'
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
// Sample input passed as a verbatim string literal (`@"..."`).
parse @"
foo :: a -> a
"
Here is the error message obtained:
There is no reference to indentation in the error message, which is what puzzles me as well, because an otherwise identical parser without the indentation handling works.
Could you put me on the right way?
EDIT
Here is the "fixed" code (the use of the function signature parser was missing + removal of unnecessary attempt):
open FParsec
// module IndentParser
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
// NOTE(review): the list contained '#' twice; the first one is assumed to have been '@'
// (confirm). The second, redundant '+' entry has been dropped.
let private allowedSymbols =
    ['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
/// The operator text `str` is matched through `tokeniser` and must not be directly
/// followed by one of `allowedSymbols`, a double quote or an apostrophe.
// Note: the leading `ws >>.` consumes whitespace before `str` is tried, so this parser
// can fail after having consumed input.
let inline private opws str =
    ws >>.
    (tokeniser (pstring str >>?
                (nextCharSatisfiesNot
                    (isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
typename <|>
tuple_type <|>
list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let' <|>
signature
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
System.Console.Clear()
// Sample input passed as a verbatim string literal (`@"..."`).
parse @"
foo :: a -> a
"
So, here are the new error messages:
and
At the moment, your code is failing on the :: signature because you haven't actually used your signature parser anywhere. You have defined expression as attempt let', but I think you meant to write attempt signature <|> attempt let'. That is why your test is failing on the second colon of ::, because it's matching the single colon of a let' and then not expecting the second colon.
Also, I think your chaining multiple attempt combinators together like attempt a <|> attempt b <|> attempt c is going to cause you problems somewhere, and that you should remove the final attempt, e.g., attempt a <|> attempt b <|> c. If you use attempt in all the possible choices, you'll end up with a parser that can succeed by parsing nothing, which is often not what you intended.
Update: I think I've found the cause and the solution.
Summary: In your opws parser, replace the line ws >>. with ws >>?.
Explanation: In all the sepBy variants (and chainr1 is a sepBy variant), FParsec expects that the separator parser will either succeed, or will fail without consuming input. (If the separator fails after consuming input, FParsec considers the entire sepBy-family parser to have failed in its entirety.) But your opws parser will consume whitespace, then fail if it doesn't find a correct operator. So when your arrow_type parser parses the string a -> a followed by a newline, the arrow after the first a is correctly matched, then it sees the second a, and then it tries to find another arrow. Since what follows next is at least one whitespace character (newlines count as whitespace), the opws "->" parser ends up consuming some input before it fails. (It fails because after that whitespace is the end of the file, not another -> token). This makes the chainr1 combinator fail, so arrow_type fails and your a -> a parser ends up being parsed as a single type a. (At which point the arrow is now unexpected).
By using >>? in your definition of opws, you ensure that if the second part of the parser fails, it will backtrack to before it matched any whitespace. That ensures that the separator parser will fail without matching input and without advancing the parse position in the character stream. Therefore, the chainr1 parser succeeds after parsing a -> a and you get the expected results.
NOTE: Not long ago, I asked a similar question. This one is not a duplicate: the clarifications I need fall outside the scope of that earlier question. I am therefore opening another question, dealing with the parsing of an ML-like, indentation-based syntax in which everything is treated as an instruction / expression.
For example:
"Hello" is an expression,
let foo = 2 + 1 is an instruction using an expression (2 + 1),
print foo is an instruction, ...
In short, a syntax and semantics that is quite modular and dynamic. Like F#, or OCaml.
To do this, I use F#, with the API (available on nuget) FParsec. The FParsec wiki provides an example of a syntax based on indentation, so I have taken it up again. The module in the code below used is IndentationParserWithoutBacktracking.
The example code to be parsed uses an elementary indentation, not mixing "literal" and "instructions/expressions":
loop i 1 10
loop k 1 10
print k
print i
print j
A simple code, and without context (but this is not important at the moment).
My implementation allows codes such as:
let foo = a + b
let foo =
let a = 9
let b = 1
a + b
let foo = 7
let foo =
loop i 1 10
print i
For example. (The loop and print are there just for the tests...)
The problem I have been struggling with for over a week, and that I can't solve, is that the indentation module demands a new line every time a parser expects an instruction... Here is a screenshot:
This applies to all the examples mentioned above. I don't really understand the problem, and therefore don't know how to solve it.
Here is the code tested for this question, it meets the minimum and functional code criteria, however, FParsec must be used:
open FParsec
// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'
module IndentationParserWithoutBacktracking =
let tabStopDistance = 8
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
    let cache = stream.UserState.LastParsedIndentation
    if cache.EndIndex = stream.Index then
        cache.Value
    else
        // Not declared `mutable`: the value is never reassigned.
        let indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
        // Remember where skipping stopped and what was measured there.
        cache.EndIndex <- stream.Index
        cache.Value <- indentation
        indentation
/// Parses one or more occurrences of `p`, each preceded by the same indentation,
/// which must be greater than the indentation stored in the user state.
/// `label` is used in the "indented <label>" error message.
/// Returns the parsed values as a list.
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
// The block must be indented more deeply than the enclosing one.
if indentation <= oldIndentation then
// A negative indentation is reported as a missing newline.
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
// Keep going while `p` succeeds and what follows has the same indentation.
// The condition also stores the result and measures the next indentation.
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
// Finished normally, or reached the end of the stream after at least one element
// without the last call to `p` having changed the stream state.
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
// Leaving the block: put the enclosing indentation back.
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParserWithoutBacktracking
/// True for a space or a tab character, false for any other character.
let isBlank c =
    match c with
    | ' ' | '\t' -> true
    | _ -> false
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
// A value is just a literal or a data name, called here "Variable"
type Value =
| Int of int | Float of float
| Bool of bool | String of string
| Char of char | Variable of Identifier
// All is an instruction, but there are some differences:
type Instr =
// Arithmetic
| Literal of Value | Infix of Instr * InfixOp * Instr
// Statements (instructions needing another instructions)
| Let of Identifier * Instr list
| Loop of Identifier * Instr * Instr * Instr list
// Other - the "print" function, from the link seen above
| Print of Identifier
and InfixOp =
// Arithmetic
| Sum | Sub | Mul | Div
// Logic
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> Identifier) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable)
// Instressions and statements
let pInstrs, pInstrimpl = createParserForwardedToRef()
// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, because `ploop` requires an instruction list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pInstrs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepInstr` yields only the first instruction of the list parsed by `pInstrs`; it is used just below.
let singlepInstr =
pInstrs |>> fun ex -> ex.Head
let term =
(ws >>. singlepInstr .>> ws) <|>
(betweenParentheses (ws >>. singlepInstr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
/// Operator tokens to register with the operator-precedence parser.
let ops =
    // Arithmetic
    // "%" is not listed: `opCorrespondance` has no case for it and would raise
    // "Unknown operator: %" while parsing an expression that uses it.
    [ "+"; "-"; "*"; "/" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
/// Maps an operator token to its `InfixOp` case.
/// Raises `Failure "Unknown operator: <op>"` for any token not in the table.
let opCorrespondance op =
    let table =
        [ // Arithmetic operators
          ("+", Sum); ("-", Sub);
          ("*", Mul); ("/", Div);
          // Logical operators
          ("&&", And); ("||", Or);
          ("==", Equal); ("!=", NotEqual);
          (">", Greater); ("<", Smaller);
          (">=", GreaterEqual); ("<=", SmallerEqual) ]
    match List.tryFind (fun (symbol, _) -> symbol = op) table with
    | Some (_, infixOp) -> infixOp
    | None -> failwith ("Unknown operator: " + op)
let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
// Statements
(*
- let:
let <identifier> = <instruction(s) / value>
- print:
print <identifier>
- loop:
loop <identifier> <literal> <literal> <indented statements>
*)
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pInstrs)
(fun id exp -> Let(id, exp))
let print =
keyword "print" >>. ws1 >>. identifier
|>> Print
let instruction =
print <|> ploop <|> plet <|>
opParser.ExpressionParser <|>
literal
pInstrimpl := indentedMany1 instruction "instruction"
let document = pInstrs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
// Sample input passed as a verbatim string literal (`@"..."`).
let code = test @"
let foo = a + b
"
I would like to understand first of all why it doesn't work, but also to be able to find a solution to my problem, and that this solution can be extended to the potential syntax additions of the parser.
Awaiting a salutary answer, thank you.
In order to understand why your parser doesn't work, you need to isolate the issues.
If I understand you correctly, you want your let parser to support either a single instruction on the same line or indented instructions on subsequent lines, e.g:
let x = instruction
let b =
instruction
instruction
If you can't get your existing implementation to work, I'd recommend going back to the implementation on the Wiki and trying to just add support for the let statement.
For example, I made the Wiki parser accept simple let statements with the following modifications:
type Statement = Loop of Identifier * int * int * Statement list
| Print of Identifier
| Let of Identifier * Statement list
let ws = skipManySatisfy isBlank
let str s = pstring s .>> ws
let statement, statementRef = createParserForwardedToRef()
let indentedStatements = indentedMany1 statement "statement"
let plet = keyword "let" >>. pipe2 (ws1 >>. identifier)
(ws >>. str "=" >>. ws
>>. (indentedStatements
<|> (statement |>> fun s -> [s])))
(fun id exp -> Let(id, exp))
statementRef := print <|> loop <|> plet
Note that in the modified version statement is now the parser forwarded to a ref cell, not indentedStatements.
Note also that ws is not implemented with spaces, like in your parser. This is important because spaces also consumes newlines, which would prevent the indentedMany1 from seeing the newline and properly calculating the indentation.
The reason your parser produced an "Expecting: newline" error is that indentedMany1 needs a newline at the beginning of the indented sequence in order to be able to calculate the indentation. You would have to modify the implementation of indentedMany1 if you wanted to support e.g. the following indentation pattern:
let x = instruction
instruction
instruction
I am working on the parsing stage for the language I am making and am having difficulty with the following.
let test2 = // I'd like this to be an error.
"""
2
+ 2
"""
let result = run (spaces >>. expr) test2
val result : ParserResult<CudaExpr,unit> =
Success: Add (LitInt32 2,LitInt32 2)
I already managed to make the following example when the terms are indented incorrectly
2 +
2
give me an error, but not when the operator is on the wrong indentation level. I need something like a before-parse check.
let operators expr i =
let f expr (s: CharStream<_>) = if i <= s.Column then expr s else pzero s
opp.TermParser <- f expr
f opp.ExpressionParser
The above function is how the operators phase is structured and as you can see, the term parsers get wrapped in a function that does the indentation check, but the last line is faulty.
Here is a simplified example of the full parser.
#r "../../packages/FParsec.1.0.2/lib/net40-client/FParsecCS.dll"
#r "../../packages/FParsec.1.0.2/lib/net40-client/FParsec.dll"
open FParsec
type Expr =
| V of string
| Add of Expr * Expr
let identifier = many1Satisfy2L isAsciiLetter (fun x -> isAsciiLetter x || isDigit x || x = ''') "identifier" .>> spaces |>> V
let indentations expressions (s: CharStream<_>) =
let i = s.Column
let expr_indent expr (s: CharStream<_>) =
let expr (s: CharStream<_>) = if i <= s.Column then expr s else pzero s
many1 expr s
expr_indent (expressions i) s
let expr =
let opp = new OperatorPrecedenceParser<_,_,_>()
opp.AddOperator(InfixOperator("+", spaces, 6, Associativity.Left, fun x y -> Add(x,y)))
let operators expr i =
let f (s: CharStream<_>) = if i <= s.Column then expr s else pzero s
opp.TermParser <- f
f opp.ExpressionParser
let rec expr s = indentations (operators identifier) s
expr
let test2 = // I'd like this to be an error.
"""
a
+
b
"""
let result = run (spaces >>. expr) test2
The full parser so far can be found here.
let operators expr i =
let f (s: CharStream<_>) = if i <= s.Column then expr s else pzero s
opp.TermParser <- f
f opp.ExpressionParser
I did not realize it 2.5 weeks ago, but what happens when a new block gets opened and expr s gets called is that the term parser gets overwritten with the new indentation and there is no way to back it up and restore it on exit. I did a bit of looking around and managed to adapt the Pratt top down parsing method for my purposes.
Here is a talk by Douglas Crockford on the method.
/// Reads a run of operator characters followed by optional whitespace, and looks the
/// run up in `dict_operator`. Fails with "unknown operator" if it is not registered.
let poperator: Parser<_,_> =
    // An operator character is anything that is neither an identifier character
    // nor one of these delimiters.
    let isOperatorChar c =
        not (isAsciiIdContinue c || isAnyOf [|' ';'\t';'\n';'\"';'(';')';'{';'}';'[';']'|] c)
    (many1Satisfy isOperatorChar .>> spaces)
    >>= fun token ->
        match dict_operator.TryGetValue token with
        | true, op -> preturn op
        | false, _ -> fail "unknown operator"
/// Parses the right-hand side of an operator that has already been read and combines it
/// with `left` using the operator's mapping `m`.
/// `(prec,asoc,m)` is the operator's precedence, associativity and combining function.
let rec led poperator term left (prec,asoc,m) =
match asoc with
// Left/non-associative: the right operand is parsed with the operator's own precedence.
| Associativity.Left | Associativity.None -> tdop poperator term prec |>> m left
// Right-associative: the right operand is parsed with a precedence one lower.
| Associativity.Right -> tdop poperator term (prec-1) |>> m left
| _ -> failwith "impossible"
/// Top-down operator-precedence parser: parses a `term`, then keeps consuming operators
/// whose precedence is greater than `rbp`, together with their right-hand sides.
and tdop poperator term rbp =
let rec f left =
poperator >>= fun (prec,asoc,m as v) ->
// Only operators whose precedence is greater than `rbp` are taken here.
if rbp < prec then led poperator term left v >>= loop
else pzero
// If no further operator can be applied, backtrack and return what has been built so far.
and loop left = attempt (f left) <|>% left
term >>= loop
let operators expr i (s: CharStream<_>) =
let expr_indent expr (s: CharStream<_>) = expr_indent i (<=) expr s
let op s = expr_indent poperator s
let term s = expr_indent expr s
tdop op term 0 s
The led and tdop functions which do the actual precedence parsing are 10 lines long. The above is just a snippet of the full parser for the language I am making - in terms of syntax it is similar to F# and is indentation sensitive. Here is a more straightforward F# translation of Douglas Crockford's Javascript example.
Okay, since my last question elicited no responses, I'm forging ahead in a different direction. Lol!
I can't find any examples beyond the official documentation on managing user state, or accessing the results of a prior parser.
N.b. This code does not compile.
namespace MultipartMIMEParser
open FParsec
open System.IO
/// One header: its name, its primary value, and an optional list of
/// additional (key, value) pairs.
type Header =
    { name : string
      value : string
      addl : (string * string) list option }

/// The body of a part: either plain text or a list of nested parts.
type Content =
    | Content of string
    | Post of Post list

/// A part: its headers together with its content.
and Post =
    { headers : Header list
      content : Content }

/// Parser user state carrying the current boundary string.
type private UserState =
    { Boundary : string }
    /// Initial state: an empty boundary.
    static member Default = { Boundary = "" }
// Internal parser definitions for the multipart message format.
// The surrounding text states that this code does not compile as posted.
module internal P =
// Haskell-style application operator: `f $ x` is `f x`.
let ($) f x = f x
// Placeholder for code that has not been written yet.
// NOTE(review): this is a plain value, not a function, so `failwith` would
// presumably run when the binding is initialised rather than at each use — confirm.
let undefined = failwith "Undefined."
// Encoding handed to `runParserOnStream` in `runP`.
let ascii = System.Text.Encoding.ASCII
// Builds a string from a `char list` via `System.String.Concat`.
let str cs = System.String.Concat (cs:char list)
// Builds a `Header` from the nested tuple ((name, value), additional pairs).
let makeHeader ((n,v),nvps) = { name=n; value=v; addl=nvps}
// Runs parser `p` over stream `s` with the default user state; returns the
// result on success and raises with the "%A"-formatted error on failure.
let runP p s = match runParserOnStream p UserState.Default "" s ascii with
| Success (r,_,_) -> r
| Failure (e,_,_) -> failwith (sprintf "%A" e)
// `newline` applied twice through `parray 2`.
let blankField = parray 2 newline
// Pairs the results of two consecutive `part`s. Each part skips leading
// whitespace, then collects characters not in `d` until either `d` or the end
// marker `e` is parsed.
let delimited d e =
let pEnd = preturn () .>> e
let part = spaces >>. (manyTill $ noneOf d $ (attempt (preturn () .>> pstring d) <|> pEnd)) |>> str
in part .>>. part
// Pairs a `firstDelimiter`-delimited pair with an optional list of further
// pairs; for each further item `>>.` keeps only the `thirdDelimiter` result.
let delimited3 firstDelimiter secondDelimiter thirdDelimiter endMarker =
delimited firstDelimiter endMarker
.>>. opt (many (delimited secondDelimiter endMarker
>>. delimited thirdDelimiter endMarker))
// TODO: This is the parser I'm asking about.
// As written, `includesBoundary` is used without an argument and `b` is not
// bound anywhere, because `>>.` discards the header produced by `makeHeader`.
let pHeader =
let includesBoundary s = undefined
let setBoundary b = { Boundary=b }
in delimited3 ":" ";" "=" blankField
|>> makeHeader
>>. fun stream -> if includesBoundary // How do I access the output from makeHeader here?
then stream.UserState <- setBoundary b // I need b to be read from the output of makeHeader.
Reply ()
else Reply ()
// Applies `pHeader` repeatedly until `blankField` is parsed.
let pHeaders = manyTill pHeader $ attempt (preturn () .>> blankField)
// N.b. This is the mess I'm currently wrestling with. It does not compile, and is
// not sound yet.
// Dispatches on `boundary`: an empty string means plain-text content, anything
// else means the content is split by "--" + boundary.
let rec pContent boundary =
match boundary with
| "" -> // Content is text.
let line = restOfLine false
in pipe2 pHeaders (manyTill line $ attempt (preturn () .>> blankField))
$ fun h c -> { headers=h
; content=Content $ System.String.Join (System.Environment.NewLine,c) }
| _ -> // Content contains boundaries.
let b = "--"+boundary
let p = pipe2 pHeaders (pContent b) $ fun h c -> { headers=h; content=c }
in skipString b >>. manyTill p (attempt (preturn () .>> blankField))
// Top-level entry: runs headers followed by content over a stream.
// NOTE(review): `pContent` is passed here without its `boundary` argument.
let pStream = runP (pipe2 pHeaders pContent $ fun h c -> { headers=h; content=c })
/// Parses the message in stream `s` once, at construction time, and exposes
/// the headers and content of the resulting top-level part.
/// Lookups return "" when the requested header or parameter is absent.
type MParser (s:Stream) =
    let r = P.pStream s

    // Header and parameter names are protocol tokens, so they are compared
    // ordinally and case-insensitively. The previous `ToLower()` comparison
    // depended on the current culture (e.g. "Content-ID" does not lower-case
    // to "content-id" under tr-TR) and threw on a null name.
    let sameName (a:string) (b:string) =
        System.String.Equals (a, b, System.StringComparison.OrdinalIgnoreCase)

    // Value of the first header called `name`, or "" if there is none.
    let findHeader name =
        match r.headers |> List.tryFind (fun h -> sameName h.name name) with
        | Some h -> h.value
        | None -> ""

    /// Value of the first "boundary" parameter found among the additional
    /// (key, value) pairs of the headers, or "" if no header carries one.
    member p.Boundary =
        // Single pass: pick the boundary value directly instead of first
        // finding the header and then searching its pairs a second time.
        let boundaryOf (h:Header) =
            match h.addl with
            | Some pairs ->
                pairs
                |> List.tryPick (fun (key, value) ->
                    if sameName key "boundary" then Some value else None)
            | None -> None
        match List.tryPick boundaryOf r.headers with
        | Some b -> b
        | None -> ""
    member p.ContentID = findHeader "content-id"
    member p.ContentLocation = findHeader "content-location"
    member p.ContentSubtype = findHeader "type"
    member p.ContentTransferEncoding = findHeader "content-transfer-encoding"
    member p.ContentType = findHeader "content-type"
    member p.Content = r.content
    member p.Headers = r.headers
    member p.MessageID = findHeader "message-id"
    member p.MimeVersion = findHeader "mime-version"
A truncated example of the POST I am trying to parse follows:
content-type: Multipart/related; boundary="RN-Http-Body-Boundary"; type="multipart/related"
--RN-Http-Body-Boundary
Message-ID: <25845033.1160080657073.JavaMail.webmethods@exshaw>
Mime-Version: 1.0
Content-Type: multipart/related; type="application/xml";
boundary="----=_Part_235_11184805.1160080657052"
------=_Part_235_11184805.1160080657052
Content-Type: Application/XML
Content-Transfer-Encoding: binary
Content-Location: RN-Preamble
Content-ID: <1430586.1160080657050.JavaMail.webmethods@exshaw>
XML document begins here...
So basically, what you want to do in pHeader is to use the parser as a monad, rather than an applicative. Based on your code style you come from Haskell so I'll assume you know these words. Something like this then:
// Monadic variant: `>>=` passes the header built by `makeHeader` to a
// continuation, which may update the user state before replying with unit.
let pHeader =
    let includesBoundary s = undefined
    let setBoundary b = { Boundary = b }
    // Stores the boundary in the user state when the header carries one;
    // replies with unit either way.
    let recordBoundary header (stream : CharStream<_>) =
        if includesBoundary header then
            let b = undefined // some expression including header, if I understood correctly
            stream.UserState <- setBoundary b
        Reply ()
    delimited3 ":" ";" "=" blankField
    |>> makeHeader
    >>= recordBoundary
Or you can write it in a computation expression (which would correspond to do-notation in Haskell):
// Computation-expression variant: `let!` binds the parsed header, and the
// final raw parser function may update the user state before replying with unit.
let pHeader =
    let includesBoundary s = undefined
    let setBoundary b = { Boundary = b }
    parse {
        let! header = delimited3 ":" ";" "=" blankField |>> makeHeader
        return! fun stream ->
            // Only the state update is conditional; the reply is unit either way.
            if includesBoundary header then
                let b = undefined // some expression including header, if I understood correctly
                stream.UserState <- setBoundary b
            Reply ()
    }
Is there a way to use F#'s sprintf float formatting with a decimal comma? It would be nice if this worked:
// Wished-for syntax only: the asker would like a ',' in the format specifier
// to select a decimal comma.
sprintf "%,1f" 23.456
// expected: "23,456"
Or can I only use String.Format Method (IFormatProvider, String, Object()) ?
EDIT: I would like to have a comma, not a point, as the decimal separator, as is customary in most non-English-speaking countries.
It's quite a pain, but you can write your own version of sprintf that does exactly what you want:
open System
open System.Text.RegularExpressions
open System.Linq.Expressions
// Matches a whole printf-style format string as literal text interleaved with
// placeholders. Each literal run is captured in the "text" group and each
// placeholder ("%%" or '%' + optional flag/width/precision + type character)
// in the "placeholder" group; there is always one more "text" capture than
// "placeholder" captures.
// The pattern is a verbatim string (@"..."), so its backslashes reach the
// regex engine unchanged. The type characters are grouped so that the
// optional flag/width/precision prefix applies to every type, not only 'b'.
let printfRegex = Regex(@"^(?<text>[^%]*)((?<placeholder>%(%|((0|-|\+| )?([0-9]+)?(\.[0-9]+)?(b|c|s|d|i|u|x|X|o|e|E|f|F|g|G|M|O|A|\+A|a|t))))(?<text>[^%]*))*$", RegexOptions.ExplicitCapture ||| RegexOptions.Compiled)
/// One piece of a format string, expressed as a LINQ expression.
type PrintfExpr =
    /// A piece that consumes no argument; the expression yields its text.
    | K of Expression
    /// A placeholder: the parameter that receives the argument, and the
    /// expression that turns that parameter into a string.
    | F of ParameterExpression * Expression
/// Culture-aware variant of `sprintf` for a subset of placeholders
/// ("%%", "%f" and "%s").
///
/// `c` is the culture passed to `ToString` for "%f" arguments; `f` is the
/// format, whose type parameter 'a is the curried function type
/// 't1 -> 't2 -> ... -> string. The result is a function of that type, built
/// with an expression tree and compiled on each call.
///
/// Raises (via `failwith`/`failwithf`) when the format string is not
/// recognised, when a placeholder is unhandled, or when an argument type does
/// not match its placeholder.
let sprintf' (c:System.Globalization.CultureInfo) (f:Printf.StringFormat<'a>) : 'a =
    //'a has form 't1 -> 't2 -> ... -> string
    let cultureExpr = Expression.Constant(c) :> Expression
    let m = printfRegex.Match(f.Value)
    // A failed match has no captures at all; report it here rather than
    // letting the `Captures.[0]` lookup below fail with an index error.
    if not m.Success then
        failwithf "Unsupported format string: %s" f.Value
    let prefix = m.Groups.["text"].Captures.[0].Value
    // Argument types still to be consumed, in order, read off the curried
    // function type 'a.
    let inputTypes =
        let rec loop t =
            if Reflection.FSharpType.IsFunction t then
                let dom, rng = Reflection.FSharpType.GetFunctionElements t
                dom :: loop rng
            else
                if t <> typeof<string> then
                    failwithf "Unexpected return type: %A" t
                []
        ref(loop typeof<'a>)
    // Takes the next argument type; the empty case is handled explicitly
    // instead of through an incomplete `let (t::ts)` pattern.
    let pop() =
        match !inputTypes with
        | t::ts ->
            inputTypes := ts
            t
        | [] -> failwith "Format string has more placeholders than arguments"
    // The prefix text, then for every placeholder its expression followed by
    // the literal text after it.
    let exprs =
        K(Expression.Constant(prefix)) ::
        [for i in 0 .. m.Groups.["placeholder"].Captures.Count - 1 do
            let ph = m.Groups.["placeholder"].Captures.[i].Value
            let text = m.Groups.["text"].Captures.[i+1].Value
            // TODO: handle flags, width, precision, other placeholder types, etc.
            if ph = "%%" then yield K(Expression.Constant("%" + text))
            else
                match ph with
                | "%f" ->
                    let t = pop()
                    if t <> typeof<float> && t <> typeof<float32> then
                        failwithf "Unexpected type for %%f placeholder: %A" t
                    let e = Expression.Variable t
                    // Formats the argument by calling its ToString with the culture.
                    yield F(e, Expression.Call(e, t.GetMethod("ToString", [| typeof<System.Globalization.CultureInfo> |]), [cultureExpr]))
                | "%s" ->
                    let t = pop()
                    if t <> typeof<string> then
                        failwithf "Unexpected type for %%s placeholder: %A" t
                    let e = Expression.Variable t
                    yield F(e, e)
                | _ ->
                    failwithf "unhandled placeholder: %s" ph
                yield K (Expression.Constant text)]
    // String.Concat(string[]) over the string expression of every piece.
    let innerExpr =
        Expression.Call(typeof<string>.GetMethod("Concat", [|typeof<string[]>|]), Expression.NewArrayInit(typeof<string>, exprs |> Seq.map (fun (K e | F(_,e)) -> e)))
        :> Expression
    // FuncConvert.ToFSharpFunc overload whose parameter is a Converter<_,_>.
    let funcConvert =
        typeof<FuncConvert>.GetMethods()
        |> Seq.find (fun mi -> mi.Name = "ToFSharpFunc" && mi.GetParameters().[0].ParameterType.GetGenericTypeDefinition() = typedefof<Converter<_,_>>)
    // Wraps the concatenation in one lambda per placeholder, innermost last,
    // converting each lambda to an F# function value.
    let body =
        List.foldBack (fun pe (e:Expression) ->
            match pe with
            | K _ -> e
            | F(p,_) ->
                let m = funcConvert.MakeGenericMethod(p.Type, e.Type)
                Expression.Call(m, Expression.Lambda(m.GetParameters().[0].ParameterType, e, p))
                :> Expression) exprs innerExpr
    Expression.Lambda(body, [||]).Compile().DynamicInvoke() :?> 'a
// Usage example with the fr-FR culture; presumably yields "It worked! 1,5 > -12,3" — verify.
sprintf' (Globalization.CultureInfo.GetCultureInfo "fr-FR") "%s %f > %f" "It worked!" 1.5f -12.3
Looking at the source code of the Printf module, it appears to use invariantCulture, so I don't think printf-like functions are culture-aware.
If you always need a comma, you could use sprintf and string.Replace function. If your code is culture-dependent, using ToString or String.Format is your best bet.