given the following
/// Character limit handed to `skipCharsTillString`; Int32.MaxValue makes it effectively unbounded.
let maxCount = System.Int32.MaxValue
/// Skips a block comment: the opening "/*" and everything up to and including the closing "*/".
let pmlcomment = pstring "/*" >>. skipCharsTillString "*/" true (maxCount)
/// Skips `pspaces`, then any number of comments (each optionally surrounded by `pspaces`); yields unit.
// NOTE(review): `pspaces` is not defined in this snippet - presumably a whitespace skipper; confirm.
let ws = pspaces >>. many (pspaces >>. pmlcomment .>> pspaces) |>> (function | [] -> () | _ -> ())
/// Parses the literal string `s`, then skips trailing whitespace/comments via `ws`.
let str_ws s = pstring s .>> ws
/// Parses `p` items separated by "," between "{" and "}".
/// Known limitation (the subject of the question): "{x,x,}" is rejected, because `sepBy`
/// consumes the last "," and then requires another `p`.
let exprBraceSeqOpt p =
    // Matches a lone "," and maps it to None.
    let trailingComma = (str_ws "," |>> fun _ -> None )
    between (str_ws "{") (str_ws "}") ((opt (attempt (sepBy p (str_ws ",")))) <|> (attempt trailingComma))
/// Sample instance whose items are the literal "x".
let sampleP = exprBraceSeqOpt (str_ws "x")
it properly matches all of the following except the last one:
["{}";"{x}";"{x,x}";"{x,x,}"]
I'm guessing something is altering the state or something.
How do I handle an optional trailing comma in fparsec?
sepBy "eats up" the extra separator if it is present. That's just how it works, period. You cannot hack it by applying attempt in various places: if you apply attempt to the separator, it won't help, because the last separator actually succeeds, so attempt will have no effect. And applying attempt to the whole sepBy will also not help, because then the whole sepBy will be rolled back, not just the last separator. And applying attempt to the "x" parser itself, while achieving the desired trailing comma behavior, would also have the adverse effect of making the parser accept multiple commas in a row.
And because it is impossible to achieve the desired result via clever use of combinators, there is actually a special function for doing just what you're after - sepEndBy.
This would work as desired:
/// Parses zero or more `p` items between "{" and "}", separated by "," and
/// optionally ended by a trailing ",".
let exprBraceSeqOpt p =
    let items = sepEndBy p (str_ws ",")
    str_ws "{" >>. items .>> str_ws "}"
Also, as an aside, I should point out that function | [] -> () | _ -> () is a remarkably elaborate way to do ignore. :-)
Related
I usually use FParsec for LL grammars, but sometimes it happens that in a whole grammar only one element requires left recursive parsing (so the grammar is no longer LL). Currently I have such a situation, I have a large LL grammar implemented with FParsec, but a small grammar element is bothering me because it obviously cannot be parsed correctly.
The syntax element in question is an access to an array index à la F#, e.g. myArray.[index] where myArray can be any expression and index can be any expression too. It turns out that my function calls use square brackets, not parentheses, and my identifiers can be qualified with dots.
An example of correct syntax for an expression is: std.fold[fn, f[myArray.[0]], std.tail[myArray]].
The .[] syntax element is obviously left recursive, but perhaps there is a trick that allows me to parse it anyway? My minimal code is as follows:
open FParsec
/// A possibly dot-qualified identifier, kept as the list of its segments.
type Name = string list
/// Syntax tree for the expressions accepted by the parser below.
type Expr =
(* foo, Example.Bar.fizz *)
| Variable of Name
(* 9, 17, -1 *)
| Integer of int
(* foo[3, 2], Std.sqrt[2] *)
| FunCall of Name * Expr list
(* (a + b), (a + (1 - c)) *)
| Parens of Expr
(* myArray.[0], table.[index - 1] *)
| ArrayAccess of Expr * Expr
(* a + b *)
| Addition of Expr * Expr
/// Operator-precedence parser; its term parser and operators are assigned further down.
let opp = new OperatorPrecedenceParser<Expr, _, _>()
/// Entry point for parsing any expression.
let pExpr = opp.ExpressionParser
/// Parses one or more ASCII-letter identifiers separated by '.'.
let pName =
    let segmentOptions =
        IdentifierOptions(isAsciiIdStart = isAsciiLetter, isAsciiIdContinue = isAsciiLetter)
    sepBy1 (identifier segmentOptions) (skipChar '.')
/// A name used as a variable reference.
let pVariable = pName |>> Variable
/// A 32-bit integer literal.
let pInt = pint32 |>> Integer
/// Parses `name[arg, arg, ...]` into a FunCall node; the argument list may be empty.
let pFunCall =
    let openBracket = spaces >>. skipChar '['
    let closeBracket = spaces >>. skipChar ']'
    // Each argument may be preceded by whitespace.
    let arguments = sepBy (spaces >>. pExpr) (skipChar ',')
    pName .>>. between openBracket closeBracket arguments |>> FunCall
/// Parses `expr.[index]` into an ArrayAccess node.
// NOTE: this rule starts by running pExpr, and pExpr's term parser lists
// pArrayAccess among its choices, so the rule is left-recursive.
let pArrayAccess =
    let dot = spaces >>. skipChar '.'
    let bracketedIndex =
        between (spaces >>. skipChar '[') (spaces >>. skipChar ']') (spaces >>. pExpr)
    pExpr .>> dot .>>. bracketedIndex |>> ArrayAccess
/// Parses a parenthesised expression; whitespace is skipped after the opening '('.
let pParens =
    skipChar '(' >>. spaces >>. pExpr .>> skipChar ')'
// Terms are tried in this order, each followed by optional whitespace. pFunCall is
// wrapped in `attempt` so that a plain name can still be parsed as a variable.
opp.TermParser <-
    (attempt pFunCall <|> pVariable <|> pArrayAccess <|> pInt <|> pParens)
    .>> spaces
/// Registers the binary operator `str` on `opp`; whitespace after the operator is
/// skipped and `mapping` builds the resulting node from the two operands.
let addInfixOperator str prec assoc mapping =
    // The first argument is the (unit) result of the after-string parser.
    let combine _ leftTerm rightTerm = mapping leftTerm rightTerm
    opp.AddOperator(InfixOperator(str, spaces, prec, assoc, (), combine))
addInfixOperator "+" 6 Associativity.Left (fun lhs rhs -> Addition(lhs, rhs))
/// Parses a complete string as one expression (input must end right after it).
let startParser = runParserOnString (pExpr .>> eof) () ""
startParser "std.fold[fn, f[myArray.[0]], std.tail[myArray]]" |> printfn "%A"
One way to do this is as follows: instead of making a list of parsing choices that also lists pArrayAccess like above, which will at some point cause an infinite loop, one can modify pExpr to parse the grammar element in question as an optional element following an expression:
/// Parses an expression optionally followed by a single `.[index]` suffix.
/// The index is parsed with opp.ExpressionParser, so it cannot itself carry a suffix.
let pExpr =
    let pArrayAccess =
        between (skipString ".[") (skipString "]") opp.ExpressionParser
    pipe2 opp.ExpressionParser (opt pArrayAccess) (fun exp suffix ->
        match suffix with
        | Some index -> ArrayAccess(exp, index)
        | None -> exp)
After testing, it turns out that this works very well, but only as long as the following two restrictions are respected:
The contents of the square brackets must not contain access to another array ;
An array cannot be accessed a second time in succession (my2DArray.[x].[y]).
This restricts usage somewhat. How can I get away with this? Is there a way to do this or do I have to change the grammar?
Finally, a solution to this problem is quite simple: just expect a list of array access. If the list is empty, then return the initial expression, otherwise fold over all the array accesses and return the result. Here is the implementation:
/// Parses an expression followed by any number of `.[index]` suffixes, where each
/// index is itself a full pExpr. Suffixes nest to the left: a.[x].[y] becomes
/// ArrayAccess(ArrayAccess(a, x), y).
let rec pExpr =
    parse {
        let! head = opp.ExpressionParser
        let! indices = many (between (skipString ".[") (skipString "]") pExpr)
        // Folding over an empty list returns `head` unchanged, so the
        // "no suffix" case needs no separate branch.
        return List.fold (fun target index -> ArrayAccess(target, index)) head indices
    }
This way of doing things meets my needs, so I'm happy with it. If anyone passing by needs something more general that this solution does not cover, I refer them to @Martin Freedman's comment about using createParserForwardedToRef().
I'm parsing a file and want to throw away certain lines of the file I'm not interested in. I've been able to get this to work for all cases except for when the last line is a throwaway and does not end in newline.
I've tried constructing an endOfInput rule and joining it with a skipLine rule via <|>. This is all wrapped in a many. Tweaking everything I seem to either get a 'many succeeds without consuming input...' error or a fail on the skipLine rule when I don't try some kind of back track.
// Consumes the characters of one line and then REQUIRES a newline. On a final line
// without a newline it fails after having consumed input, so `<|>` does not try
// the alternative.
let skipLine = many (noneOf "\n") .>> newline |>> fun x -> [string x]
// Consumes the remaining non-newline characters and then requires end of input.
let endOfInput = many (noneOf "\n") .>> eof |>> fun x -> [string x]
test (many (skipLine <|> endOfInput)) "And here is the next.\nThen the last."
** this errors out on the skipLine parser at the last line
I've tried
// Variant using `.>>?`, which backtracks to the start of the line when `newline` fails,
// so that the alternative parser gets a chance to run.
let skipLine = many (noneOf "\n") .>>? newline |>> fun x -> [string x]
... and ...
// Variant that wraps skipLine in `attempt` at the call site instead of using `.>>?`.
let skipLine = many (noneOf "\n") .>> newline |>> fun x -> [string x]
// NOTE: at end of input `endOfInput` succeeds without consuming anything, which is
// what makes the outer `many` raise its "succeeds without consuming input" error.
test (many (attempt skipLine <|> endOfInput)) "And here is the next.\nThen the last."
** these produce the many error
Note: the output functions are just place holders to get these to work with my other rules. I haven't gotten into figuring out how to format the output.
This is my first time using FParsec and I'm new to F#.
FParsec actually has a built-in parser that does exactly what you're looking for: skipRestOfLine. It terminates on either newlines or eof, just like what you're looking for.
If you want to try to implement it yourself as a learning exercise, let me know and I'll try to help you figure out the problem. But if you just want a parser that skips characters until the end of the line, the built-in skipRestOfLine is exactly what you need.
Here's an approach to parsing such files using an Option type.
It will help you parse files that end with a newline and skip blank lines in the middle. I got the solution from this post: "fparsec key-value parser fails to parse". The example below parses a text file with integer values in one column:
/// Parses a text file holding one integer per line, ignoring every line that is
/// not an integer (blank lines included).
module OptionIntParser =
    open FParsec
    open System
    open System.IO

    /// A single integer cell.
    let pCell: Parser<int, unit> = pint32
    /// A line that holds an integer.
    let pSome = pCell |>> Some
    /// Any other line: skip up to (but not including) the line end and yield None.
    let pNone = (restOfLine false) >>% None
    /// Tries the integer first; `attempt` rewinds if it fails part-way so that
    /// pNone can skip the line instead.
    let pLine = (attempt pSome) <|> pNone
    /// All lines, separated by newlines, keeping only the integers that parsed.
    let pAllover = sepBy pLine newline |>> List.choose id

    /// Reads the whole file at `filePath` as text.
    let readFile (filePath: string) = File.ReadAllText(filePath)

    let testStr = readFile("./test1.txt")

    /// Runs `pAllover` over `s`. Returns the parsed integers, or an empty list
    /// after writing the parser's error message to stderr.
    let runAll s =
        match run pAllover s with
        | Success (rows, _, _) -> rows
        | Failure (message, _, _) ->
            // Report the failure instead of discarding it silently.
            eprintfn "%s" message
            []

    /// Prints every parsed integer on its own line.
    let myTest =
        runAll testStr
        |> List.iter (fun x -> Console.WriteLine(x.ToString()))
I'm trying to use FParsec to parse a TOML multi-line string, and I'm having trouble with the closing delimiter ("""). I have the following parsers:
/// The control characters U+0000 through U+001F, followed by DEL (U+007F).
let controlChars = ['\u0000' .. '\u001f'] @ ['\u007f']
/// The control characters other than line feed, carriage return and tab.
let nonSpaceCtrlChars =
    Set.ofList controlChars - Set.ofList ['\n';'\r';'\t']
/// One character of string content: anything that is not in `nonSpaceCtrlChars`.
let multiLineStringContents : Parser<char,unit> =
    satisfy (isNoneOf nonSpaceCtrlChars)
/// Attempts to parse a `"""`-delimited string, dropping one newline that directly
/// follows the opening delimiter.
// NOTE: `manyCharsTill` consumes its terminator as well, so nothing is left for the
// closing delimiter that `between` expects - this is the failure the question asks about.
let multiLineString : Parser<string,unit> =
    optional newline >>. manyCharsTill multiLineStringContents (pstring "\"\"\"")
    |> between (pstring "\"\"\"") (pstring "\"\"\"")
/// Runs `parser` on `str` and prints the three components of the reply,
/// prefixed with "Ok:" on success or "Fail:" on failure.
let test parser str =
    let reply = run parser str
    match reply with
    | Failure (message, error, state) -> printfn "Fail: %A %A %A" message error state
    | Success (result, state, position) -> printfn "Ok: %A %A %A" result state position
When I test multiLineString against an input like this:
test multiLineString "\"\"\"x\"\"\""
The parser fails with this error:
Fail: "Error in Ln: 1 Col: 8 """x"""
^ Note: The error occurred at the end of the input stream. Expecting: '"""'
I'm confused by this. Wouldn't the manyCharsTill multiLineStringContents (pstring "\"\"\"") parser stop at the """ for the between parser to find it? Why is the parser eating all the input and then failing the between parser?
This seems like a relevant post: How to parse comments with FParsec
But I don't see how the solution to that one differs from what I'm doing here, really.
The manyCharsTill documentation says (emphasis mine):
manyCharsTill cp endp parses chars with the char parser cp until the parser endp succeeds. It stops after endp and returns the parsed chars as a string.
So you don't want to use between in combination with manyCharsTill; you want to do something like pstring "\"\"\"" >>. manyCharsTill multiLineStringContents (pstring "\"\"\"").
But as it happens, I can save you a lot of work. I've been working on a TOML parser with FParsec myself in my spare time. It's far from complete, but the string part works and handles backslash escapes correctly (as far as I can tell: I've tested thoroughly but not exhaustively). The only thing I'm missing is the "strip first newline if it appears right after the opening delimiter" rule, which you've handled with optional newline. So just add that bit into my code below and you should have a working TOML string parser.
BTW, I am planning to license my code (if I finish it) under the MIT license. So I hereby release the following code block under the MIT license. Feel free to use it in your project if it's useful to you.
// NOTE(review): `exactly` is not defined in this snippet - presumably it parses exactly
// n characters satisfying the predicate and returns them as a string; confirm.

/// Four hex digits for a code point from 0000 to FFFF, *except* the range D800-DFFF.
let pShortCodepointInHex =
    let secondDigit =
        anyOf "01234567" <?> "a Unicode scalar value (range D800-DFFF not allowed)"
    // A leading 'd'/'D' commits to this branch, so D8xx-DFxx is an error rather
    // than falling through to the general four-digit case.
    let startingWithD =
        anyOf "dD" >>. secondDigit .>>. exactly 2 isHex
        |>> fun (c, s) -> sprintf "d%c%s" c s
    startingWithD <|> (exactly 4 isHex <?> "a Unicode scalar value")
/// Eight hex digits for a code point from 00000000 to 0010FFFF, *except* the range D800-DFFF.
let pLongCodepointInHex =
    let withinBmp = pstring "0000" >>. pShortCodepointInHex
    let planes1To15 = pstring "000" >>. exactly 5 isHex
    let plane16 = pstring "0010" >>. exactly 4 isHex |>> fun s -> "0010" + s
    choice [ withinBmp; planes1To15; plane16 ]
    <?> "a Unicode scalar value (i.e., in range 00000000 to 0010FFFF)"
/// Runs `p`, then converts the hex text it consumed into the corresponding string
/// (one char, or a surrogate pair for code points above FFFF).
let toCharOrSurrogatePair p =
    let decode (hex: string) =
        System.Int32.Parse(hex, System.Globalization.NumberStyles.HexNumber)
        |> System.Char.ConvertFromUtf32
    p |> withSkippedString (fun codePoint _ -> decode codePoint)
/// Parses the character following a backslash in a standard escape and returns
/// the string it stands for.
let pStandardBackslashEscape =
    let translate escapeChar =
        match escapeChar with
        | 'b' -> "\b"      // U+0008 BACKSPACE
        | 'f' -> "\u000c"  // U+000C FORM FEED
        | 'n' -> "\n"      // U+000A LINE FEED
        | 'r' -> "\r"      // U+000D CARRIAGE RETURN
        | 't' -> "\t"      // U+0009 CHARACTER TABULATION a.k.a. Tab or Horizontal Tab
        | other -> string other  // backslash and double quote stand for themselves
    anyOf "\\\"bfnrt" |>> translate
/// Parses the part after the backslash of a \uXXXX or \UXXXXXXXX escape.
let pUnicodeEscape =
    let shortForm = pchar 'u' >>. toCharOrSurrogatePair pShortCodepointInHex
    let longForm = pchar 'U' >>. toCharOrSurrogatePair pLongCodepointInHex
    shortForm <|> longForm
/// Parses a backslash followed by a standard or Unicode escape.
let pEscapedChar = pstring "\\" >>. (pStandardBackslashEscape <|> pUnicodeEscape)
/// A single double-quote character.
let quote = pchar '"'
/// True for characters allowed unescaped in a basic string: not a backslash, not a
/// double quote, above U+001F and not U+007F.
let isBasicStrChar c = c <> '\\' && c <> '"' && c > '\u001f' && c <> '\u007f'
let pBasicStrChars = manySatisfy isBasicStrChar
/// A single-line basic string: runs of plain characters interleaved with escapes,
/// between double quotes.
let pBasicStr = stringsSepBy pBasicStrChars pEscapedChar |> between quote quote
/// A line-ending backslash: the backslash, the newline, and all following whitespace.
let pEscapedNewline = skipChar '\\' .>> skipNewline .>> spaces
/// Multi-line strings additionally allow raw newlines.
let isMultilineStrChar c = c = '\n' || isBasicStrChar c
let pMultilineStrChars = manySatisfy isMultilineStrChar
let pTripleQuote = pstring "\"\"\""
/// A multi-line basic string between triple quotes.
let pMultilineStr =
    // A line-ending backslash contributes nothing to the result. `attempt` rewinds
    // when the backslash is not followed by a newline, so that ordinary escapes
    // are still handled by pEscapedChar.
    let lineContinuation = attempt pEscapedNewline >>% ""
    // One or two quotes may appear inside the string, as long as they do not
    // form the closing delimiter.
    let loneQuote = notFollowedByString "\"\"\"" >>. pstring "\""
    stringsSepBy pMultilineStrChars (lineContinuation <|> pEscapedChar <|> loneQuote)
    |> between pTripleQuote pTripleQuote
@rmunn provided a correct answer, thanks! I also solved this in a slightly different way after playing with the FParsec API a bit more. As explained in the other answer, the endp argument to manyCharsTill was eating the closing """, so I needed to switch to something that wouldn't do that. A simple modification using lookAhead did the trick:
/// Parses a `"""`-delimited string, dropping one newline that directly follows the
/// opening delimiter. `lookAhead` stops the content at the closing delimiter
/// without consuming it, leaving it for `between`.
let multiLineString : Parser<string,unit> =
    let tripleQuote = pstring "\"\"\""
    let contents =
        optional newline >>. manyCharsTill multiLineStringContents (lookAhead tripleQuote)
    between tripleQuote tripleQuote contents
I wish to parse a string in to a recursive data structure using F#. In this question I'm going to present a simplified example that cuts to the core of what I want to do.
I want to parse a string of nested square brackets in to the record type:
/// A pair of square brackets that optionally contains one nested pair.
type Bracket = | Bracket of Bracket option
So:
"[]" -> Bracket None
"[[]]" -> Bracket ( Some ( Bracket None) )
"[[[]]]" -> Bracket ( Some ( Bracket ( Some ( Bracket None) ) ) )
I would like to do this using the parser combinators in the FParsec library. Here is what I have so far:
/// Yields `Some result` when `parser` succeeds, and `None` when it fails
/// without consuming input.
let tryP parser = opt parser
/// Parses bracket pairs nested at most three levels deep.
let parseBrakets : Parser<_> =
    // Wraps `inner` in one more '[' ... ']' level; the inner bracket is optional.
    let nestAround inner =
        pchar '[' >>. tryP inner .>> pchar ']' |>> Bracket
    // The deepest level is always an empty pair.
    let innermost = pchar '[' >>. pchar ']' >>% Bracket None
    innermost |> nestAround |> nestAround
I even have some Expecto tests:
open Expecto
/// One test case per sample input, each checking the parsed Bracket value.
[<Tests>]
let parserTests =
    let cases =
        [ "[]", Bracket None
          "[[]]", Bracket (Some (Bracket None))
          "[[[]]]", Bracket (Some (Bracket (Some (Bracket None)))) ]
    let toTestCase (input, expected) =
        testCase (sprintf "Trying to parse %s" input) (fun () ->
            match run parseBrakets input with
            | Success (actual, _, _) -> Expect.equal actual expected "These should have been equal"
            | Failure (message, _, _) -> failwithf "Expected a match: %s" message)
    testList "Bracket tests" (List.map toTestCase cases)
let tests = testList "Tests" [ parserTests ]
runTests defaultConfig tests
The problem is of course how to handle an arbitrary level of nesting - the code above only works for up to 3 levels. The code I would like to write is:
// The definition the question would like to write. As the text notes, F# does not
// accept it: pNestedBracket is a value that is used inside its own definition.
let rec pNestedBracket =
    pchar '['
    >>. tryP pNestedBracket
    .>> pchar ']'
    |>> Bracket
But F# doesn't allow this.
Am I barking up the wrong tree completely with how to solve this (I understand that there are easier ways to solve this particular problem)?
You are looking for FParsec's createParserForwardedToRef function. Because parsers are values and not functions, it is impossible to define mutually recursive or self-recursive parsers directly; to do this you have to, in a sense, declare a parser before you define it.
Your final code will end up looking something like this
// Declare the parser first: bracketParser forwards every call to whatever parser
// is stored in bracketParserRef. The user state is unit, as required by `run`.
let bracketParser, bracketParserRef = createParserForwardedToRef<Bracket, unit>()
// Now define it. The implementation may refer to bracketParser, which is what
// makes the parser recursive.
bracketParserRef := pchar '[' >>. opt bracketParser .>> pchar ']' |>> Bracket
Also I would recommend this article for basic understanding of parser combinators. https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/. The final section on a JSON parser talks about the createParserForwardedToRef method.
As an example of how to use createParserForwardedToRef, here's a snippet from a small parser I wrote recently. It parses lists of space-separated integers between brackets (and the lists can be nested), and the "integers" can be small arithmetic expressions like 1+2 or 3*5.
type ListItem =
| Int of int
| List of ListItem list
let pexpr = // ... omitted for brevity
// plist is declared before it is defined, so pListContents can refer to it.
let plist,plistImpl = createParserForwardedToRef()
/// Contents of a list: either one or more nested lists, or zero or more
/// expressions, each followed by optional whitespace.
let pListContents =
    (many1 (plist |>> List .>> spaces)) <|>
    (many (pexpr |>> Int .>> spaces))
// The actual definition of plist: '[' whitespace contents ']'.
plistImpl := pchar '[' >>. spaces
             >>. pListContents
             .>> pchar ']'
P.S. I would have put this as a comment to Thomas Devries's answer, but a comment can't contain nicely-formatted code. Go ahead and accept his answer; mine is just intended to flesh his out.
I tried to parse a prefix function such as Pow(3+2,2) using FParsec. I read the calculator tutorial in the example files, shown below. The examples there are all unary prefix functions. I wonder how I can implement prefix functions with more than one input using FParsec.OperatorPrecedenceParser.
http://www.quanttec.com/fparsec/reference/operatorprecedenceparser.html#members.PrefixOperator
/// A floating-point literal followed by optional whitespace.
let number = pfloat .>> ws
let opp = new OperatorPrecedenceParser<float,unit,unit>()
let expr = opp.ExpressionParser
// A term is a number or a parenthesised expression.
let parenthesized = str_ws "(" >>. expr .>> str_ws ")"
opp.TermParser <- number <|> parenthesized
// Left-associative binary arithmetic operators.
let addLeftInfix symbol precedence (apply: float -> float -> float) =
    opp.AddOperator(InfixOperator(symbol, ws, precedence, Associativity.Left, apply))
addLeftInfix "+" 1 (+)
addLeftInfix "-" 1 (-)
addLeftInfix "*" 2 (*)
addLeftInfix "/" 2 (/)
// Exponentiation is right-associative and binds tighter than the operators above.
opp.AddOperator(InfixOperator("^", ws, 3, Associativity.Right, fun x y -> System.Math.Pow(x, y)))
opp.AddOperator(PrefixOperator("-", ws, 4, true, fun x -> -x))
// Named prefix functions must not be directly followed by a letter.
let ws1 = nextCharSatisfiesNot isLetter >>. ws
opp.AddOperator(PrefixOperator("log", ws1, 4, true, System.Math.Log))
opp.AddOperator(PrefixOperator("exp", ws1, 4, true, System.Math.Exp))
Update 1
I've written a quick script following after-string parser example as I need after-string parser for the actual application
http://www.quanttec.com/fparsec/users-guide/tips-and-tricks.html#parsing-f-infix-operators
abs(pow(1,2)) can be parsed but pow(abs(1),2) cannot be done. I'm puzzled about how to use prefix function as part of the input for identWithArgs.
// Add the FParsec package folder to the assembly search path (verbatim string, so
// the backslashes are not treated as escapes), then reference both assemblies.
#I @"..\packages\FParsec.1.0.2\lib\net40-client"
#r "FParsecCS.dll"
#r "FParsec.dll"
open FParsec
/// Named functions that are called with a parenthesised argument list.
type PrefixFunc = POW
/// Expression tree produced by the parser below.
type Expr =
| InfixOpExpr of string * Expr * Expr
| PrefixOpExpr of string * Expr
| PrefixFuncExpr of PrefixFunc * Expr list
| Number of int
// Optional whitespace.
let ws = spaces
// At least one whitespace character.
let ws1 = spaces1
// The literal string s.
let str s = pstring s
// The literal string s with optional whitespace on both sides.
let str_ws s = ws >>. str s .>> ws
// Case-insensitive variants of the two parsers above.
let strci s = pstringCI s
let strci_ws s = ws >>. strci s .>> ws
// Like strci_ws, but yields x instead of the matched text.
let strciret_ws s x = ws >>. strci s .>> ws >>% x
// True for characters that may appear in a symbolic operator.
let isSymbolicOperatorChar = isAnyOf "!%&*+-./<=>#^|~?"
// The operator characters following an operator prefix, then optional whitespace.
let remainingOpChars_ws = manySatisfy isSymbolicOperatorChar .>> ws
// An integer literal followed by optional whitespace.
let primitive = pint32 .>> ws |>> Number
// Comma-separated arguments. NOTE: only integer literals are accepted here, so a
// nested call such as abs(1) cannot appear as an argument.
let argList = sepBy primitive (str_ws ",")
let argListInParens = between (str_ws "(") (str_ws ")") argList
// The function name "pow" (any letter case), yielding POW.
let prefixFunc = strciret_ws "pow" POW
/// A function name followed by its parenthesised argument list.
let identWithArgs =
    prefixFunc .>>. argListInParens |>> PrefixFuncExpr
let opp = new OperatorPrecedenceParser<Expr, string, unit>()
// A term is an integer, a function call, or a parenthesised expression.
opp.TermParser <-
    choice [ primitive
             identWithArgs
             between (pstring "(") (pstring ")") opp.ExpressionParser ]
// Helper for adding infix operators to opp: any operator that starts with
// `prefix` is accepted, and the node records the complete operator text.
let addSymbolicInfixOperators (prefix: string) precedence associativity =
    let build (remOpChars: string) lhs rhs =
        InfixOpExpr(prefix + remOpChars, lhs, rhs)
    opp.AddOperator(
        InfixOperator(prefix, remainingOpChars_ws, precedence, associativity, (), build))
// the operator definitions:
addSymbolicInfixOperators "*" 10 Associativity.Left
addSymbolicInfixOperators "**" 20 Associativity.Right
// Named prefix operators; the node records only the operator name.
let addNamedPrefixOperator name =
    opp.AddOperator(
        PrefixOperator(name, remainingOpChars_ws, 3, true, (),
                       fun _ operand -> PrefixOpExpr(name, operand)))
addNamedPrefixOperator "abs"
addNamedPrefixOperator "log"
run opp.ExpressionParser "abs(pow(1,2))"
run opp.ExpressionParser "pow(abs(1),2)"
I started to review the problem after one year and finally realized the problem.
I've changed the following code
let argList = sepBy primitive (str_ws ",")
to the following
// Create the operator-precedence parser first so that argList can refer to it.
let opp = new OperatorPrecedenceParser<Expr, string, unit>()
// Arguments are now full expressions, so nested calls such as abs(1) are accepted.
let argList = sepBy opp.ExpressionParser (str_ws ",")
I moved the creation of the OperatorPrecedenceParser to the beginning of the code, and then made the argument list recursive by using opp.ExpressionParser directly in argList.
I just realized that OperatorPrecedenceParser is very similar to createParserForwardedToRef. It creates a parser first without writing down implementation until later. FParsec has to achieve recursiveness in this way. Similar to its JSON sample parser.
After this change, both abs(pow(1,2)) and pow(abs(1),2) can be parsed. Hope this helps others who ever got this problem.