I have this program that I have created in F#. I need to accept a sentence and then return the sentence stating their each words/character's part of speech and the word/character. How do I read something in, match with the mapStrToken and print out the word/character and the part of speech?
I have this code here:
type tokens = Art of string | Noun of string | Adj of string
| Adv of string |
| Verb of string | Comma of string | Conj of string
|Prep of string | EOS of string | NONE;;
let mapStrToken str =
match str with
| "dog" -> Noun str | "cat" -> Noun str | "mouse" -> Noun str | "tree" -> Noun str | "cheese" -> Noun str
| "a" -> Art str | "the" -> Art str
| "likes" -> Verb str | "hates" -> Verb str | "eats" -> Verb str | "chases" -> Verb str
| "," -> Comma str
| "." -> EOS str | "!" -> EOS str
| "fast" -> Adj str | "slow" -> Adj str | "furry" -> Adj str | "yellow" -> Adj str
| "quickly" -> Adv str | "slowly" -> Adv str | "quietly" -> Adv str
| "and" -> Conj str | "or" -> Conj str
| "of" -> Prep str | "around" -> Prep str
| _ -> NONE
Not sure if this is what you are asking, but try this.
let tokenArray = "a fast , yellow dog".Split(' ') |> Array.map mapStrToken
let doSomethingWithTokens = function
| [Art; Adj; Comma; Adj; Noun] -> Some("article adjective comma adjective noune")
| _ -> None
tokenArray |> doSomethingWithtokens
Related
I wrote a predictive parser for a LL1 grammar. Each nonterminal A has a corresponding parseA method, which takes in a tokenlist, and returns the remainder of tokenlist and a parse tree.
I'm confused about which AST method to call in my parser. Is there a general approach to figuring this out?
This is my attempt:
Take for instance a subsection of my grammar:
expr -> t eprime
eprime -> PLUS t eprime | MINUS t eprime | ε
t -> t tprime
tprime -> TIMES f tprime | DIVIDE f tprime | ε
f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID
I have four parse methods, one for each nonterminal.
let parseExpr tokenlist =
match tokenlist.head with
| "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
let parseEPrime tokenlist =
match tokenlist with
| "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Add(expr_t, expr_eprime))
| "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Minus(expr_t, expr_eprime))
| "SEMI" -> (tokenlist, [])
| "RPAREN" -> (tokenlist, [])
| _ -> raise error
let parseT tokenlist =
match tokenlist.lookathead with
| "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
| "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "ID" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| _-> raise error
let parseTprime tokenlist =
match tokenlist.lookathead with
| "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in
let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
(tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
| "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in
let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
(tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
| "PLUS" -> (tokenlist, [])
| "MINUS" -> (tokenlist, [])
| "SEMI" -> (tokenlist, [])
| "RPAREN" -> (tokenlist, [])
| _ -> raise error
let parseF tokenlist =
match tokenlist.lookathead with
| "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in
match next tokenlist_expr with
| "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
| "LITERAL" -> (next tokenlist, Ast.FLiteral)
| "TRUE" -> (next tokenlist, Ast.BoolLit)
| "FALSE" -> (next tokenlist, Ast.FBool)
| "ID" -> (next tokenlist, Ast.Id)
| _ -> raise error
As you can probably tell from my code, I wrote a type for every nonterminal, and then had a method for every production of that nonterminal.
(*expr -> T E* *)
type expr =
| Expression of t eprime
(*T -> F T*)
type t =
| F of f * tprime
(*E* -> + T E*
E* -> - T E*
E* -> ε *)
type eprime =
| Add of t eprime
| Minus of t eprime
| Eempty
(*T* -> TIMES F T*
T* -> / F T*
T* -> ε*)
type tprime =
| Divide of f * tprime
| Times of f * tprime
| TEmpty
(*F -> LPAREN E RPAREN
F -> Literal
F -> TRUE
F -> FALSE
F -> ID*)
type f =
| ExpressionParen of expr
| Literal of int
| BoolLit of bool
| Id of string
But I don't know my approach keeps too much unnecessary information than a AST would normally shake out (I imagine an AST to be a parse tree that shakes and rids itself of unnecessary leaves). So far, I've just left off the parentheses and semi colons. I'm afraid I'm leaving too much on by having type t, type f, type tprime, type eprime in my AST. But if I were to remove them, I wouldn't know how to write the type expr in my AST.
Given an AST defined as such:
type expr =
| Add of expr * expr
| Minus of expr * expr
| Times of expr * expr
| Divide of expr * expr
| IntLit of int
| BoolLit of bool
| Id of string
You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:
let parseExpr tokens =
let (lhs, remainingTokens) = parseT tokens in
parseExprPrime lhs remainingTokens
let parseExprPrime lhs tokens = match tokenlist.lookahead with
| PLUS :: tokens ->
let (rhs, remainingTokens) = parseT (next tokens) in
parseExprPrime (Add (lhs, rhs)) remainingTokens
| MINUS :: tokens ->
let (rhs, remainingTokens) = parseT (next tokens) in
parseExprPrime (Minus (lhs, rhs)) remainingTokens
| tokens ->
lhs, tokens
parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.
Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.
As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:
let rec eval = function
| Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime
and evalEPrime lhsValue = function
| Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
| Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
| Eempty -> lhsValue
and evalT = function
| T (lhs, tprime) -> evalTPrime (evalF lhs) tprime
and evalTPrime lhsValue = function
| Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
| Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
| TEmpty -> lhsValue
and evalF = function
| ExpressionParen expr -> eval expr
| IntLit i -> i
Using the simplified defintiion it would instead be:
let rec eval = function
| Add (lhs, rhs) -> eval lhs + eval rhs
| Minus (lhs, rhs) -> eval lhs - eval rhs
| Times (lhs, rhs) -> eval lhs * eval rhs
| Divide (lhs, rhs) -> eval lhs / eval rhs
| IntLit i -> i
So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.
It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.
I don't know that this is so bad, it's still a good representation of the code.
One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.
You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.
I've written a typical evaluator for simple math expressions (arithmetic with some custom functions) in F#. While it seems to be working correctly, some expressions don't evaluate as expected, for example, these work fine:
eval "5+2" --> 7
eval "sqrt(25)^2" --> 25
eval "1/(sqrt(4))" --> 0.5
eval "1/(2^2+2)" --> 1/6 ~ 0.1666...
but these don't:
eval "1/(sqrt(4)+2)" --> evaluates to 1/sqrt(6) ~ 0.408...
eval "1/(sqrt 4 + 2)" --> will also evaluate to 1/sqrt(6)
eval "1/(-1+3)" --> evaluates to 1/(-4) ~ -0.25
the code works as follows, tokenization (string as input) -> to rev-polish-notation (RPN) -> evalRpn
I thought that the problem seems to occur somewhere with the unary functions (functions accepting one operator), these are the sqrt function and the negation (-) function. I don't really see what's going wrong in my code. Can someone maybe point out what I am missing here?
this is my implementation in F#
open System.Collections
open System.Collections.Generic
open System.Text.RegularExpressions
type Token =
| Num of float
| Plus
| Minus
| Star
| Hat
| Sqrt
| Slash
| Negative
| RParen
| LParen
let hasAny (list: Stack<'T>) =
list.Count <> 0
let tokenize (input:string) =
let tokens = new Stack<Token>()
let push tok = tokens.Push tok
let regex = new Regex(#"[0-9]+(\.+\d*)?|\+|\-|\*|\/|\^|\)|\(|pi|e|sqrt")
for x in regex.Matches(input.ToLower()) do
match x.Value with
| "+" -> push Plus
| "*" -> push Star
| "/" -> push Slash
| ")" -> push LParen
| "(" -> push RParen
| "^" -> push Hat
| "sqrt" -> push Sqrt
| "pi" -> push (Num System.Math.PI)
| "e" -> push (Num System.Math.E)
| "-" ->
if tokens |> hasAny then
match tokens.Peek() with
| LParen -> push Minus
| Num v -> push Minus
| _ -> push Negative
else
push Negative
| value -> push (Num (float value))
tokens.ToArray() |> Array.rev |> Array.toList
let isUnary = function
| Negative | Sqrt -> true
| _ -> false
let prec = function
| Hat -> 3
| Star | Slash -> 2
| Plus | Minus -> 1
| _ -> 0
let toRPN src =
let output = new ResizeArray<Token>()
let stack = new Stack<Token>()
let rec loop = function
| Num v::tokens ->
output.Add(Num v)
loop tokens
| RParen::tokens ->
stack.Push RParen
loop tokens
| LParen::tokens ->
while stack.Peek() <> RParen do
output.Add(stack.Pop())
stack.Pop() |> ignore // pop the "("
loop tokens
| op::tokens when op |> isUnary ->
stack.Push op
loop tokens
| op::tokens ->
if stack |> hasAny then
if prec(stack.Peek()) >= prec op then
output.Add(stack.Pop())
stack.Push op
loop tokens
| [] ->
output.AddRange(stack.ToArray())
output
(loop src).ToArray()
let (#) op tok =
match tok with
| Num v ->
match op with
| Sqrt -> Num (sqrt v)
| Negative -> Num (v * -1.0)
| _ -> failwith "input error"
| _ -> failwith "input error"
let (##) op toks =
match toks with
| Num v,Num u ->
match op with
| Plus -> Num(v + u)
| Minus -> Num(v - u)
| Star -> Num(v * u)
| Slash -> Num(u / v)
| Hat -> Num(u ** v)
| _ -> failwith "input error"
| _ -> failwith "inpur error"
let evalRPN src =
let stack = new Stack<Token>()
let rec loop = function
| Num v::tokens ->
stack.Push(Num v)
loop tokens
| op::tokens when op |> isUnary ->
let result = op # stack.Pop()
stack.Push result
loop tokens
| op::tokens ->
let result = op ## (stack.Pop(),stack.Pop())
stack.Push result
loop tokens
| [] -> stack
if loop src |> hasAny then
match stack.Pop() with
| Num v -> v
| _ -> failwith "input error"
else failwith "input error"
let eval input =
input |> (tokenize >> toRPN >> Array.toList >> evalRPN)
Before answering your specific question, did you notice you have another bug? Try eval "2-4" you get 2.0 instead of -2.0.
That's probably because along these lines:
match op with
| Plus -> Num(v + u)
| Minus -> Num(v - u)
| Star -> Num(v * u)
| Slash -> Num(u / v)
| Hat -> Num(u ** v)
u and v are swapped, in commutative operations you don't notice the difference, so just revert them to u -v.
Now regarding the bug you mentioned, the cause seems obvious to me, by looking at your code you missed the precedence of those unary operations:
let prec = function
| Hat -> 3
| Star | Slash -> 2
| Plus | Minus -> 1
| _ -> 0
I tried adding them this way:
let prec = function
| Negative -> 5
| Sqrt -> 4
| Hat -> 3
| Star | Slash -> 2
| Plus | Minus -> 1
| _ -> 0
And now it seems to be fine.
Edit: meh, seems I was late, Gustavo posted the answer while I was wondering about the parentheses. Oh well.
Unary operators have the wrong precedence. Add the primary case | a when isUnary a -> 4 to prec.
The names of LParen and RParen are consistently swapped throughout the code. ( maps to RParen and ) to LParen!
It runs all tests from the question properly for me, given the appropriate precedence, but I haven't checked the code for correctness.
I want to create something that's kind of like an enum with an F# record type for a value instead of an int. For example, if I've got the union:
type BologneseIngredients = | Spaghetti
| Tomatoes
| MincedBeef
| GrandmasSecretIngredient
I know that spaghetti is always 30cm long and tomatoes are always red. What I could do is have a 'get metadata' function:
let getMetadata = function
| Spaghetti -> { length: 30.0<cm> }
| Tomatoes -> { colour: Color.Red }
| _ -> { }
but I'd really like to keep the definition of the union and the data together. Is there a nice way to do this?
You could add properties to your discriminated union...
type BologneseIngredients =
| Spaghetti
| Tomatoes
| MincedBeef
| GrandmasSecretIngredient
member x.Color =
match x with
| Spaghetti -> Color.AntiqueWhite
| Tomatoes -> Color.Red
| MincedBeef -> Color.Firebrick
| GrandmasSecretIngredient -> Color.Transparent
let foo = Tomatoes
printfn "%A" foo.Color
> Color [Red]
my suggestion:
module Recipes =
type BologneseIngredients = | Spaghetti
| Tomatoes
| MincedBeef
| GrandmasSecretIngredient
let length (ind : BologneseIngredients) : float<cm> option =
match ind with
| Sphaghetti -> Some 30.0<cm>
| _ -> None
// .. or a bit more "metadata"ish
type Metadata =
| Length of float<cm>
| Color of System.Drawing.Color
let metadata =
function
| Sphaghetti -> [ Length 30.0<cm ]
| Tomatoes -> [ Color System.Drawing.Color.Red ]
| ...
let metaLength meta =
meta |> List.tryPick (function | Length l -> Some l | _ -> None)
let getLength = metadata >> metaLength
I'm fairly new to F# but I'm struggling to find how to properly represent the null character in the language. Can anyone tell me how to represent the null character in F#?
More to the point, what started me down the path is I'm trying to do some string processing with String.mapi, but I can't figure out how to remove a character in the below function:
let GetTargetFrameworkFolder version =
let versionMapper i c =
match c with
| 'v' -> if i = 0 then char(0x000) else c
| '.' -> char(0x000)
| _ -> c
match version with
| "v3.5" -> "net35"
| "v4.0" -> "net40"
| "v4.5" -> "net45"
| vers -> vers |> String.mapi versionMapper
GetTargetFrameworkFolder "v4.5.1" |> Dump
How can I remove a character from a string while doing character by character processing, as in the case with String.map and String.mapi?
You cannot remove a character using String.mapi, as this function maps exactly one character from the input to one character from the output. The null character is not the same thing as removing a character; it's just another character that happens to have the code 0.
In your case, if I understand correctly you want to remove the initial 'v' (if any) and remove dots. I would do it like this:
let GetTargetFrameworkFolder version =
match version with
| "v3.5" -> "net35"
| "v4.0" -> "net40"
| "v4.5" -> "net45"
| vers ->
let vers = if vers.[0] = 'v' then vers.[1..] else vers
vers.Replace(".", "")
Another way of doing this if you wanted to keep your original approach would be to write your own choose function for strings:
module String =
let choosei predicate str =
let sb = System.Text.StringBuilder()
let choose i (c:char) =
match predicate i c with
| Some(x) -> sb.Append(c) |> ignore
| None -> ()
str |> String.iteri choose
sb.ToString()
Then use it as follows:
let GetTargetFrameworkFolder version =
let versionMapper i = function
| 'v' when i = 0 -> None
| '.' -> None
| c -> Some(c)
match version with
| "v3.5" -> "net35"
| "v4.0" -> "net40"
| "v4.5" -> "net45"
| vers -> vers |> String.choosei versionMapper
GetTargetFrameworkFolder "v4.5.1" |> Dump
You can achieve this by using an array comprehension:
let GetTargetFrameworkFolder version =
match version with
| "v3.5" -> "net35"
| "v4.0" -> "net40"
| "v4.5" -> "net45"
| vers -> new String([|
for i in 0 .. vers.Length - 1 do
match i, vers.[i] with
| 0, 'v' | _, '.' -> () // skip 'v' at [0] and all '.'s
| _, c -> yield c // let everything else through
|])
By character processing while removing a character is filtering (string is a sequence of char):
let version (s: String) =
s
|> Seq.filter (fun ch -> ch <> '.' && ch <> 'v')
|> String.Concat
UPDATE:
To skip first 'v':
let version (s: String) =
s
|> Seq.skip (if s.StartsWith "v" then 1 else 0)
|> Seq.filter ((<>) '.')
|> String.Concat
Say I've got some code like this
match exp with
| Addition(lhs,rhs,_) -> Addition(fix lhs,fix rhs)
| Subtraction(lhs,rhs,_) -> Subtraction(fix lhs,fix rhs)
is there any way that would allow me to do something like
match exp with
| Addition(lhs,rhs,_)
| Subtraction(lhs,rhs,_) -> X(fix lhs,fix rhs)
where X be based on the actual pattern being matched
I like #kvb's answer.
This does suggest that you may want to redefine the DU, though:
type Op = | Add | Sub
type Expr = | Binary of Op * Expr * Expr
You can use an active pattern:
let (|Binary|_|) = function
| Addition(e1,e2) -> Some(Addition, e1, e2)
| Subtraction(e1,e2) -> Some(Subtraction, e1, e2)
| _ -> None
let rec fix = function
| Binary(con,lhs,rhs) -> con(fix lhs, fix rhs)
| _ -> ...