How to define x++ (where x: int ref) in F#? - f#

I currently use this function
let inc (i : int ref) =
let res = !i
i := res + 1
res
to write things like
let str = input.[inc index]
How define increment operator ++, so that I could write
let str = input.[index++]

You cannot define postfix operators in F# - see 4.4 Operators and Precedence. If you agree to making it prefix instead, then you can define, for example,
let (++) x = incr x; !x
and use it as below:
let y = ref 1
(++) y;;
val y : int ref = {contents = 2;}
UPDATE: as fpessoa pointed out ++ cannot be used as a genuine prefix operator, indeed (see here and there for the rules upon characters and character sequences comprising valid F# prefix operators).
Interestingly, the unary + can be overloaded for the purpose:
let (~+) x = incr x; !x
allowing
let y = ref 1
+y;;
val y : int ref = {contents = 2;}
Nevertheless, it makes sense to mention that the idea of iterating an array like below
let v = [| 1..5 |]
let i = ref -1
v |> Seq.iter (fun _ -> printfn "%d" v.[+i])
for the sake of "readability" looks at least strange in comparison with the idiomatic functional manner
[|1..5|] |> Seq.iter (printfn "%d")
which some initiated already had expressed in comments to the original question.

I was trying to write it as a prefix operator as suggested, but you can't define (++) as a proper prefix operator, i.e., run things like ++y without the () as you could for example for (!+):
let (!+) (i : int ref) = incr i; !i
let v = [| 1..5 |]
let i = ref -1
[1..5] |> Seq.iter (fun _ -> printfn "%d" v.[!+i])
Sorry, but I guess the answer is that actually you can't do even that.

Related

Tail Recursion in F# : Stack Overflow

I'm trying to implement Kosaraju's algorithm on a large graph
as part of an assignment [MOOC Algo I Stanford on Coursera]
https://en.wikipedia.org/wiki/Kosaraju%27s_algorithm
The current code works on a small graph, but I'm hitting Stack Overflow during runtime execution.
Despite having read the relevant chapter in Expert in F#, or other available examples on websites and SO, i still don't get how to use continuation to solve this problem
Below is the full code for general purpose, but it will already fail when executing DFSLoop1 and the recursive function DFSsub inside. I think I'm not making the function tail recursive [because of the instructions
t<-t+1
G.[n].finishingtime <- t
?]
but i don't understand how i can implement the continuation properly.
When considering only the part that fails, DFSLoop1 is taking as argument a graph to which we will apply Depth-First Search. We need to record the finishing time as part of the algo to proceed to the second part of the algo in a second DFS Loop (DFSLoop2) [of course we are failing before that].
open System
open System.Collections.Generic
open System.IO
let x = File.ReadAllLines "C:\Users\Fagui\Documents\GitHub\Learning Fsharp\Algo Stanford I\PA 4 - SCC.txt";;
// let x = File.ReadAllLines "C:\Users\Fagui\Documents\GitHub\Learning Fsharp\Algo Stanford I\PA 4 - test1.txt";;
// val x : string [] =
let splitAtTab (text:string)=
text.Split [|'\t';' '|]
let splitIntoKeyValue (A: int[]) =
(A.[0], A.[1])
let parseLine (line:string)=
line
|> splitAtTab
|> Array.filter (fun s -> not(s=""))
|> Array.map (fun s-> (int s))
|> splitIntoKeyValue
let y =
x |> Array.map parseLine
//val it : (int * int) []
type Children = int[]
type Node1 =
{children : Children ;
mutable finishingtime : int ;
mutable explored1 : bool ;
}
type Node2 =
{children : Children ;
mutable leader : int ;
mutable explored2 : bool ;
}
type DFSgraphcore = Dictionary<int,Children>
let directgraphcore = new DFSgraphcore()
let reversegraphcore = new DFSgraphcore()
type DFSgraph1 = Dictionary<int,Node1>
let reversegraph1 = new DFSgraph1()
type DFSgraph2 = Dictionary<int,Node2>
let directgraph2 = new DFSgraph2()
let AddtoGraph (G:DFSgraphcore) (n,c) =
if not(G.ContainsKey n) then
let node = [|c|]
G.Add(n,node)
else
let c'= G.[n]
G.Remove(n) |> ignore
G.Add (n, Array.append c' [|c|])
let inline swaptuple (a,b) = (b,a)
y|> Array.iter (AddtoGraph directgraphcore)
y|> Array.map swaptuple |> Array.iter (AddtoGraph reversegraphcore)
for i in directgraphcore.Keys do
if reversegraphcore.ContainsKey(i) then do
let node = {children = reversegraphcore.[i] ;
finishingtime = -1 ;
explored1 = false ;
}
reversegraph1.Add (i,node)
else
let node = {children = [||] ;
finishingtime = -1 ;
explored1 = false ;
}
reversegraph1.Add (i,node)
directgraphcore.Clear |> ignore
reversegraphcore.Clear |> ignore
// for i in reversegraph1.Keys do printfn "%d %A" i reversegraph1.[i].children
printfn "pause"
Console.ReadKey() |> ignore
let num_nodes =
directgraphcore |> Seq.length
let DFSLoop1 (G:DFSgraph1) =
let mutable t = 0
let mutable s = -1
let mutable k = num_nodes
let rec DFSsub (G:DFSgraph1)(n:int) (cont:int->int) =
//how to make it tail recursive ???
G.[n].explored1 <- true
// G.[n].leader <- s
for j in G.[n].children do
if not(G.[j].explored1) then DFSsub G j cont
t<-t+1
G.[n].finishingtime <- t
// end of DFSsub
for i in num_nodes .. -1 .. 1 do
printfn "%d" i
if not(G.[i].explored1) then do
s <- i
( DFSsub G i (fun s -> s) ) |> ignore
// printfn "%d %d" i G.[i].finishingtime
DFSLoop1 reversegraph1
printfn "pause"
Console.ReadKey() |> ignore
for i in directgraphcore.Keys do
let node = {children =
directgraphcore.[i]
|> Array.map (fun k -> reversegraph1.[k].finishingtime) ;
leader = -1 ;
explored2= false ;
}
directgraph2.Add (reversegraph1.[i].finishingtime,node)
let z = 0
let DFSLoop2 (G:DFSgraph2) =
let mutable t = 0
let mutable s = -1
let mutable k = num_nodes
let rec DFSsub (G:DFSgraph2)(n:int) (cont:int->int) =
G.[n].explored2 <- true
G.[n].leader <- s
for j in G.[n].children do
if not(G.[j].explored2) then DFSsub G j cont
t<-t+1
// G.[n].finishingtime <- t
// end of DFSsub
for i in num_nodes .. -1 .. 1 do
if not(G.[i].explored2) then do
s <- i
( DFSsub G i (fun s -> s) ) |> ignore
// printfn "%d %d" i G.[i].leader
DFSLoop2 directgraph2
printfn "pause"
Console.ReadKey() |> ignore
let table = [for i in directgraph2.Keys do yield directgraph2.[i].leader]
let results = table |> Seq.countBy id |> Seq.map snd |> Seq.toList |> List.sort |> List.rev
printfn "%A" results
printfn "pause"
Console.ReadKey() |> ignore
Here is a text file with a simple graph example
1 4
2 8
3 6
4 7
5 2
6 9
7 1
8 5
8 6
9 7
9 3
(the one which is causing overflow is 70Mo big with around 900,000 nodes)
EDIT
to clarify a few things first
Here is the "pseudo code"
Input: a directed graph G = (V,E), in adjacency list representation. Assume that the vertices V are labeled
1, 2, 3, . . . , n.
1. Let Grev denote the graph G after the orientation of all arcs have been reversed.
2. Run the DFS-Loop subroutine on Grev, processing vertices according to the given order, to obtain a
finishing time f(v) for each vertex v ∈ V .
3. Run the DFS-Loop subroutine on G, processing vertices in decreasing order of f(v), to assign a leader
to each vertex v ∈ V .
4. The strongly connected components of G correspond to vertices of G that share a common leader.
Figure 2: The top level of our SCC algorithm. The f-values and leaders are computed in the first and second
calls to DFS-Loop, respectively (see below).
Input: a directed graph G = (V,E), in adjacency list representation.
1. Initialize a global variable t to 0.
[This keeps track of the number of vertices that have been fully explored.]
2. Initialize a global variable s to NULL.
[This keeps track of the vertex from which the last DFS call was invoked.]
3. For i = n downto 1:
[In the first call, vertices are labeled 1, 2, . . . , n arbitrarily. In the second call, vertices are labeled by
their f(v)-values from the first call.]
(a) if i not yet explored:
i. set s := i
ii. DFS(G, i)
Figure 3: The DFS-Loop subroutine.
Input: a directed graph G = (V,E), in adjacency list representation, and a source vertex i ∈ V .
1. Mark i as explored.
[It remains explored for the entire duration of the DFS-Loop call.]
2. Set leader(i) := s
3. For each arc (i, j) ∈ G:
(a) if j not yet explored:
i. DFS(G, j)
4. t + +
5. Set f(i) := t
Figure 4: The DFS subroutine. The f-values only need to be computed during the first call to DFS-Loop, and
the leader values only need to be computed during the second call to DFS-Loop.
EDIT
i have amended the code, with the help of an experienced programmer (a lisper but who has no experience in F#) simplifying somewhat the first part to have more quickly an example without bothering about non-relevant code for this discussion.
The code focuses only on half of the algo, running DFS once to get finishing times of the reversed tree.
This is the first part of the code just to create a small example
y is the original tree. the first element of a tuple is the parent, the second is the child. But we will be working with the reverse tree
open System
open System.Collections.Generic
open System.IO
let x = File.ReadAllLines "C:\Users\Fagui\Documents\GitHub\Learning Fsharp\Algo Stanford I\PA 4 - SCC.txt";;
// let x = File.ReadAllLines "C:\Users\Fagui\Documents\GitHub\Learning Fsharp\Algo Stanford I\PA 4 - test1.txt";;
// val x : string [] =
let splitAtTab (text:string)=
text.Split [|'\t';' '|]
let splitIntoKeyValue (A: int[]) =
(A.[0], A.[1])
let parseLine (line:string)=
line
|> splitAtTab
|> Array.filter (fun s -> not(s=""))
|> Array.map (fun s-> (int s))
|> splitIntoKeyValue
// let y =
// x |> Array.map parseLine
//let y =
// [|(1, 4); (2, 8); (3, 6); (4, 7); (5, 2); (6, 9); (7, 1); (8, 5); (8, 6);
// (9, 7); (9, 3)|]
// let y = Array.append [|(1,1);(1,2);(2,3);(3,1)|] [|for i in 4 .. 10000 do yield (i,4)|]
let y = Array.append [|(1,1);(1,2);(2,3);(3,1)|] [|for i in 4 .. 99999 do yield (i,i+1)|]
//val it : (int * int) []
type Children = int list
type Node1 =
{children : Children ;
mutable finishingtime : int ;
mutable explored1 : bool ;
}
type Node2 =
{children : Children ;
mutable leader : int ;
mutable explored2 : bool ;
}
type DFSgraphcore = Dictionary<int,Children>
let directgraphcore = new DFSgraphcore()
let reversegraphcore = new DFSgraphcore()
type DFSgraph1 = Dictionary<int,Node1>
let reversegraph1 = new DFSgraph1()
let AddtoGraph (G:DFSgraphcore) (n,c) =
if not(G.ContainsKey n) then
let node = [c]
G.Add(n,node)
else
let c'= G.[n]
G.Remove(n) |> ignore
G.Add (n, List.append c' [c])
let inline swaptuple (a,b) = (b,a)
y|> Array.iter (AddtoGraph directgraphcore)
y|> Array.map swaptuple |> Array.iter (AddtoGraph reversegraphcore)
// définir reversegraph1 = ... with....
for i in reversegraphcore.Keys do
let node = {children = reversegraphcore.[i] ;
finishingtime = -1 ;
explored1 = false ;
}
reversegraph1.Add (i,node)
for i in directgraphcore.Keys do
if not(reversegraphcore.ContainsKey(i)) then do
let node = {children = [] ;
finishingtime = -1 ;
explored1 = false ;
}
reversegraph1.Add (i,node)
directgraphcore.Clear |> ignore
reversegraphcore.Clear |> ignore
// for i in reversegraph1.Keys do printfn "%d %A" i reversegraph1.[i].children
printfn "pause"
Console.ReadKey() |> ignore
let num_nodes =
directgraphcore |> Seq.length
So basically the graph is (1->2->3->1)::(4->5->6->7->8->....->99999->10000)
and the reverse graph is (1->3->2->1)::(10000->9999->....->4)
here is the main code written in direct style
//////////////////// main code is below ///////////////////
let DFSLoop1 (G:DFSgraph1) =
let mutable t = 0
let mutable s = -1
let rec iter (n:int) (f:'a->unit) (list:'a list) : unit =
match list with
| [] -> (t <- t+1) ; (G.[n].finishingtime <- t)
| x::xs -> f x ; iter n f xs
let rec DFSsub (G:DFSgraph1) (n:int) : unit =
let my_f (j:int) : unit = if not(G.[j].explored1) then (DFSsub G j)
G.[n].explored1 <- true
iter n my_f G.[n].children
for i in num_nodes .. -1 .. 1 do
// printfn "%d" i
if not(G.[i].explored1) then do
s <- i
DFSsub G i
printfn "%d %d" i G.[i].finishingtime
// End of DFSLoop1
DFSLoop1 reversegraph1
printfn "pause"
Console.ReadKey() |> ignore
its not tail recursive, so we use continuations, here is the same code adapted to CPS style:
//////////////////// main code is below ///////////////////
let DFSLoop1 (G:DFSgraph1) =
let mutable t = 0
let mutable s = -1
let rec iter_c (n:int) (f_c:'a->(unit->'r)->'r) (list:'a list) (cont: unit->'r) : 'r =
match list with
| [] -> (t <- t+1) ; (G.[n].finishingtime <- t) ; cont()
| x::xs -> f_c x (fun ()-> iter_c n f_c xs cont)
let rec DFSsub (G:DFSgraph1) (n:int) (cont: unit->'r) : 'r=
let my_f_c (j:int)(cont:unit->'r):'r = if not(G.[j].explored1) then (DFSsub G j cont) else cont()
G.[n].explored1 <- true
iter_c n my_f_c G.[n].children cont
for i in maxnum_nodes .. -1 .. 1 do
// printfn "%d" i
if not(G.[i].explored1) then do
s <- i
DFSsub G i id
printfn "%d %d" i G.[i].finishingtime
DFSLoop1 reversegraph1
printfn "faré"
printfn "pause"
Console.ReadKey() |> ignore
both codes compile and give the same results for the small example (the one in comment) or the same tree that we are using , with a smaller size (1000 instead of 100000)
so i don't think its a bug in the algo here, we've got the same tree structure, just a bigger tree is causing problems. it looks to us the continuations are well written. we've typed the code explicitly. and all calls end with a continuation in all cases...
We are looking for expert advice !!! thanks !!!
I did not try to understand the whole code snippet, because it is fairly long, but you'll certainly need to replace the for loop with an iteration implemented using continuation passing style. Something like:
let rec iterc f cont list =
match list with
| [] -> cont ()
| x::xs -> f x (fun () -> iterc f cont xs)
I didn't understand the purpose of cont in your DFSub function (it is never called, is it?), but the continuation based version would look roughly like this:
let rec DFSsub (G:DFSgraph2)(n:int) cont =
G.[n].explored2 <- true
G.[n].leader <- s
G.[n].children
|> iterc
(fun j cont -> if not(G.[j].explored2) then DFSsub G j cont else cont ())
(fun () -> t <- t + 1)
Overflowing the stack when you recurse through hundreds of thousands of entries isn't bad at all, really. A lot of programming language implementations will choke on much shorter recursions than that. You're having serious programmer problems — nothing to be ashamed of!
Now if you want to do deeper recursions than your implementation will handle, you need to transform your algorithm so it is iterative and/or tail-recursive (the two are isomorphic — except that tail-recursion allows for decentralization and modularity, whereas iteration is centralized and non-modular).
To transform an algorithm from recursive to tail-recursive, which is an important skill to possess, you need to understand the state that is implicitly stored in a stack frame, i.e. those free variables in the function body that change across the recursion, and explicitly store them in a FIFO queue (a data structure that replicates your stack, and can be implemented trivially as a linked list). Then you can pass that linked list of reified frame variables as an argument to your tail recursive functions.
In more advanced cases where you have many tail recursive functions each with a different kind of frame, instead of simple self-recursion, you may need to define some mutually recursive data types for the reified stack frames, instead of using a list. But I believe Kosaraju's algorithm only involves self-recursive functions.
OK, so the code given above was the RIGHT code !
the problem lies with the compiler of F#
here is some words about it from Microsoft
http://blogs.msdn.com/b/fsharpteam/archive/2011/07/08/tail-calls-in-fsharp.aspx
Basically, be careful with the settings, in default mode, the compiler may NOT make automatically the tail calls. To do so, in VS2015, go to the Solution Explorer, right click with the mouse and click on "Properties" (the last element of the scrolling list)
Then in the new window, click on "Build" and tick the box "Generate tail calls"
It is also to check if the compiler did its job looking at the disassembly using
ILDASM.exe
you can find the source code for the whole algo in my github repository
https://github.com/FaguiCurtain/Learning-Fsharp/blob/master/Algo%20Stanford/Algo%20Stanford/Kosaraju_cont.fs
on a performance point of view, i'm not very satisfied. The code runs on 36 seconds on my laptop. From the forum with other fellow MOOCers, C/C++/C# typically executes in subsecond to 5s, Java around 10-15, Python around 20-30s.
So my implementation is clearly not optimized. I am now happy to hear about tricks to make it faster !!! thanks !!!!

Is there a less clumsy way to perform automatic numbering with infinite sequences?

While the code I have does what I need, I feel I am missing some coding technique to implement this kind of stuff in a more concise way.
The goal is to compose items and give them an Id value along the way.
Here the code which I feel can be simplified and improved in many ways. If only I knew, how...
type Foo = | A of int | B of int | C of int
let ids startvalue = Seq.initInfinite (fun i -> i + startvalue)
let makeA ids =
A(Seq.head ids), Seq.skip 1 ids
let makeB ids =
B(Seq.head ids), Seq.skip 1 ids
let makeC ids =
C(Seq.head ids), Seq.skip 1 ids
let config = [makeA; makeA; makeC; makeB]
let create (ids : seq<int>) (cfg : (seq<int> -> Foo * seq<int>) list) : Foo list * seq<int> =
let rec cre ids1 acc cl =
match cl with
| [] -> (acc,ids1)
| x::xs ->
let v,ids2 = x ids1
cre ids2 (acc # [v]) xs
cre ids [] cfg
let result : Foo list * seq<int> = create (ids 0) config
Which results in the very simple:
val result : Foo list * seq = ([A 0; A 1; C 2; B 3], )
Somehow I feel there should be an easier way to accomplish the same.
In fact, I know one way to make it simpler but this would involve mutable state and memoization (and would thusly be probably considered worse):
let idgen startvalue =
let v = ref startvalue
fun () ->
let result = !v
v := !v + 1
result
With the thusly received generator function I could get rid of all those tuples, at least and I could also get rid of create function and simply write:
let ids = idgen 0
let result =
[
A(ids())
A(ids())
C(ids())
B(ids())
]
But there should also exist a "functional" way to get it done more simply.
It seems that what you want is to take two sequences, one of functions, the other of arguments, and produce new sequence by applying functions to corresponding arguments, where in your particular case arguments are successive integer numbers and functions are union case constructors. Would that be a correct assessment?
If so, here's what I would do:
let create args funs =
Seq.zip args funs
|> Seq.map (fun (arg, fn) -> fn arg)
|> List.ofSeq
let result = create (ids 0) [A; A; C; B]
type Foo = | A of int | B of int | C of int
let ids startvalue = Seq.initInfinite (fun i -> i + startvalue)
let config = [A; A; C; B]
let create ids i cfg =
let ids' = ids i
let nextI = i + List.length cfg
(Seq.map2 id cfg ids'), nextI
let result, nextI = create ids 0 config
let result2, nextI2 = create ids nextI config
There are several considerations here:
A, B, and C are constructors of the type int -> Foo. You may directly use them as functions and eliminate repetitive wrapper functions;
Seq.map2 is able to process sequences of different length and ignore the remaining elements of the longer one;
id is a shortcut for fun f number -> f number, please use the longer one if you find id unclear
You may also refactor ids function into Seq.initInfinite id |> Seq.skip startvalue.
At the end, you may convert a sequence into a List if you really need.
Updated to receive an keep i and nextI.
As Fyodor pointed out in his answer, you really just want to apply your constructors to consecutive integers. You can use the built-in mapi for that, writing your entire program as:
type Foo = | A of int | B of int | C of int
let config = [A; A; B; C]
let create offset = List.mapi (fun i f -> f (offset + i))
create 78 config
// val it : Foo list = [A 78; A 79; B 80; C 81]

F#: Testing whether two strings are anagrams

I am new to programming and F# is my first language.
Here is my code:
let areAnagrams (firstString: string) (secondString: string) =
let countCharacters (someString: string) =
someString.ToLower().ToCharArray() |> Array.toSeq
|> Seq.countBy (fun eachChar -> eachChar)
|> Seq.sortBy (snd >> (~-))
countCharacters firstString = countCharacters secondString
let testString1 = "Laity"
let testString2 = "Italy"
printfn "It is %b that %s and %s are anagrams." (areAnagrams testString1 testString2) (testString1) (testString2)
This is the output:
It is false that Laity and Italy are anagrams.
What went wrong? What changes should I make?
Your implementation of countCharacters sorts the tuples just using the second element (the number of occurrences for each character), but if there are multiple characters that appear the same number of times, then the order is not defined.
If you run the countCharacters function on your two samples, you can see the problem:
> countCharacters "Laity";;
val it : seq<char * int> = seq [('l', 1); ('a', 1); ('i', 1); ('t', 1); ...]
> countCharacters "Italy";;
val it : seq<char * int> = seq [('i', 1); ('t', 1); ('a', 1); ('l', 1); ...]
One solution is to just use Seq.sort and sort the tuples using both the letter code and the number of occurrences.
The other problem is that you are comparing two seq<_> values and this does not use structural comparison, so you'll need to turn the result into a list or an array (something that is fully evaluated):
let countCharacters (someString: string) =
someString.ToLower().ToCharArray()
|> Seq.countBy (fun eachChar -> eachChar)
|> Seq.sort
|> List.ofSeq
Note that you do not actually need Seq.countBy - because if you just sort all the characters, it will work equally well (the repeated characters will just be one after another). So you could use just:
let countCharacters (someString: string) =
someString.ToLower() |> Seq.sort |> List.ofSeq
Sorting the characters of the two strings gives you an easy solution but this could be a good example of recursion.
You can immediately exclude strings of different length.
You can also filter out all the occurrences of a char per iteration, by replacing them with an empty string.
let rec areAnagram (x:string) (y:string) =
if x.Lenght <> t.Lenght
then false else
if x.Lenght = 0
then true else
let reply = x.[0].ToString ()
areAnagram
(x.Replace (reply,""))
(y.Replace (reply,""))
The above should be faster than sorting for many use cases.
Anyway we can go further and transform it into a fast Integer Sorting without recursion and string replacements
let inline charToInt c =
int c - int '0'
let singlePassAnagram (x:string) =
let hash : int array = Array.zeroCreate 100
x |> Seq.iter (fun c->
hash.[charToInt c] <- (hash.[charToInt c]+1)
)
let areAnagramsFast
(x:string) (y:string) =
if x.Length <> y.Length
then false else
(singlePassAnagram x) =
(singlePassAnagram y)
Here is a fiddle

F#: How do i split up a sequence into a sequence of sequences

Background:
I have a sequence of contiguous, time-stamped data. The data-sequence has gaps in it where the data is not contiguous. I want create a method to split the sequence up into a sequence of sequences so that each subsequence contains contiguous data (split the input-sequence at the gaps).
Constraints:
The return value must be a sequence of sequences to ensure that elements are only produced as needed (cannot use list/array/cacheing)
The solution must NOT be O(n^2), probably ruling out a Seq.take - Seq.skip pattern (cf. Brian's post)
Bonus points for a functionally idiomatic approach (since I want to become more proficient at functional programming), but it's not a requirement.
Method signature
let groupContiguousDataPoints (timeBetweenContiguousDataPoints : TimeSpan) (dataPointsWithHoles : seq<DateTime * float>) : (seq<seq< DateTime * float >>)= ...
On the face of it the problem looked trivial to me, but even employing Seq.pairwise, IEnumerator<_>, sequence comprehensions and yield statements, the solution eludes me. I am sure that this is because I still lack experience with combining F#-idioms, or possibly because there are some language-constructs that I have not yet been exposed to.
// Test data
let numbers = {1.0..1000.0}
let baseTime = DateTime.Now
let contiguousTimeStamps = seq { for n in numbers ->baseTime.AddMinutes(n)}
let dataWithOccationalHoles = Seq.zip contiguousTimeStamps numbers |> Seq.filter (fun (dateTime, num) -> num % 77.0 <> 0.0) // Has a gap in the data every 77 items
let timeBetweenContiguousValues = (new TimeSpan(0,1,0))
dataWithOccationalHoles |> groupContiguousDataPoints timeBetweenContiguousValues |> Seq.iteri (fun i sequence -> printfn "Group %d has %d data-points: Head: %f" i (Seq.length sequence) (snd(Seq.hd sequence)))
I think this does what you want
dataWithOccationalHoles
|> Seq.pairwise
|> Seq.map(fun ((time1,elem1),(time2,elem2)) -> if time2-time1 = timeBetweenContiguousValues then 0, ((time1,elem1),(time2,elem2)) else 1, ((time1,elem1),(time2,elem2)) )
|> Seq.scan(fun (indexres,(t1,e1),(t2,e2)) (index,((time1,elem1),(time2,elem2))) -> (index+indexres,(time1,elem1),(time2,elem2)) ) (0,(baseTime,-1.0),(baseTime,-1.0))
|> Seq.map( fun (index,(time1,elem1),(time2,elem2)) -> index,(time2,elem2) )
|> Seq.filter( fun (_,(_,elem)) -> elem <> -1.0)
|> PSeq.groupBy(fst)
|> Seq.map(snd>>Seq.map(snd))
Thanks for asking this cool question
I translated Alexey's Haskell to F#, but it's not pretty in F#, and still one element too eager.
I expect there is a better way, but I'll have to try again later.
let N = 20
let data = // produce some arbitrary data with holes
seq {
for x in 1..N do
if x % 4 <> 0 && x % 7 <> 0 then
printfn "producing %d" x
yield x
}
let rec GroupBy comp (input:LazyList<'a>) : LazyList<LazyList<'a>> =
LazyList.delayed (fun () ->
match input with
| LazyList.Nil -> LazyList.cons (LazyList.empty()) (LazyList.empty())
| LazyList.Cons(x,LazyList.Nil) ->
LazyList.cons (LazyList.cons x (LazyList.empty())) (LazyList.empty())
| LazyList.Cons(x,(LazyList.Cons(y,_) as xs)) ->
let groups = GroupBy comp xs
if comp x y then
LazyList.consf
(LazyList.consf x (fun () ->
let (LazyList.Cons(firstGroup,_)) = groups
firstGroup))
(fun () ->
let (LazyList.Cons(_,otherGroups)) = groups
otherGroups)
else
LazyList.cons (LazyList.cons x (LazyList.empty())) groups)
let result = data |> LazyList.of_seq |> GroupBy (fun x y -> y = x + 1)
printfn "Consuming..."
for group in result do
printfn "about to do a group"
for x in group do
printfn " %d" x
You seem to want a function that has signature
(`a -> bool) -> seq<'a> -> seq<seq<'a>>
I.e. a function and a sequence, then break up the input sequence into a sequence of sequences based on the result of the function.
Caching the values into a collection that implements IEnumerable would likely be simplest (albeit not exactly purist, but avoiding iterating the input multiple times. It will lose much of the laziness of the input):
let groupBy (fun: 'a -> bool) (input: seq) =
seq {
let cache = ref (new System.Collections.Generic.List())
for e in input do
(!cache).Add(e)
if not (fun e) then
yield !cache
cache := new System.Collections.Generic.List()
if cache.Length > 0 then
yield !cache
}
An alternative implementation could pass cache collection (as seq<'a>) to the function so it can see multiple elements to chose the break points.
A Haskell solution, because I don't know F# syntax well, but it should be easy enough to translate:
type TimeStamp = Integer -- ticks
type TimeSpan = Integer -- difference between TimeStamps
groupContiguousDataPoints :: TimeSpan -> [(TimeStamp, a)] -> [[(TimeStamp, a)]]
There is a function groupBy :: (a -> a -> Bool) -> [a] -> [[a]] in the Prelude:
The group function takes a list and returns a list of lists such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only equal elements. For example,
group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
It is a special case of groupBy, which allows the programmer to supply their own equality test.
It isn't quite what we want, because it compares each element in the list with the first element of the current group, and we need to compare consecutive elements. If we had such a function groupBy1, we could write groupContiguousDataPoints easily:
groupContiguousDataPoints maxTimeDiff list = groupBy1 (\(t1, _) (t2, _) -> t2 - t1 <= maxTimeDiff) list
So let's write it!
groupBy1 :: (a -> a -> Bool) -> [a] -> [[a]]
groupBy1 _ [] = [[]]
groupBy1 _ [x] = [[x]]
groupBy1 comp (x : xs#(y : _))
| comp x y = (x : firstGroup) : otherGroups
| otherwise = [x] : groups
where groups#(firstGroup : otherGroups) = groupBy1 comp xs
UPDATE: it looks like F# doesn't let you pattern match on seq, so it isn't too easy to translate after all. However, this thread on HubFS shows a way to pattern match sequences by converting them to LazyList when needed.
UPDATE2: Haskell lists are lazy and generated as needed, so they correspond to F#'s LazyList (not to seq, because the generated data is cached (and garbage collected, of course, if you no longer hold a reference to it)).
(EDIT: This suffers from a similar problem to Brian's solution, in that iterating the outer sequence without iterating over each inner sequence will mess things up badly!)
Here's a solution that nests sequence expressions. The imperitave nature of .NET's IEnumerable<T> is pretty apparent here, which makes it a bit harder to write idiomatic F# code for this problem, but hopefully it's still clear what's going on.
let groupBy cmp (sq:seq<_>) =
let en = sq.GetEnumerator()
let rec partitions (first:option<_>) =
seq {
match first with
| Some first' -> //'
(* The following value is always overwritten;
it represents the first element of the next subsequence to output, if any *)
let next = ref None
(* This function generates a subsequence to output,
setting next appropriately as it goes *)
let rec iter item =
seq {
yield item
if (en.MoveNext()) then
let curr = en.Current
if (cmp item curr) then
yield! iter curr
else // consumed one too many - pass it on as the start of the next sequence
next := Some curr
else
next := None
}
yield iter first' (* ' generate the first sequence *)
yield! partitions !next (* recursively generate all remaining sequences *)
| None -> () // return an empty sequence if there are no more values
}
let first = if en.MoveNext() then Some en.Current else None
partitions first
let groupContiguousDataPoints (time:TimeSpan) : (seq<DateTime*_> -> _) =
groupBy (fun (t,_) (t',_) -> t' - t <= time)
Okay, trying again. Achieving the optimal amount of laziness turns out to be a bit difficult in F#... On the bright side, this is somewhat more functional than my last attempt, in that it doesn't use any ref cells.
let groupBy cmp (sq:seq<_>) =
let en = sq.GetEnumerator()
let next() = if en.MoveNext() then Some en.Current else None
(* this function returns a pair containing the first sequence and a lazy option indicating the first element in the next sequence (if any) *)
let rec seqStartingWith start =
match next() with
| Some y when cmp start y ->
let rest_next = lazy seqStartingWith y // delay evaluation until forced - stores the rest of this sequence and the start of the next one as a pair
seq { yield start; yield! fst (Lazy.force rest_next) },
lazy Lazy.force (snd (Lazy.force rest_next))
| next -> seq { yield start }, lazy next
let rec iter start =
seq {
match (Lazy.force start) with
| None -> ()
| Some start ->
let (first,next) = seqStartingWith start
yield first
yield! iter next
}
Seq.cache (iter (lazy next()))
Below is some code that does what I think you want. It is not idiomatic F#.
(It may be similar to Brian's answer, though I can't tell because I'm not familiar with the LazyList semantics.)
But it doesn't exactly match your test specification: Seq.length enumerates its entire input. Your "test code" calls Seq.length and then calls Seq.hd. That will generate an enumerator twice, and since there is no caching, things get messed up. I'm not sure if there is any clean way to allow multiple enumerators without caching. Frankly, seq<seq<'a>> may not be the best data structure for this problem.
Anyway, here's the code:
type State<'a> = Unstarted | InnerOkay of 'a | NeedNewInner of 'a | Finished
// f() = true means the neighbors should be kept together
// f() = false means they should be split
let split_up (f : 'a -> 'a -> bool) (input : seq<'a>) =
// simple unfold that assumes f captured a mutable variable
let iter f = Seq.unfold (fun _ ->
match f() with
| Some(x) -> Some(x,())
| None -> None) ()
seq {
let state = ref (Unstarted)
use ie = input.GetEnumerator()
let innerMoveNext() =
match !state with
| Unstarted ->
if ie.MoveNext()
then let cur = ie.Current
state := InnerOkay(cur); Some(cur)
else state := Finished; None
| InnerOkay(last) ->
if ie.MoveNext()
then let cur = ie.Current
if f last cur
then state := InnerOkay(cur); Some(cur)
else state := NeedNewInner(cur); None
else state := Finished; None
| NeedNewInner(last) -> state := InnerOkay(last); Some(last)
| Finished -> None
let outerMoveNext() =
match !state with
| Unstarted | NeedNewInner(_) -> Some(iter innerMoveNext)
| InnerOkay(_) -> failwith "Move to next inner seq when current is active: undefined behavior."
| Finished -> None
yield! iter outerMoveNext }
open System
let groupContigs (contigTime : TimeSpan) (holey : seq<DateTime * int>) =
split_up (fun (t1,_) (t2,_) -> (t2 - t1) <= contigTime) holey
// Test data
let numbers = {1 .. 15}
let contiguousTimeStamps =
let baseTime = DateTime.Now
seq { for n in numbers -> baseTime.AddMinutes(float n)}
let holeyData =
Seq.zip contiguousTimeStamps numbers
|> Seq.filter (fun (dateTime, num) -> num % 7 <> 0)
let grouped_data = groupContigs (new TimeSpan(0,1,0)) holeyData
printfn "Consuming..."
for group in grouped_data do
printfn "about to do a group"
for x in group do
printfn " %A" x
Ok, here's an answer I'm not unhappy with.
(EDIT: I am unhappy - it's wrong! No time to try to fix right now though.)
It uses a bit of imperative state, but it is not too difficult to follow (provided you recall that '!' is the F# dereference operator, and not 'not'). It is as lazy as possible, and takes a seq as input and returns a seq of seqs as output.
let N = 20
let data = // produce some arbitrary data with holes
seq {
for x in 1..N do
if x % 4 <> 0 && x % 7 <> 0 then
printfn "producing %d" x
yield x
}
let rec GroupBy comp (input:seq<_>) = seq {
let doneWithThisGroup = ref false
let areMore = ref true
use e = input.GetEnumerator()
let Next() = areMore := e.MoveNext(); !areMore
// deal with length 0 or 1, seed 'prev'
if not(e.MoveNext()) then () else
let prev = ref e.Current
while !areMore do
yield seq {
while not(!doneWithThisGroup) do
if Next() then
let next = e.Current
doneWithThisGroup := not(comp !prev next)
yield !prev
prev := next
else
// end of list, yield final value
yield !prev
doneWithThisGroup := true }
doneWithThisGroup := false }
let result = data |> GroupBy (fun x y -> y = x + 1)
printfn "Consuming..."
for group in result do
printfn "about to do a group"
for x in group do
printfn " %d" x

Handy F# snippets [closed]

As it currently stands, this question is not a good fit for our Q&A format. We expect answers to be supported by facts, references, or expertise, but this question will likely solicit debate, arguments, polling, or extended discussion. If you feel that this question can be improved and possibly reopened, visit the help center for guidance.
Closed 10 years ago.
There are already two questions about F#/functional snippets.
However what I'm looking for here are useful snippets, little 'helper' functions that are reusable. Or obscure but nifty patterns that you can never quite remember.
Something like:
open System.IO
let rec visitor dir filter=
seq { yield! Directory.GetFiles(dir, filter)
for subdir in Directory.GetDirectories(dir) do
yield! visitor subdir filter}
I'd like to make this a kind of handy reference page. As such there will be no right answer, but hopefully lots of good ones.
EDIT Tomas Petricek has created a site specifically for F# snippets http://fssnip.net/.
Perl style regex matching
let (=~) input pattern =
System.Text.RegularExpressions.Regex.IsMatch(input, pattern)
It lets you match text using let test = "monkey" =~ "monk.+" notation.
Infix Operator
I got this from http://sandersn.com/blog//index.php/2009/10/22/infix-function-trick-for-f go to that page for more details.
If you know Haskell, you might find yourself missing infix sugar in F#:
// standard Haskell call has function first, then args just like F#. So obviously
// here there is a function that takes two strings: string -> string -> string
startsWith "kevin" "k"
//Haskell infix operator via backQuotes. Sometimes makes a function read better.
"kevin" `startsWith` "K"
While F# doesn't have a true 'infix' operator, the same thing can be accomplished almost as elegantly via a pipeline and a 'backpipeline' (who knew of such a thing??)
// F# 'infix' trick via pipelines
"kevin" |> startsWith <| "K"
Multi-Line Strings
This is pretty trivial, but it seems to be a feature of F# strings that is not widely known.
let sql = "select a,b,c \
from table \
where a = 1"
This produces:
val sql : string = "select a,b,c from table where a = 1"
When the F# compiler sees a back-slash followed by a carriage return inside a string literal, it will remove everything from the back-slash to the first non-space character on the next line. This allows you to have multi-line string literals that line up, without using a bunch of string concatenation.
Generic memoization, courtesy of the man himself
let memoize f =
let cache = System.Collections.Generic.Dictionary<_,_>(HashIdentity.Structural)
fun x ->
let ok, res = cache.TryGetValue(x)
if ok then res
else let res = f x
cache.[x] <- res
res
Using this, you could do a cached reader like so:
let cachedReader = memoize reader
Simple read-write to text files
These are trivial, but make file access pipeable:
open System.IO
let fileread f = File.ReadAllText(f)
let filewrite f s = File.WriteAllText(f, s)
let filereadlines f = File.ReadAllLines(f)
let filewritelines f ar = File.WriteAllLines(f, ar)
So
let replace f (r:string) (s:string) = s.Replace(f, r)
"C:\\Test.txt" |>
fileread |>
replace "teh" "the" |>
filewrite "C:\\Test.txt"
And combining that with the visitor quoted in the question:
let filereplace find repl path =
path |> fileread |> replace find repl |> filewrite path
let recurseReplace root filter find repl =
visitor root filter |> Seq.iter (filereplace find repl)
Update Slight improvement if you want to be able to read 'locked' files (e.g. csv files which are already open in Excel...):
let safereadall f =
use fs = new FileStream(f, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
use sr = new StreamReader(fs, System.Text.Encoding.Default)
sr.ReadToEnd()
let split sep (s:string) = System.Text.RegularExpressions.Regex.Split(s, sep)
let fileread f = safereadall f
let filereadlines f = f |> safereadall |> split System.Environment.NewLine
For performance intensive stuff where you need to check for null
let inline isNull o = System.Object.ReferenceEquals(o, null)
if isNull o then ... else ...
Is about 20x faster then
if o = null then ... else ...
Active Patterns, aka "Banana Splits", are a very handy construct that let one match against multiple regular expression patterns. This is much like AWK, but without the high performance of DFA's because the patterns are matched in sequence until one succeeds.
#light
open System
open System.Text.RegularExpressions
let (|Test|_|) pat s =
if (new Regex(pat)).IsMatch(s)
then Some()
else None
let (|Match|_|) pat s =
let opt = RegexOptions.None
let re = new Regex(pat,opt)
let m = re.Match(s)
if m.Success
then Some(m.Groups)
else None
Some examples of use:
let HasIndefiniteArticle = function
| Test "(?: |^)(a|an)(?: |$)" _ -> true
| _ -> false
type Ast =
| IntVal of string * int
| StringVal of string * string
| LineNo of int
| Goto of int
let Parse = function
| Match "^LET\s+([A-Z])\s*=\s*(\d+)$" g ->
IntVal( g.[1].Value, Int32.Parse(g.[2].Value) )
| Match "^LET\s+([A-Z]\$)\s*=\s*(.*)$" g ->
StringVal( g.[1].Value, g.[2].Value )
| Match "^(\d+)\s*:$" g ->
LineNo( Int32.Parse(g.[1].Value) )
| Match "^GOTO \s*(\d+)$" g ->
Goto( Int32.Parse(g.[1].Value) )
| s -> failwithf "Unexpected statement: %s" s
Maybe monad
type maybeBuilder() =
member this.Bind(v, f) =
match v with
| None -> None
| Some(x) -> f x
member this.Delay(f) = f()
member this.Return(v) = Some v
let maybe = maybeBuilder()
Here's a brief intro to monads for the uninitiated.
Option-coalescing operators
I wanted a version of the defaultArg function that had a syntax closer to the C# null-coalescing operator, ??. This lets me get the value from an Option while providing a default value, using a very concise syntax.
/// Option-coalescing operator - this is like the C# ?? operator, but works with
/// the Option type.
/// Warning: Unlike the C# ?? operator, the second parameter will always be
/// evaluated.
/// Example: let foo = someOption |? default
let inline (|?) value defaultValue =
defaultArg value defaultValue
/// Option-coalescing operator with delayed evaluation. The other version of
/// this operator always evaluates the default value expression. If you only
/// want to create the default value when needed, use this operator and pass
/// in a function that creates the default.
/// Example: let foo = someOption |?! (fun () -> new Default())
let inline (|?!) value f =
match value with Some x -> x | None -> f()
'Unitize' a function which doesn't handle units
Using the FloatWithMeasure function http://msdn.microsoft.com/en-us/library/ee806527(VS.100).aspx.
let unitize (f:float -> float) (v:float<'u>) =
LanguagePrimitives.FloatWithMeasure<'u> (f (float v))
Example:
[<Measure>] type m
[<Measure>] type kg
let unitize (f:float -> float) (v:float<'u>) =
LanguagePrimitives.FloatWithMeasure<'u> (f (float v))
//this function doesn't take units
let badinc a = a + 1.
//this one does!
let goodinc v = unitize badinc v
goodinc 3.<m>
goodinc 3.<kg>
OLD version:
let unitize (f:float -> float) (v:float<'u>) =
let unit = box 1. :?> float<'u>
unit * (f (v/unit))
Kudos to kvb
Scale/Ratio function builder
Again, trivial, but handy.
//returns a function which will convert from a1-a2 range to b1-b2 range
let scale (a1:float<'u>, a2:float<'u>) (b1:float<'v>,b2:float<'v>) =
let m = (b2 - b1)/(a2 - a1) //gradient of line (evaluated once only..)
(fun a -> b1 + m * (a - a1))
Example:
[<Measure>] type m
[<Measure>] type px
let screenSize = (0.<px>, 300.<px>)
let displayRange = (100.<m>, 200.<m>)
let scaleToScreen = scale displayRange screenSize
scaleToScreen 120.<m> //-> 60.<px>
Transposing a list (seen on Jomo Fisher's blog)
///Given list of 'rows', returns list of 'columns'
let rec transpose lst =
match lst with
| (_::_)::_ -> List.map List.head lst :: transpose (List.map List.tail lst)
| _ -> []
transpose [[1;2;3];[4;5;6];[7;8;9]] // returns [[1;4;7];[2;5;8];[3;6;9]]
And here is a tail-recursive version which (from my sketchy profiling) is mildly slower, but has the advantage of not throwing a stack overflow when the inner lists are longer than 10000 elements (on my machine):
let transposeTR lst =
let rec inner acc lst =
match lst with
| (_::_)::_ -> inner (List.map List.head lst :: acc) (List.map List.tail lst)
| _ -> List.rev acc
inner [] lst
If I was clever, I'd try and parallelise it with async...
F# Map <-> C# Dictionary
(I know, I know, System.Collections.Generic.Dictionary isn't really a 'C#' dictionary)
C# to F#
(dic :> seq<_>) //cast to seq of KeyValuePair
|> Seq.map (|KeyValue|) //convert KeyValuePairs to tuples
|> Map.ofSeq //convert to Map
(From Brian, here, with improvement proposed by Mauricio in comment below. (|KeyValue|) is an active pattern for matching KeyValuePair - from FSharp.Core - equivalent to (fun kvp -> kvp.Key, kvp.Value))
Interesting alternative
To get all of the immutable goodness, but with the O(1) lookup speed of Dictionary, you can use the dict operator, which returns an immutable IDictionary (see this question).
I currently can't see a way to directly convert a Dictionary using this method, other than
(dic :> seq<_>) //cast to seq of KeyValuePair
|> (fun kvp -> kvp.Key, kvp.Value) //convert KeyValuePairs to tuples
|> dict //convert to immutable IDictionary
F# to C#
let dic = Dictionary()
map |> Map.iter (fun k t -> dic.Add(k, t))
dic
What is weird here is that FSI will report the type as (for example):
val it : Dictionary<string,int> = dict [("a",1);("b",2)]
but if you feed dict [("a",1);("b",2)] back in, FSI reports
IDictionary<string,int> = seq[[a,1] {Key = "a"; Value = 1; } ...
Tree-sort / Flatten a tree into a list
I have the following binary tree:
___ 77 _
/ \
______ 47 __ 99
/ \
21 _ 54
\ / \
43 53 74
/
39
/
32
Which is represented as follows:
type 'a tree =
| Node of 'a tree * 'a * 'a tree
| Nil
let myTree =
Node
(Node
(Node (Nil,21,Node (Node (Node (Nil,32,Nil),39,Nil),43,Nil)),47,
Node (Node (Nil,53,Nil),54,Node (Nil,74,Nil))),77,Node (Nil,99,Nil))
A straightforward method to flatten the tree is:
let rec flatten = function
| Nil -> []
| Node(l, a, r) -> flatten l # a::flatten r
This isn't tail-recursive, and I believe the # operator causes it to be O(n log n) or O(n^2) with unbalanced binary trees. With a little tweaking, I came up with this tail-recursive O(n) version:
let flatten2 t =
let rec loop acc c = function
| Nil -> c acc
| Node(l, a, r) ->
loop acc (fun acc' -> loop (a::acc') c l) r
loop [] (fun x -> x) t
Here's the output in fsi:
> flatten2 myTree;;
val it : int list = [21; 32; 39; 43; 47; 53; 54; 74; 77; 99]
LINQ-to-XML helpers
namespace System.Xml.Linq
// hide warning about op_Explicit
#nowarn "77"
[<AutoOpen>]
module XmlUtils =
/// Converts a string to an XName.
let xn = XName.op_Implicit
/// Converts a string to an XNamespace.
let xmlns = XNamespace.op_Implicit
/// Gets the string value of any XObject subclass that has a Value property.
let inline xstr (x : ^a when ^a :> XObject) =
(^a : (member get_Value : unit -> string) x)
/// Gets a strongly-typed value from any XObject subclass, provided that
/// an explicit conversion to the output type has been defined.
/// (Many explicit conversions are defined on XElement and XAttribute)
/// Example: let value:int = xval foo
let inline xval (x : ^a when ^a :> XObject) : ^b =
((^a or ^b) : (static member op_Explicit : ^a -> ^b) x)
/// Dynamic lookup operator for getting an attribute value from an XElement.
/// Returns a string option, set to None if the attribute was not present.
/// Example: let value = foo?href
/// Example with default: let value = defaultArg foo?Name "<Unknown>"
let (?) (el:XElement) (name:string) =
match el.Attribute(xn name) with
| null -> None
| att -> Some(att.Value)
/// Dynamic operator for setting an attribute on an XElement.
/// Example: foo?href <- "http://www.foo.com/"
let (?<-) (el:XElement) (name:string) (value:obj) =
el.SetAttributeValue(xn name, value)
OK, this has nothing to do with snippets, but I keep forgetting this:
If you are in the interactive window, you hit F7 to jump back to the code window (without deselecting the code which you just ran...)
Going from code window to F# window (and also to open the F# window) is Ctrl Alt F
(unless CodeRush has stolen your bindings...)
Weighted sum of arrays
Calculating a weighted [n-array] sum of a [k-array of n-arrays] of numbers, based on a [k-array] of weights
(Copied from this question, and kvb's answer)
Given these arrays
let weights = [|0.6;0.3;0.1|]
let arrs = [| [|0.0453;0.065345;0.07566;1.562;356.6|] ;
[|0.0873;0.075565;0.07666;1.562222;3.66|] ;
[|0.06753;0.075675;0.04566;1.452;3.4556|] |]
We want a weighted sum (by column), given that both dimensions of the arrays can be variable.
Array.map2 (fun w -> Array.map ((*) w)) weights arrs
|> Array.reduce (Array.map2 (+))
First line: Partial application of the first Array.map2 function to weights yields a new function (Array.map ((*) weight) which is applied (for each weight) to each array in arr.
Second line: Array.reduce is like fold, except it starts on the second value and uses the first as the initial 'state'. In this case each value is a 'line' of our array of arrays. So applying an Array.map2 (+) on the first two lines means that we sum the first two arrays, which leaves us with a new array, which we then (Array.reduce) sum again onto the next (in this case last) array.
Result:
[|0.060123; 0.069444; 0.07296; 1.5510666; 215.40356|]
Performance testing
(Found here and updated for latest release of F#)
open System
open System.Diagnostics
module PerformanceTesting =
let Time func =
let stopwatch = new Stopwatch()
stopwatch.Start()
func()
stopwatch.Stop()
stopwatch.Elapsed.TotalMilliseconds
let GetAverageTime timesToRun func =
Seq.initInfinite (fun _ -> (Time func))
|> Seq.take timesToRun
|> Seq.average
let TimeOperation timesToRun =
GC.Collect()
GetAverageTime timesToRun
let TimeOperations funcsWithName =
let randomizer = new Random(int DateTime.Now.Ticks)
funcsWithName
|> Seq.sortBy (fun _ -> randomizer.Next())
|> Seq.map (fun (name, func) -> name, (TimeOperation 100000 func))
let TimeOperationsAFewTimes funcsWithName =
Seq.initInfinite (fun _ -> (TimeOperations funcsWithName))
|> Seq.take 50
|> Seq.concat
|> Seq.groupBy fst
|> Seq.map (fun (name, individualResults) -> name, (individualResults |> Seq.map snd |> Seq.average))
DataSetExtensions for F#, DataReaders
System.Data.DataSetExtensions.dll adds the ability to treat a DataTable as an IEnumerable<DataRow> as well as unboxing the values of individual cells in a way that gracefully handles DBNull by supporting System.Nullable. For example, in C# we can get the value of an integer column that contains nulls, and specify that DBNull should default to zero with a very concise syntax:
var total = myDataTable.AsEnumerable()
.Select(row => row.Field<int?>("MyColumn") ?? 0)
.Sum();
There are two areas where DataSetExtensions are lacking, however. First, it doesn't support IDataReader and second, it doesn't support the F# option type. The following code does both - it allows an IDataReader to be treated as a seq<IDataRecord>, and it can unbox values from either a reader or a dataset, with support for F# options or System.Nullable. Combined with the option-coalescing operator in another answer, this allows for code such as the following when working with a DataReader:
let total =
myReader.AsSeq
|> Seq.map (fun row -> row.Field<int option>("MyColumn") |? 0)
|> Seq.sum
Perhaps a more idiomatic F# way of ignoring database nulls would be...
let total =
myReader.AsSeq
|> Seq.choose (fun row -> row.Field<int option>("MyColumn"))
|> Seq.sum
Further, the extension methods defined below are usable from both F# and from C#/VB.
open System
open System.Data
open System.Reflection
open System.Runtime.CompilerServices
open Microsoft.FSharp.Collections
/// Ported from System.Data.DatasetExtensions.dll to add support for the Option type.
[<AbstractClass; Sealed>]
type private UnboxT<'a> private () =
// This class generates a converter function based on the desired output type,
// and then re-uses the converter function forever. Because the class itself is generic,
// different output types get different cached converter functions.
static let referenceField (value:obj) =
if value = null || DBNull.Value.Equals(value) then
Unchecked.defaultof<'a>
else
unbox value
static let valueField (value:obj) =
if value = null || DBNull.Value.Equals(value) then
raise <| InvalidCastException("Null cannot be converted to " + typeof<'a>.Name)
else
unbox value
static let makeConverter (target:Type) methodName =
Delegate.CreateDelegate(typeof<Converter<obj,'a>>,
typeof<UnboxT<'a>>
.GetMethod(methodName, BindingFlags.NonPublic ||| BindingFlags.Static)
.MakeGenericMethod([| target.GetGenericArguments().[0] |]))
|> unbox<Converter<obj,'a>>
|> FSharpFunc.FromConverter
static let unboxFn =
let theType = typeof<'a>
if theType.IsGenericType && not theType.IsGenericTypeDefinition then
let genericType = theType.GetGenericTypeDefinition()
if typedefof<Nullable<_>> = genericType then
makeConverter theType "NullableField"
elif typedefof<option<_>> = genericType then
makeConverter theType "OptionField"
else
invalidOp "The only generic types supported are Option<T> and Nullable<T>."
elif theType.IsValueType then
valueField
else
referenceField
static member private NullableField<'b when 'b : struct and 'b :> ValueType and 'b:(new:unit -> 'b)> (value:obj) =
if value = null || DBNull.Value.Equals(value) then
Nullable<_>()
else
Nullable<_>(unbox<'b> value)
static member private OptionField<'b> (value:obj) =
if value = null || DBNull.Value.Equals(value) then
None
else
Some(unbox<'b> value)
static member inline Unbox =
unboxFn
/// F# data-related extension methods.
[<AutoOpen>]
module FsDataEx =
type System.Data.IDataReader with
/// Exposes a reader's current result set as seq<IDataRecord>.
/// Reader is closed when sequence is fully enumerated.
member this.AsSeq =
seq { use reader = this
while reader.Read() do yield reader :> IDataRecord }
/// Exposes all result sets in a reader as seq<seq<IDataRecord>>.
/// Reader is closed when sequence is fully enumerated.
member this.AsMultiSeq =
let rowSeq (reader:IDataReader) =
seq { while reader.Read() do yield reader :> IDataRecord }
seq {
use reader = this
yield rowSeq reader
while reader.NextResult() do
yield rowSeq reader
}
/// Populates a new DataSet with the contents of the reader. Closes the reader after completion.
member this.ToDataSet () =
use reader = this
let dataSet = new DataSet(RemotingFormat=SerializationFormat.Binary, EnforceConstraints=false)
dataSet.Load(reader, LoadOption.OverwriteChanges, [| "" |])
dataSet
type System.Data.IDataRecord with
/// Gets a value from the record by name.
/// DBNull and null are returned as the default value for the type.
/// Supports both nullable and option types.
member this.Field<'a> (fieldName:string) =
this.[fieldName] |> UnboxT<'a>.Unbox
/// Gets a value from the record by column index.
/// DBNull and null are returned as the default value for the type.
/// Supports both nullable and option types.
member this.Field<'a> (ordinal:int) =
this.GetValue(ordinal) |> UnboxT<'a>.Unbox
type System.Data.DataRow with
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (columnName:string) =
this.[columnName] |> UnboxT<'a>.Unbox
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (columnIndex:int) =
this.[columnIndex] |> UnboxT<'a>.Unbox
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (column:DataColumn) =
this.[column] |> UnboxT<'a>.Unbox
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (columnName:string, version:DataRowVersion) =
this.[columnName, version] |> UnboxT<'a>.Unbox
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (columnIndex:int, version:DataRowVersion) =
this.[columnIndex, version] |> UnboxT<'a>.Unbox
/// Identical to the Field method from DatasetExtensions, but supports the F# Option type.
member this.Field2<'a> (column:DataColumn, version:DataRowVersion) =
this.[column, version] |> UnboxT<'a>.Unbox
/// C# data-related extension methods.
[<Extension; AbstractClass; Sealed>]
type CsDataEx private () =
/// Populates a new DataSet with the contents of the reader. Closes the reader after completion.
[<Extension>]
static member ToDataSet(this:IDataReader) =
this.ToDataSet()
/// Exposes a reader's current result set as IEnumerable{IDataRecord}.
/// Reader is closed when sequence is fully enumerated.
[<Extension>]
static member AsEnumerable(this:IDataReader) =
this.AsSeq
/// Exposes all result sets in a reader as IEnumerable{IEnumerable{IDataRecord}}.
/// Reader is closed when sequence is fully enumerated.
[<Extension>]
static member AsMultipleEnumerable(this:IDataReader) =
this.AsMultiSeq
/// Gets a value from the record by name.
/// DBNull and null are returned as the default value for the type.
/// Supports both nullable and option types.
[<Extension>]
static member Field<'T> (this:IDataRecord, fieldName:string) =
this.Field<'T>(fieldName)
/// Gets a value from the record by column index.
/// DBNull and null are returned as the default value for the type.
/// Supports both nullable and option types.
[<Extension>]
static member Field<'T> (this:IDataRecord, ordinal:int) =
this.Field<'T>(ordinal)
Handling arguments in a command line application:
//We assume that the actual meat is already defined in function
// DoStuff (string -> string -> string -> unit)
let defaultOutOption = "N"
let defaultUsageOption = "Y"
let usage =
"Scans a folder for and outputs results.\n" +
"Usage:\n\t MyApplication.exe FolderPath [IncludeSubfolders (Y/N) : default=" +
defaultUsageOption + "] [OutputToFile (Y/N): default=" + defaultOutOption + "]"
let HandlArgs arr =
match arr with
| [|d;u;o|] -> DoStuff d u o
| [|d;u|] -> DoStuff d u defaultOutOption
| [|d|] -> DoStuff d defaultUsageOption defaultOutOption
| _ ->
printf "%s" usage
Console.ReadLine() |> ignore
[<EntryPoint>]
let main (args : string array) =
args |> HandlArgs
0
(I had a vague memory of this technique being inspired by Robert Pickering, but can't find a reference now)
A handy cache function that keeps up to max (key,reader(key)) in a dictionary and use a SortedList to track the MRU keys
let Cache (reader: 'key -> 'value) max =
let cache = new Dictionary<'key,LinkedListNode<'key * 'value>>()
let keys = new LinkedList<'key * 'value>()
fun (key : 'key) -> (
let found, value = cache.TryGetValue key
match found with
|true ->
keys.Remove value
keys.AddFirst value |> ignore
(snd value.Value)
|false ->
let newValue = key,reader key
let node = keys.AddFirst newValue
cache.[key] <- node
if (keys.Count > max) then
let lastNode = keys.Last
cache.Remove (fst lastNode.Value) |> ignore
keys.RemoveLast() |> ignore
(snd newValue))
Creating XElements
Nothing amazing, but I keep getting caught out by the implicit conversion of XNames:
#r "System.Xml.Linq.dll"
open System.Xml.Linq
//No! ("type string not compatible with XName")
//let el = new XElement("MyElement", "text")
//better
let xn s = XName.op_Implicit s
let el = new XElement(xn "MyElement", "text")
//or even
let xEl s o = new XElement(xn s, o)
let el = xEl "MyElement" "text"
Pairwise and pairs
I always expect Seq.pairwise to give me [(1,2);(3;4)] and not [(1,2);(2,3);(3,4)]. Given that neither exist in List, and that I needed both, here's the code for future reference. I think they're tail recursive.
//converts to 'windowed tuples' ([1;2;3;4;5] -> [(1,2);(2,3);(3,4);(4,5)])
let pairwise lst =
let rec loop prev rem acc =
match rem with
| hd::tl -> loop hd tl ((prev,hd)::acc)
| _ -> List.rev acc
loop (List.head lst) (List.tail lst) []
//converts to 'paged tuples' ([1;2;3;4;5;6] -> [(1,2);(3,4);(5,6)])
let pairs lst =
let rec loop rem acc =
match rem with
| l::r::tl -> loop tl ((l,r)::acc)
| l::[] -> failwith "odd-numbered list"
| _ -> List.rev acc
loop lst []
Naive CSV reader (i.e., won't handle anything nasty)
(Using filereadlines and List.transpose from other answers here)
///Given a file path, returns a List of row lists
let ReadCSV =
filereadlines
>> Array.map ( fun line -> line.Split([|',';';'|]) |> List.ofArray )
>> Array.toList
///takes list of col ids and list of rows,
/// returns array of columns (in requested order)
let GetColumns cols rows =
//Create filter
let pick cols (row:list<'a>) = List.map (fun i -> row.[i]) cols
rows
|> transpose //change list of rows to list of columns
|> pick cols //pick out the columns we want
|> Array.ofList //an array output is easier to index for user
Example
"C:\MySampleCSV"
|> ReadCSV
|> List.tail //skip header line
|> GetColumns [0;3;1] //reorder columns as well, if needs be.
Date Range
simple but useful list of dates between fromDate and toDate
let getDateRange fromDate toDate =
let rec dates (fromDate:System.DateTime) (toDate:System.DateTime) =
seq {
if fromDate <= toDate then
yield fromDate
yield! dates (fromDate.AddDays(1.0)) toDate
}
dates fromDate toDate
|> List.ofSeq
toggle code to sql
More trivial than most on this list, but handy nonetheless:
I'm always taking sql in and out of code to move it to a sql environment during development. Example:
let sql = "select a,b,c "
+ "from table "
+ "where a = 1"
needs to be 'stripped' to:
select a,b,c
from table
where a = 1
keeping the formatting. It's a pain to strip out the code symbols for the sql editor, then put them back again by hand when I've got the sql worked out. These two functions toggle the sql back and forth from code to stripped:
// reads the file with the code quoted sql, strips code symbols, dumps to FSI
let stripForSql fileName =
File.ReadAllText(fileName)
|> (fun s -> Regex.Replace(s, "\+(\s*)\"", ""))
|> (fun s -> s.Replace("\"", ""))
|> (fun s -> Regex.Replace(s, ";$", "")) // end of line semicolons
|> (fun s -> Regex.Replace(s, "//.+", "")) // get rid of any comments
|> (fun s -> printfn "%s" s)
then when you are ready to put it back into your code source file:
let prepFromSql fileName =
File.ReadAllText(fileName)
|> (fun s -> Regex.Replace(s, #"\r\n", " \"\r\n+\"")) // matches newline
|> (fun s -> Regex.Replace(s, #"\A", " \""))
|> (fun s -> Regex.Replace(s, #"\z", " \""))
|> (fun s -> printfn "%s" s)
I'd love to get rid of the input file but can't even begin to grok how to make that happen. anyone?
edit:
I figured out how to eliminate the requirement of a file for these functions by adding a windows forms dialog input/output. Too much code to show, but for those who would like to do such a thing, that's how I solved it.
Pascal's Triangle (hey, someone might find it useful)
So we want to create a something like this:
1
1 1
1 2 1
1 3 3 1
1 4 6 4 1
Easy enough:
let rec next = function
| [] -> []
| x::y::xs -> (x + y)::next (y::xs)
| x::xs -> x::next xs
let pascal n =
seq { 1 .. n }
|> List.scan (fun acc _ -> next (0::acc) ) [1]
The next function returns a new list where each item[i] = item[i] + item[i + 1].
Here's the output in fsi:
> pascal 10 |> Seq.iter (printfn "%A");;
[1]
[1; 1]
[1; 2; 1]
[1; 3; 3; 1]
[1; 4; 6; 4; 1]
[1; 5; 10; 10; 5; 1]
[1; 6; 15; 20; 15; 6; 1]
[1; 7; 21; 35; 35; 21; 7; 1]
[1; 8; 28; 56; 70; 56; 28; 8; 1]
[1; 9; 36; 84; 126; 126; 84; 36; 9; 1]
[1; 10; 45; 120; 210; 252; 210; 120; 45; 10; 1]
For the adventurous, here's a tail-recursive version:
let rec next2 cont = function
| [] -> cont []
| x::y::xs -> next2 (fun l -> cont <| (x + y)::l ) <| y::xs
| x::xs -> next2 (fun l -> cont <| x::l ) <| xs
let pascal2 n =
set { 1 .. n }
|> Seq.scan (fun acc _ -> next2 id <| 0::acc)) [1]
Flatten a List
if you have something like this:
let listList = [[1;2;3;];[4;5;6]]
and want to 'flatten' it down to a singe list so the result is like this:
[1;2;3;4;5;6]
it can be done thusly:
let flatten (l: 'a list list) =
seq {
yield List.head (List.head l)
for a in l do yield! (Seq.skip 1 a)
}
|> List.ofSeq
List comprehensions for float
This [23.0 .. 1.0 .. 40.0] was marked as deprecated a few versions backed.
But apparently, this works:
let dl = 9.5 / 11.
let min = 21.5 + dl
let max = 40.5 - dl
let a = [ for z in min .. dl .. max -> z ]
let b = a.Length
(BTW, there's a floating point gotcha in there. Discovered at fssnip - the other place for F# snippets)
Parallel map
let pmap f s =
seq { for a in s -> async { return f s } }
|> Async.Parallel
|> Async.Run

Resources