How to check if string only contains alphabets in F# - f#

I have a string which should only contain alphabets. To check that I have written the following code
let isCorrectString(str:string) =
let isInRange x = (x > 64 && x < 91 ) || (x > 96 && x < 123) || x = 32
not (str.Any(fun ch -> not (isInRange (ch :?> int)) ))
I am obviously using LINQ's `Any' extension method here. Is there any better way to write the above code ?

If you want to just verify that it's correct, you could do:
let isCorrectString(str:string) =
let isInRange x = (x > 64 && x < 91 ) || (x > 96 && x < 123) || x = 32
let bad =
str
|> Seq.map (fun c -> isInRange(int c))
|> Seq.exists (fun b -> b = false)
not bad
Note that this may be a simpler alternative:
let isCorrectString(str:string) =
str
|> Seq.forall (fun c -> System.Char.IsLetter(c) || c = ' ')
Or, if you prefer:
let isCorrectString(str:string) =
str
|> Seq.tryFind (fun c -> not(System.Char.IsLetter(c) || c = ' '))
|> Option.isNone

Related

How to sum adjacent numbers of same sign using List.fold in F#

Let's say I have a list in F# like this: [5,-2, -6, 7, -2, 2, 14, 2]
I want to write a function that will use List.fold to return a new list such as [5, -8, 7, -2, 18]
My template looks like this:
let sumAdjacentOfSameSign (lst :int list) : int list =
let f x y =
if x.Length = 0 then
[y]
elif System.Math.Sign(x) = System.Math.Sign(y) then ...
else y :: x
List.fold f [] lst
I need to fill in the ... part but can't quite say how.
Making the fewest changes to your code, I would do this:
let sumAdjacentOfSameSign (lst :int list) : int list =
let f (x : int list) (y : int) =
if x.Length = 0 then
[y]
elif System.Math.Sign(x.Head) = System.Math.Sign(y) then
(x.Head + y) :: x.Tail
else y :: x
List.fold f [] lst
|> List.rev // note that you have to reverse the resulting list
But I would suggest simplifying f to:
let f (x : int list) (y : int) =
match x with
| head :: tail when
System.Math.Sign(head) = System.Math.Sign(y) ->
(head + y) :: tail
| _ -> y :: x

F# functional style approach much slower

Trying to learn F#, by solving some programming puzzles. I don't want to add too many details about the problem as I don't want to spoil the fun for others.
Basically, the issue is to find all 4-uples { (i,j,k,l) | i ^ j ^ k ^ l != 0 } with no repetition (eg., (1,1,1,2) and (1,1,2,1) are the same and should be counted just once).
I have found a O(n^3) approach which works, please see countImperative(a,b,c,d) below. But I also tried to refactor the code as to get rid of the nested for loops. However, I could not do so without a significant performance penalty. It was my impression that F#'s syntactic sugar would allow a more concise style (using pipes and folds), letting the compiler do the heavy-lifting to produce comparably fast code (compared to my nested for loops). The big performance hit comes from the calculation of the partial2 sum.
Here's the code:
open System
open System.Diagnostics
open System.Collections
module quadruples =
[<EntryPoint>]
let main argv =
let input = "2000 2000 2000 2000"
let ordered = [ for x in input.Split([|' '|]) -> Convert.ToInt32(x) ] |> List.sort
let a,b,c,d = ordered.[0], ordered.[1], ordered.[2], ordered.[3]
let inner(a,b) = a * (a-1) / 2 + a * (b-a)
let sw = new Stopwatch()
sw.Start()
let partial1 = [ 1.. b ] |> List.fold (fun acc j -> acc + (int64 ((min a j) * inner(c-j+1, d-j+1)))) 0L
sw.Stop()
let elapsed1 = (sw.ElapsedMilliseconds |> double) / 1000.0
printfn "Partial1: %f s" elapsed1
sw.Restart()
let combinations = [ for i in 1..a do for j in i+1..b do yield (j,i^^^j) ]
let range = [ 1..c ]
let partial2 = combinations |> List.fold(fun acc (j,x) -> acc + (range |> List.skip(j-1) |> List.fold(fun acc k -> if k ^^^ x < k || k ^^^ x > d then acc + 1L else acc) 0L)) 0L
sw.Stop()
let elapsed2 = (sw.ElapsedMilliseconds |> double) / 1000.0
printfn "Partial2: %f s" elapsed2
printfn "Functional: %d, Elapsed: %f s" (partial1 + partial2) (elapsed1 + elapsed2)
// "imperative" approach
let countImperative(a,b,c,d) =
let mutable count = seq { 1..b } |> Seq.fold (fun acc j -> acc + (int64 ((min a j) * inner(c-j+1, d-j+1)))) 0L
for i in 1..a do
for j in i+1..b do
let x = i ^^^ j
for k in j..c do
let y = x ^^^ k
if y < k || y > d then
count <- count + 1L
count
sw.Restart();
let count = countImperative(a,b,c,d)
sw.Stop()
printfn "Imperative: %d, Elapsed: %f s" count ((sw.ElapsedMilliseconds |> double) / 1000.0)
0 // return an integer exit code
So my question was, if there is any way to speed up the code (specifically the calculation of partial2) while maintaining F#'s nice syntax.

Further optimizing Number to Roman Numeral function in F#

I'm new to F# and I'm curious if this can still be optimized further. I am not particularly sure if I've done this correctly as well. I'm curious particularly on the last line as it looks really long and hideous.
I've searched over google, but only Roman Numeral to Number solutions only show up, so I'm having a hard time comparing.
type RomanDigit = I | IV | V | IX
let rec romanNumeral number =
let values = [ 9; 5; 4; 1 ]
let capture number values =
values
|> Seq.find ( fun x -> number >= x )
let toRomanDigit x =
match x with
| 9 -> IX
| 5 -> V
| 4 -> IV
| 1 -> I
match number with
| 0 -> []
| int -> Seq.toList ( Seq.concat [ [ toRomanDigit ( capture number values ) ]; romanNumeral ( number - ( capture number values ) ) ] )
Thanks for anyone who can help with this problem.
A slightly shorter way of recursively finding the largest digit representation that can be subtracted from the value (using List.find):
let units =
[1000, "M"
900, "CM"
500, "D"
400, "CD"
100, "C"
90, "XC"
50, "L"
40, "XL"
10, "X"
9, "IX"
5, "V"
4, "IV"
1, "I"]
let rec toRomanNumeral = function
| 0 -> ""
| n ->
let x, s = units |> List.find (fun (x,s) -> x <= n)
s + toRomanNumeral (n-x)
If I had to use a Discriminated Union to represent the roman letters I would not include IV and IX.
type RomanDigit = I|V|X
let numberToRoman n =
let (r, diff) =
if n > 8 then [X], n - 10
elif n > 3 then [V], n - 5
else [], n
if diff < 0 then I::r
else r # (List.replicate diff I)
Then, based in this solution you can go further and extend it to all numbers.
Here's my first attempt, using fold and partial application:
type RomanDigit = I|V|X|L|C|D|M
let numberToRoman n i v x =
let (r, diff) =
if n > 8 then [x], n - 10
elif n > 3 then [v], n - 5
else [], n
if diff < 0 then i::r
else r # (List.replicate diff i)
let allDigits (n:int) =
let (_, f) =
[(I,V); (X,L); (C,D)]
|> List.fold (fun (n, f) (i, v) ->
(n / 10, fun x -> (numberToRoman (n % 10) i v x) # f i)) (n, (fun _ -> []))
f M
Here's a tail-recursive version of #Philip Trelford's answer:
let toRomanNumeral n =
let rec iter acc n =
match n with
| 0 -> acc
| n ->
let x, s = units |> List.find (fun (x, _) -> x <= n)
iter (acc + s) (n-x)
iter "" n

Combine memoization and tail-recursion

Is it possible to combine memoization and tail-recursion somehow? I'm learning F# at the moment and understand both concepts but can't seem to combine them.
Suppose I have the following memoize function (from Real-World Functional Programming):
let memoize f = let cache = new Dictionary<_, _>()
(fun x -> match cache.TryGetValue(x) with
| true, y -> y
| _ -> let v = f(x)
cache.Add(x, v)
v)
and the following factorial function:
let rec factorial(x) = if (x = 0) then 1 else x * factorial(x - 1)
Memoizing factorial isn't too difficult and making it tail-recursive isn't either:
let rec memoizedFactorial =
memoize (fun x -> if (x = 0) then 1 else x * memoizedFactorial(x - 1))
let tailRecursiveFactorial(x) =
let rec factorialUtil(x, res) = if (x = 0)
then res
else let newRes = x * res
factorialUtil(x - 1, newRes)
factorialUtil(x, 1)
But can you combine memoization and tail-recursion? I made some attempts but can't seem to get it working. Or is this simply not possible?
As always, continuations yield an elegant tailcall solution:
open System.Collections.Generic
let cache = Dictionary<_,_>() // TODO move inside
let memoizedTRFactorial =
let rec fac n k = // must make tailcalls to k
match cache.TryGetValue(n) with
| true, r -> k r
| _ ->
if n=0 then
k 1
else
fac (n-1) (fun r1 ->
printfn "multiplying by %d" n //***
let r = r1 * n
cache.Add(n,r)
k r)
fun n -> fac n id
printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in cache do
printfn "%d: %d" k v
printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2
printfn "---"
// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3
There are two kinds of tests. First, this demos that calling F(4) caches F(4), F(3), F(2), F(1) as you would like.
Then, comment out the *** printf and uncomment the final test (and compile in Release mode) to show that it does not StackOverflow (it uses tailcalls correctly).
Perhaps I'll generalize out 'memoize' and demonstrate it on 'fib' next...
EDIT
Ok, here's the next step, I think, decoupling memoization from factorial:
open System.Collections.Generic
let cache = Dictionary<_,_>() // TODO move inside
let memoize fGuts n =
let rec newFunc n k = // must make tailcalls to k
match cache.TryGetValue(n) with
| true, r -> k r
| _ ->
fGuts n (fun r ->
cache.Add(n,r)
k r) newFunc
newFunc n id
let TRFactorialGuts n k memoGuts =
if n=0 then
k 1
else
memoGuts (n-1) (fun r1 ->
printfn "multiplying by %d" n //***
let r = r1 * n
k r)
let memoizedTRFactorial = memoize TRFactorialGuts
printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in cache do
printfn "%d: %d" k v
printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2
printfn "---"
// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3
EDIT
Ok, here's a fully generalized version that seems to work.
open System.Collections.Generic
let memoize fGuts =
let cache = Dictionary<_,_>()
let rec newFunc n k = // must make tailcalls to k
match cache.TryGetValue(n) with
| true, r -> k r
| _ ->
fGuts n (fun r ->
cache.Add(n,r)
k r) newFunc
cache, (fun n -> newFunc n id)
let TRFactorialGuts n k memoGuts =
if n=0 then
k 1
else
memoGuts (n-1) (fun r1 ->
printfn "multiplying by %d" n //***
let r = r1 * n
k r)
let facCache,memoizedTRFactorial = memoize TRFactorialGuts
printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in facCache do
printfn "%d: %d" k v
printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2
printfn "---"
// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3
let TRFibGuts n k memoGuts =
if n=0 || n=1 then
k 1
else
memoGuts (n-1) (fun r1 ->
memoGuts (n-2) (fun r2 ->
printfn "adding %d+%d" r1 r2 //%%%
let r = r1+r2
k r))
let fibCache, memoizedTRFib = memoize TRFibGuts
printfn "---"
let r5 = memoizedTRFib 4
printfn "%d" r5
for KeyValue(k,v) in fibCache do
printfn "%d: %d" k v
printfn "---"
let r6 = memoizedTRFib 5
printfn "%d" r6
printfn "---"
// comment out %%% line, then run this
//let r7 = memoizedTRFib 100000
//printfn "%d" r7
The predicament of memoizing tail-recursive functions is, of course, that when tail-recursive function
let f x =
......
f x1
calls itself, it is not allowed to do anything with a result of the recursive call, including putting it into cache. Tricky; so what can we do?
The critical insight here is that since the recursive function is not allowed to do anything with a result of recursive call, the result for all arguments to recursive calls will be the same! Therefore if recursion call trace is this
f x0 -> f x1 -> f x2 -> f x3 -> ... -> f xN -> res
then for all x in x0,x1,...,xN the result of f x will be the same, namely res. So the last invocation of a recursive function, the non-recursive call, knows the results for all the previous values - it is in a position to cache them. The only thing you need to do is to pass a list of visited values to it. Here is what it might look for factorial:
let cache = Dictionary<_,_>()
let rec fact0 l ((n,res) as arg) =
let commitToCache r =
l |> List.iter (fun a -> cache.Add(a,r))
match cache.TryGetValue(arg) with
| true, cachedResult -> commitToCache cachedResult; cachedResult
| false, _ ->
if n = 1 then
commitToCache res
cache.Add(arg, res)
res
else
fact0 (arg::l) (n-1, n*res)
let fact n = fact0 [] (n,1)
But wait! Look - l parameter of fact0 contains all the arguments to recursive calls to fact0 - just like the stack would in a non-tail-recursive version! That is exactly right. Any non-tail recursive algorithm can be converted to a tail-recursive one by moving the "list of stack frames" from stack to heap and converting the "postprocessing" of recursive call result into a walk over that data structure.
Pragmatic note: The factorial example above illustrates a general technique. It is quite useless as is - for factorial function it is quite enough to cache the top-level fact n result, because calculation of fact n for a particular n only hits a unique series of (n,res) pairs of arguments to fact0 - if (n,1) is not cached yet, then none of the pairs fact0 is going to be called on are.
Note that in this example, when we went from non-tail-recursive factorial to a tail-recursive factorial, we exploited the fact that multiplication is associative and commutative - tail-recursive factorial execute a different set of multiplications than a non-tail-recursive one.
In fact, a general technique exists for going from non-tail-recursive to tail-recursive algorithm, which yields an algorithm equivalent to a tee. This technique is called "continuatuion-passing transformation". Going that route, you can take a non-tail-recursive memoizing factorial and get a tail-recursive memoizing factorial by pretty much a mechanical transformation. See Brian's answer for exposition of this method.
I'm not sure if there's a simpler way to do this, but one approach would be to create a memoizing y-combinator:
let memoY f =
let cache = Dictionary<_,_>()
let rec fn x =
match cache.TryGetValue(x) with
| true,y -> y
| _ -> let v = f fn x
cache.Add(x,v)
v
fn
Then, you can use this combinator in lieu of "let rec", with the first argument representing the function to call recursively:
let tailRecFact =
let factHelper fact (x, res) =
printfn "%i,%i" x res
if x = 0 then res
else fact (x-1, x*res)
let memoized = memoY factHelper
fun x -> memoized (x,1)
EDIT
As Mitya pointed out, memoY doesn't preserve the tail recursive properties of the memoee. Here's a revised combinator which uses exceptions and mutable state to memoize any recursive function without overflowing the stack (even if the original function is not itself tail recursive!):
let memoY f =
let cache = Dictionary<_,_>()
fun x ->
let l = ResizeArray([x])
while l.Count <> 0 do
let v = l.[l.Count - 1]
if cache.ContainsKey(v) then l.RemoveAt(l.Count - 1)
else
try
cache.[v] <- f (fun x ->
if cache.ContainsKey(x) then cache.[x]
else
l.Add(x)
failwith "Need to recurse") v
with _ -> ()
cache.[x]
Unfortunately, the machinery which is inserted into each recursive call is somewhat heavy, so performance on un-memoized inputs requiring deep recursion can be a bit slow. However, compared to some other solutions, this has the benefit that it requires fairly minimal changes to the natural expression of recursive functions:
let fib = memoY (fun fib n ->
printfn "%i" n;
if n <= 1 then n
else (fib (n-1)) + (fib (n-2)))
let _ = fib 5000
EDIT
I'll expand a bit on how this compares to other solutions. This technique takes advantage of the fact that exceptions provide a side channel: a function of type 'a -> 'b doesn't actually need to return a value of type 'b, but can instead exit via an exception. We wouldn't need to use exceptions if the return type explicitly contained an additional value indicating failure. Of course, we could use the 'b option as the return type of the function for this purpose. This would lead to the following memoizing combinator:
let memoO f =
let cache = Dictionary<_,_>()
fun x ->
let l = ResizeArray([x])
while l.Count <> 0 do
let v = l.[l.Count - 1]
if cache.ContainsKey v then l.RemoveAt(l.Count - 1)
else
match f(fun x -> if cache.ContainsKey x then Some(cache.[x]) else l.Add(x); None) v with
| Some(r) -> cache.[v] <- r;
| None -> ()
cache.[x]
Previously, our memoization process looked like:
fun fib n ->
printfn "%i" n;
if n <= 1 then n
else (fib (n-1)) + (fib (n-2))
|> memoY
Now, we need to incorporate the fact that fib should return an int option instead of an int. Given a suitable workflow for option types, this could be written as follows:
fun fib n -> option {
printfn "%i" n
if n <= 1 then return n
else
let! x = fib (n-1)
let! y = fib (n-2)
return x + y
} |> memoO
However, if we're willing to change the return type of the first parameter (from int to int option in this case), we may as well go all the way and just use continuations in the return type instead, as in Brian's solution. Here's a variation on his definitions:
let memoC f =
let cache = Dictionary<_,_>()
let rec fn n k =
match cache.TryGetValue(n) with
| true, r -> k r
| _ ->
f fn n (fun r ->
cache.Add(n,r)
k r)
fun n -> fn n id
And again, if we have a suitable computation expression for building CPS functions, we can define our recursive function like this:
fun fib n -> cps {
printfn "%i" n
if n <= 1 then return n
else
let! x = fib (n-1)
let! y = fib (n-2)
return x + y
} |> memoC
This is exactly the same as what Brian has done, but I find the syntax here is easier to follow. To make this work, all we need are the following two definitions:
type CpsBuilder() =
member this.Return x k = k x
member this.Bind(m,f) k = m (fun a -> f a k)
let cps = CpsBuilder()
I wrote a test to visualize the memoization. Each dot is a recursive call.
......720 // factorial 6
......720 // factorial 6
.....120 // factorial 5
......720 // memoizedFactorial 6
720 // memoizedFactorial 6
120 // memoizedFactorial 5
......720 // tailRecFact 6
720 // tailRecFact 6
.....120 // tailRecFact 5
......720 // tailRecursiveMemoizedFactorial 6
720 // tailRecursiveMemoizedFactorial 6
.....120 // tailRecursiveMemoizedFactorial 5
kvb's solution returns the same results are straight memoization like this function.
let tailRecursiveMemoizedFactorial =
memoize
(fun x ->
let rec factorialUtil x res =
if x = 0 then
res
else
printf "."
let newRes = x * res
factorialUtil (x - 1) newRes
factorialUtil x 1
)
Test source code.
open System.Collections.Generic
let memoize f =
let cache = new Dictionary<_, _>()
(fun x ->
match cache.TryGetValue(x) with
| true, y -> y
| _ ->
let v = f(x)
cache.Add(x, v)
v)
let rec factorial(x) =
if (x = 0) then
1
else
printf "."
x * factorial(x - 1)
let rec memoizedFactorial =
memoize (
fun x ->
if (x = 0) then
1
else
printf "."
x * memoizedFactorial(x - 1))
let memoY f =
let cache = Dictionary<_,_>()
let rec fn x =
match cache.TryGetValue(x) with
| true,y -> y
| _ -> let v = f fn x
cache.Add(x,v)
v
fn
let tailRecFact =
let factHelper fact (x, res) =
if x = 0 then
res
else
printf "."
fact (x-1, x*res)
let memoized = memoY factHelper
fun x -> memoized (x,1)
let tailRecursiveMemoizedFactorial =
memoize
(fun x ->
let rec factorialUtil x res =
if x = 0 then
res
else
printf "."
let newRes = x * res
factorialUtil (x - 1) newRes
factorialUtil x 1
)
factorial 6 |> printfn "%A"
factorial 6 |> printfn "%A"
factorial 5 |> printfn "%A\n"
memoizedFactorial 6 |> printfn "%A"
memoizedFactorial 6 |> printfn "%A"
memoizedFactorial 5 |> printfn "%A\n"
tailRecFact 6 |> printfn "%A"
tailRecFact 6 |> printfn "%A"
tailRecFact 5 |> printfn "%A\n"
tailRecursiveMemoizedFactorial 6 |> printfn "%A"
tailRecursiveMemoizedFactorial 6 |> printfn "%A"
tailRecursiveMemoizedFactorial 5 |> printfn "%A\n"
System.Console.ReadLine() |> ignore
That should work if mutual tail recursion through y are not creating stack frames:
let rec y f x = f (y f) x
let memoize (d:System.Collections.Generic.Dictionary<_,_>) f n =
if d.ContainsKey n then d.[n]
else d.Add(n, f n);d.[n]
let rec factorialucps factorial' n cont =
if n = 0I then cont(1I) else factorial' (n-1I) (fun k -> cont (n*k))
let factorialdpcps =
let d = System.Collections.Generic.Dictionary<_, _>()
fun n -> y (factorialucps >> fun f n -> memoize d f n ) n id
factorialdpcps 15I //1307674368000

How would you convert this mutable tree into an immutable one?

How would you convert type Node into an immutable tree?
This class implements a range tree that does not allow overlapping or adjacent ranges and instead joins them. For example if the root node is {min = 10; max = 20} then it's right child and all its grandchildren must have a min and max value greater than 21. The max value of a range must be greater than or equal to the min. I included a test function so you can run this as is and it will dump out any cases that fail.
I recommend starting with the Insert method to read this code.
module StackOverflowQuestion
open System
type Range =
{ min : int64; max : int64 }
with
override this.ToString() =
sprintf "(%d, %d)" this.min this.max
[<AllowNullLiteralAttribute>]
type Node(left:Node, right:Node, range:Range) =
let mutable left = left
let mutable right = right
let mutable range = range
// Symmetric to clean right
let rec cleanLeft(node : Node) =
if node.Left = null then
()
elif range.max < node.Left.Range.min - 1L then
cleanLeft(node.Left)
elif range.max <= node.Left.Range.max then
range <- {min = range.min; max = node.Left.Range.max}
node.Left <- node.Left.Right
else
node.Left <- node.Left.Right
cleanLeft(node)
// Clean right deals with merging when the node to merge with is not on the
// left outside of the tree. It travels right inside the tree looking for an
// overlapping node. If it finds one it merges the range and replaces the
// node with its left child thereby deleting it. If it finds a subset node
// it replaces it with its left child, checks it and continues looking right.
let rec cleanRight(node : Node) =
if node.Right = null then
()
elif range.min > node.Right.Range.max + 1L then
cleanRight(node.Right)
elif range.min >= node.Right.Range.min then
range <- {min = node.Right.Range.min; max = range.max}
node.Right <- node.Right.Left
else
node.Right <- node.Right.Left
cleanRight(node)
// Merger left is called whenever the min value of a node decreases.
// It handles the case of left node overlap/subsets and merging/deleting them.
// When no more overlaps are found on the left nodes it calls clean right.
let rec mergeLeft(node : Node) =
if node.Left = null then
()
elif range.min <= node.Left.Range.min - 1L then
node.Left <- node.Left.Left
mergeLeft(node)
elif range.min <= node.Left.Range.max + 1L then
range <- {min = node.Left.Range.min; max = range.max}
node.Left <- node.Left.Left
else
cleanRight(node.Left)
// Symmetric to merge left
let rec mergeRight(node : Node) =
if node.Right = null then
()
elif range.max >= node.Right.Range.max + 1L then
node.Right <- node.Right.Right
mergeRight(node)
elif range.max >= node.Right.Range.min - 1L then
range <- {min = range.min; max = node.Right.Range.max}
node.Right <- node.Right.Right
else
cleanLeft(node.Right)
let (|Before|After|BeforeOverlap|AfterOverlap|Superset|Subset|) r =
if r.min > range.max + 1L then After
elif r.min >= range.min then
if r.max <= range.max then Subset
else AfterOverlap
elif r.max < range.min - 1L then Before
elif r.max <= range.max then
if r.min >= range.min then Subset
else BeforeOverlap
else Superset
member this.Insert r =
match r with
| After ->
if right = null then
right <- Node(null, null, r)
else
right.Insert(r)
| AfterOverlap ->
range <- {min = range.min; max = r.max}
mergeRight(this)
| Before ->
if left = null then
left <- Node(null, null, r)
else
left.Insert(r)
| BeforeOverlap ->
range <- {min = r.min; max = range.max}
mergeLeft(this)
| Superset ->
range <- r
mergeLeft(this)
mergeRight(this)
| Subset -> ()
member this.Left with get() : Node = left and set(x) = left <- x
member this.Right with get() : Node = right and set(x) = right <- x
member this.Range with get() : Range = range and set(x) = range <- x
static member op_Equality (a : Node, b : Node) =
a.Range = b.Range
override this.ToString() =
sprintf "%A" this.Range
type RangeTree() =
let mutable root = null
member this.Add(range) =
if root = null then
root <- Node(null, null, range)
else
root.Insert(range)
static member fromArray(values : Range seq) =
let tree = new RangeTree()
values |> Seq.iter (fun value -> tree.Add(value))
tree
member this.Seq
with get() =
let rec inOrder(node : Node) =
seq {
if node <> null then
yield! inOrder node.Left
yield {min = node.Range.min; max = node.Range.max}
yield! inOrder node.Right
}
inOrder root
let TestRange() =
printf "\n"
let source(n) =
let rnd = new Random(n)
let rand x = rnd.NextDouble() * float x |> int64
let rangeRnd() =
let a = rand 1500
{min = a; max = a + rand 15}
[|for n in 1 .. 50 do yield rangeRnd()|]
let shuffle n (array:_[]) =
let rnd = new Random(n)
for i in 0 .. array.Length - 1 do
let n = rnd.Next(i)
let temp = array.[i]
array.[i] <- array.[n]
array.[n] <- temp
array
let testRangeAdd n i =
let dataSet1 = source (n+0)
let dataSet2 = source (n+1)
let dataSet3 = source (n+2)
let result1 = Array.concat [dataSet1; dataSet2; dataSet3] |> shuffle (i+3) |> RangeTree.fromArray
let result2 = Array.concat [dataSet2; dataSet3; dataSet1] |> shuffle (i+4) |> RangeTree.fromArray
let result3 = Array.concat [dataSet3; dataSet1; dataSet2] |> shuffle (i+5) |> RangeTree.fromArray
let test1 = (result1.Seq, result2.Seq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test2 = (result2.Seq, result3.Seq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test3 = (result3.Seq, result1.Seq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let print dataSet =
dataSet |> Seq.iter (fun r -> printf "%s " <| string r)
if not (test1 && test2 && test3) then
printf "\n\nTest# %A: " n
printf "\nSource 1: %A: " (n+0)
dataSet1 |> print
printf "\nSource 2: %A: " (n+1)
dataSet2 |> print
printf "\nSource 3: %A: " (n+2)
dataSet3 |> print
printf "\nResult 1: %A: " (n+0)
result1.Seq |> print
printf "\nResult 2: %A: " (n+1)
result2.Seq |> print
printf "\nResult 3: %A: " (n+2)
result3.Seq |> print
()
for i in 1 .. 10 do
for n in 1 .. 1000 do
testRangeAdd n i
printf "\n%d" (i * 1000)
printf "\nDone"
TestRange()
System.Console.ReadLine() |> ignore
Test cases for Range
After (11, 14) | | <-->
AfterOverlap (10, 14) | |<--->
AfterOverlap ( 9, 14) | +---->
AfterOverlap ( 6, 14) |<--+---->
"Test Case" ( 5, 9) +---+
BeforeOverlap ( 0, 8) <----+-->|
BeforeOverlap ( 0, 5) <----+ |
BeforeOverlap ( 0, 4) <--->| |
Before ( 0, 3) <--> | |
Superset ( 4, 10) <+---+>
Subset ( 5, 9) +---+
Subset ( 6, 8) |<->|
This is not an answer.
I adapted my test case to run against Juliet's code. It fails on a number of cases however I do see it passing some test.
type Range =
{ min : int64; max : int64 }
with
override this.ToString() =
sprintf "(%d, %d)" this.min this.max
let rangeSeqToJTree ranges =
ranges |> Seq.fold (fun tree range -> tree |> insert (range.min, range.max)) Nil
let JTreeToRangeSeq node =
let rec inOrder node =
seq {
match node with
| JNode(left, min, max, right) ->
yield! inOrder left
yield {min = min; max = max}
yield! inOrder right
| Nil -> ()
}
inOrder node
let TestJTree() =
printf "\n"
let source(n) =
let rnd = new Random(n)
let rand x = rnd.NextDouble() * float x |> int64
let rangeRnd() =
let a = rand 15
{min = a; max = a + rand 5}
[|for n in 1 .. 5 do yield rangeRnd()|]
let shuffle n (array:_[]) =
let rnd = new Random(n)
for i in 0 .. array.Length - 1 do
let n = rnd.Next(i)
let temp = array.[i]
array.[i] <- array.[n]
array.[n] <- temp
array
let testRangeAdd n i =
let dataSet1 = source (n+0)
let dataSet2 = source (n+1)
let dataSet3 = source (n+2)
let result1 = Array.concat [dataSet1; dataSet2; dataSet3] |> shuffle (i+3) |> rangeSeqToJTree
let result2 = Array.concat [dataSet2; dataSet3; dataSet1] |> shuffle (i+4) |> rangeSeqToJTree
let result3 = Array.concat [dataSet3; dataSet1; dataSet2] |> shuffle (i+5) |> rangeSeqToJTree
let test1 = (result1 |> JTreeToRangeSeq, result2 |> JTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test2 = (result2 |> JTreeToRangeSeq, result3 |> JTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test3 = (result3 |> JTreeToRangeSeq, result1 |> JTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let print dataSet =
dataSet |> Seq.iter (fun r -> printf "%s " <| string r)
if not (test1 && test2 && test3) then
printf "\n\nTest# %A: " n
printf "\nSource 1: %A: " (n+0)
dataSet1 |> print
printf "\nSource 2: %A: " (n+1)
dataSet2 |> print
printf "\nSource 3: %A: " (n+2)
dataSet3 |> print
printf "\n\nResult 1: %A: " (n+0)
result1 |> JTreeToRangeSeq |> print
printf "\nResult 2: %A: " (n+1)
result2 |> JTreeToRangeSeq |> print
printf "\nResult 3: %A: " (n+2)
result3 |> JTreeToRangeSeq |> print
()
for i in 1 .. 1 do
for n in 1 .. 10 do
testRangeAdd n i
printf "\n%d" (i * 10)
printf "\nDone"
TestJTree()
Got it working! I think the hardest part was figuring out how to make recursive calls on children while passing state back up the stack.
Performance is rather interesting. When inserting mainly ranges that collide and get merged together the mutable version is faster while if you insert mainly none overlapping nodes and fill out the tree the immutable version is faster. I've seen performance swing a max of 100% both ways.
Here's the complete code.
module StackOverflowQuestion
open System
type Range =
{ min : int64; max : int64 }
with
override this.ToString() =
sprintf "(%d, %d)" this.min this.max
type RangeTree =
| Node of RangeTree * int64 * int64 * RangeTree
| Nil
// Clean right deals with merging when the node to merge with is not on the
// left outside of the tree. It travels right inside the tree looking for an
// overlapping node. If it finds one it merges the range and replaces the
// node with its left child thereby deleting it. If it finds a subset node
// it replaces it with its left child, checks it and continues looking right.
let rec cleanRight n node =
match node with
| Node(left, min, max, (Node(left', min', max', right') as right)) ->
if n > max' + 1L then
let node, n' = right |> cleanRight n
Node(left, min, max, node), n'
elif n >= min' then
Node(left, min, max, left'), min'
else
Node(left, min, max, left') |> cleanRight n
| _ -> node, n
// Symmetric to clean right
let rec cleanLeft x node =
match node with
| Node(Node(left', min', max', right') as left, min, max, right) ->
if x < min' - 1L then
let node, x' = left |> cleanLeft x
Node(node, min, max, right), x'
elif x <= max' then
Node(right', min, max, right), max'
else
Node(right', min, max, right) |> cleanLeft x
| Nil -> node, x
| _ -> node, x
// Merger left is called whenever the min value of a node decreases.
// It handles the case of left node overlap/subsets and merging/deleting them.
// When no more overlaps are found on the left nodes it calls clean right.
let rec mergeLeft n node =
match node with
| Node(Node(left', min', max', right') as left, min, max, right) ->
if n <= min' - 1L then
Node(left', min, max, right) |> mergeLeft n
elif n <= max' + 1L then
Node(left', min', max, right)
else
let node, min' = left |> cleanRight n
Node(node, min', max, right)
| _ -> node
// Symmetric to merge left
let rec mergeRight x node =
match node with
| Node(left, min, max, (Node(left', min', max', right') as right)) ->
if x >= max' + 1L then
Node(left, min, max, right') |> mergeRight x
elif x >= min' - 1L then
Node(left, min, max', right')
else
let node, max' = right |> cleanLeft x
Node(left, min, max', node)
| node -> node
let (|Before|After|BeforeOverlap|AfterOverlap|Superset|Subset|) (min, max, min', max') =
if min > max' + 1L then After
elif min >= min' then
if max <= max' then Subset
else AfterOverlap
elif max < min' - 1L then Before
elif max <= max' then
if min >= min' then Subset
else BeforeOverlap
else Superset
let rec insert min' max' this =
match this with
| Node(left, min, max, right) ->
match (min', max', min, max) with
| After -> Node(left, min, max, right |> insert min' max')
| AfterOverlap -> Node(left, min, max', right) |> mergeRight max'
| Before -> Node(left |> insert min' max', min, max, right)
| BeforeOverlap -> Node(left, min', max, right) |> mergeLeft min'
| Superset -> Node(left, min', max', right) |> mergeLeft min' |> mergeRight max'
| Subset -> this
| Nil -> Node(Nil, min', max', Nil)
let rangeSeqToRangeTree ranges =
ranges |> Seq.fold (fun tree range -> tree |> insert range.min range.max) Nil
let rangeTreeToRangeSeq node =
let rec inOrder node =
seq {
match node with
| Node(left, min, max, right) ->
yield! inOrder left
yield {min = min; max = max}
yield! inOrder right
| Nil -> ()
}
inOrder node
let TestImmutableRangeTree() =
printf "\n"
let source(n) =
let rnd = new Random(n)
let rand x = rnd.NextDouble() * float x |> int64
let rangeRnd() =
let a = rand 15000
{min = a; max = a + rand 150}
[|for n in 1 .. 200 do yield rangeRnd()|]
let shuffle n (array:_[]) =
let rnd = new Random(n)
for i in 0 .. array.Length - 1 do
let n = rnd.Next(i)
let temp = array.[i]
array.[i] <- array.[n]
array.[n] <- temp
array
let print dataSet =
dataSet |> Seq.iter (fun r -> printf "%s " <| string r)
let testRangeAdd n i =
let dataSet1 = source (n+0)
let dataSet2 = source (n+1)
let dataSet3 = source (n+2)
let result1 = Array.concat [dataSet1; dataSet2; dataSet3] |> shuffle (i+3) |> rangeSeqToRangeTree
let result2 = Array.concat [dataSet2; dataSet3; dataSet1] |> shuffle (i+4) |> rangeSeqToRangeTree
let result3 = Array.concat [dataSet3; dataSet1; dataSet2] |> shuffle (i+5) |> rangeSeqToRangeTree
let test1 = (result1 |> rangeTreeToRangeSeq, result2 |> rangeTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test2 = (result2 |> rangeTreeToRangeSeq, result3 |> rangeTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
let test3 = (result3 |> rangeTreeToRangeSeq, result1 |> rangeTreeToRangeSeq) ||> Seq.forall2 (fun a b -> a.min = b.min && a.max = b.max)
if not (test1 && test2 && test3) then
printf "\n\nTest# %A: " n
printf "\nSource 1: %A: " (n+0)
dataSet1 |> print
printf "\nSource 2: %A: " (n+1)
dataSet2 |> print
printf "\nSource 3: %A: " (n+2)
dataSet3 |> print
printf "\n\nResult 1: %A: " (n+0)
result1 |> rangeTreeToRangeSeq |> print
printf "\nResult 2: %A: " (n+1)
result2 |> rangeTreeToRangeSeq |> print
printf "\nResult 3: %A: " (n+2)
result3 |> rangeTreeToRangeSeq |> print
()
for i in 1 .. 10 do
for n in 1 .. 100 do
testRangeAdd n i
printf "\n%d" (i * 10)
printf "\nDone"
TestImmutableRangeTree()
System.Console.ReadLine() |> ignore
It looks like you're defining a binary tree which is basically a union of a bunch of ranges. So, you have the following scenarios:
(10, 20) left (10, 20)
/ \ --> / \
(0, 5) (25, 30) (7, 8) (7, 8) (25, 30)
/
(0, 5)
(10, 20) right (10, 20)
/ \ --> / \
(0, 5) (25, 30) (21, 22) (0, 5) (21, 22)
\
(25, 30)
(10, 20) subset (10, 20)
/ \ --> / \
(0, 5) (25, 30) (15, 19) (0, 5) (25, 30)
(10, 20) R-superset (10, 30)
/ \ --> /
(0, 5) (25, 30) (11, 30) (0, 5)
(10, 20) L-superset (0, 20)
/ \ --> \
(0, 5) (25, 30) (0, 10) (25, 30)
(10, 20) LR-superset (0, 30)
/ \ -->
(0, 5) (25, 30) (0, 30)
The L- R- and LR-superset cases are interesting because it requires merging/deleting nodes when you insert a node whose range already contains other nodes.
The following is hastily written and not tested very well, but appears to satisfy the simple definition above:
type JTree =
| JNode of JTree * int64 * int64 * JTree
| Nil
let rec merge = function
| JNode(JNode(ll, lmin, lmax, lr), min, max, r) when min <= lmin -> merge <| JNode(ll, min, max, r)
| JNode(l, min, max, JNode(rl, rmin, rmax, rr)) when max >= rmax -> merge <| JNode(l, min, max, rr)
| n -> n
let rec insert (min, max) = function
| JNode(l, min', max', r) ->
let node =
// equal.
// e.g. Given Node(l, 10, 20, r) insert (10, 20)
if min' = min && max' = max then JNode(l, min', max', r)
// before. Insert left
// e.g. Given Node(l, 10, 20, r) insert (5, 7)
elif min' >= max then JNode(insert (min, max) l, min', max', r)
// after. Insert right
// e.g. Given Node(l, 10, 20, r) insert (30, 40)
elif max' <= min then JNode(l, min', max', insert (min, max) r)
// superset
// e.g. Given Node(l, 10, 20, r) insert (0, 40)
elif min' >= min && max' <= max then JNode(l, min, max, r)
// overlaps left
// e.g. Given Node(l, 10, 20, r) insert (5, 15)
elif min' >= min && max' >= max then JNode(l, min, max', r)
// overlaps right
// e.g. Given Node(l, 10, 20, r) insert (15, 40)
elif min' <= min && max' <= max then JNode(l, min', max, r)
// subset.
// e.g. Given Node(l, 10, 20, r) insert (15, 17)
elif min' <= min && max >= max then JNode(l, min', max', r)
// shouldn't happen
else failwith "insert (%i, %i) into Node(l, %i, %i, r)" min max min' max'
// balances left and right sides
merge node
| Nil -> JNode(Nil, min, max, Nil)
JTree = Juliet Tree :) The merge function does all the heavy lifting. It'll merge as far as possible down the left spine, then as far as possible down the right spine.
I'm not wholly convinced that my overlaps left and overlaps right cases are implemented properly, but the other cases should be correct.

Resources