Related
Suppose I have a collection like [ "a"; "b"; "c" ] and I want to test every element against every other element.
I could generate all pairs like this:
let combinations xs =
Seq.allPairs xs xs
|> Seq.filter (fun (x, y) -> x <> y)
|> Seq.toList
combinations [ "a"; "b"; "c" ]
// [("a", "b"); ("a", "c"); ("b", "a"); ("b", "c"); ("c", "a"); ("c", "b")]
But for my test, I always know that f x y = f y x (since f is symmetric), so I want to trim the number of combinations tested:
let combinations xs =
Seq.allPairs xs xs
|> Seq.filter (fun (x, y) -> x <> y && x < y)
|> Seq.toList
combinations [ "a"; "b"; "c" ]
// [("a", "b"); ("a", "c"); ("b", "c")]
But this:
Doesn't seem like an efficient way to generate the test cases
Requires that x : comparison, which I don't think should be necessary
How should I implement this in F#?
Don't know about efficient - this looks like you need to cache the pairs already generated and filter on their presence in the cache.
The library implementation of Seq.allPairs goes along these lines:
let allPairs source1 source2 =
source1 |> Seq.collect (fun x -> source2 |> Seq.map (fun y -> x, y))
// val allPairs : source1:seq<'a> -> source2:seq<'b> -> seq<'a * 'b>
Then you integrate the caching and filtering into this, constraining both sequences to type seq<'a> and introducing the equality constraint.
let allPairs1 source1 source2 =
let h = System.Collections.Generic.HashSet()
source1 |> Seq.collect (fun x ->
source2 |> Seq.choose (fun y ->
if x = y || h.Contains (x, y) || h.Contains (y, x) then None
else h.Add (x, y) |> ignore; Some (x, y) ) )
// val allPairs1 :
// source1:seq<'a> -> source2:seq<'a> -> seq<'a * 'a> when 'a : equality
Test
allPairs1 [1..3] [2..4] |> Seq.toList
// val it : (int * int) list = [(1, 2); (1, 3); (1, 4); (2, 3); (2, 4); (3, 4)]
Because f is commutative, the simplest way to get all combinations is to project each item into a pair with the remainder of the list.
let rec combinations = function
| [] -> []
| x::xs -> (xs |> List.map (fun y -> (x, y))) # (combinations xs)
We don't need any comparison constraint.
let xs = [1; 2; 3; 4;]
combinations xs // [(1, 2); (1, 3); (1, 4); (2, 3); (2, 4); (3, 4)]
Checking the results with #kaefer's method:
combinations xs = (allPairs1 xs xs |> Seq.toList) // true
Another solution that assumes all elements are distinct (it uses position as identity):
let allSymmetricPairs xs =
seq {
let xs = Seq.toArray xs
for i = 0 to Array.length xs - 2 do
for j = i + 1 to Array.length xs - 1 do
yield xs.[i], xs.[j]
}
We can also pre-allocate the array, which may be faster if you plan to pull the whole sequence:
let allSymmetricPairs xs =
let xs = Seq.toArray xs
let n = Array.length xs
let result = Array.zeroCreate (n * (n - 1) / 2)
let mutable k = 0
for i = 0 to n - 2 do
for j = i + 1 to n - 1 do
result.[k] <- xs.[i], xs.[j]
k <- k + 1
result
I have a sequence of pairs (key, value) like
[("a", 1), ("a", 2), ("a", 111), ("b", 3), ("bb", 1), ("bb", -1), ...]
, what is the most effective way to convert it into sequence like
[("a", [1,2,111]), ("b", [3]), ("bb", [1,-1])]
or similar?
The sequence has following property: it's really big (>2Gb)
This makes Seq.groupBy really ineffective and incorrect, are there any other ways to do it?
P.S.: this sequence:
[("a", 1), ("a", 2), ("a", 111), ("bb", 1), ("bb", -1), ("a", 5), ("a", 6), ...]
should be converted as
[("a", [1,2,111]), ("bb", [1,-1]), ("a", [5,6]), ...]
--
edit #1: Fixed incorrect sample
edit #2: Sequence is big, so lazy (or fastest) solution is preferred
If you want the option to get lazy results, then I don't think there's an elegant way without maintaining mutable state. Here's a relatively straight-forward one with mutation. You maintain a store of the last key you saw, and all the values that correspond to that:
let s = [("a", 1); ("a", 2); ("a", 111); ("bb", 1); ("bb", -1); ("a", 5); ("a", 6)]
let s2 =
[
let mutable prevKey = None
let mutable values = System.Collections.Generic.List<_>()
let init key value =
prevKey <- Some key
values.Clear()
values.Add value
for (key, value) in s do
match prevKey with
| None -> init key value
| Some k when k = key -> values.Add value
| Some k ->
yield (k, List.ofSeq values)
init key value
match prevKey with
| Some k -> yield (k, List.ofSeq values)
| _ -> ()
]
This gives:
val s2 : (string * int list) list =
[("a", [1; 2; 111]); ("bb", [1; -1]); ("a", [5; 6])]
For lazy evaluation, replace the [ ... ] with seq { ... }
A simple recursive approach with no mutable state.
let rec chunk inseq (accumelem,accumlist) =
match inseq with
|(a,b)::c ->
match accumelem with
|Some(t) -> if t=a then chunk c (accumelem,b::accumlist) else (t,accumlist)::(chunk c (Some(a),b::[]))
|None -> chunk c (Some a,b::[])
|[] ->
match accumelem with
|Some(t) -> (t,accumlist)::[]
|None -> []
chunk [("a", 1); ("a", 2); ("a", 111); ("bb", 1); ("bb", -1); ("a", 5);("a", 6)] (None,[])
val it : (string * int list) list =
[("a", [111; 2; 1]); ("bb", [-1; 1]); ("a", [6; 5])]
Here is a recursive solution:
let test = [("a", 1); ("a", 2); ("a", 111); ("bb", 1); ("bb", -1); ("a", 5); ("a", 6)]
let groupByAdjacentElements alist =
let rec group a groupAcc prevElement adjacentAcc =
match a with
| [] -> match adjacentAcc with
| [] -> groupAcc
| _ -> (prevElement, List.rev adjacentAcc)::groupAcc
| (b, c)::tail -> if b = prevElement then
group tail groupAcc prevElement (c::adjacentAcc)
else
group tail ((prevElement, List.rev adjacentAcc)::groupAcc) b [c]
group alist [] (fst alist.Head) []
|> List.rev
let b = groupByAdjacentElements test
It returns: [("a", [1; 2; 111]); ("bb", [1; -1]); ("a", [5; 6])]
If you want lazy evaluation, you should consider trying LazyList
EDIT: Here's a script comparing LazyList from ExtCore to the accepted solution. It generates a large text file and then does the transformations asked for. Note that the LazyList is returned in reverse order:
open System.Diagnostics
open System.IO
open ExtCore
let fileName = "Test.txt"
let outFile = new StreamWriter(fileName)
for i in [1..20000*300] do
outFile.WriteLine("a,1")
outFile.WriteLine("a,2")
outFile.WriteLine("a,111")
outFile.WriteLine("bb,1")
outFile.WriteLine("bb,-1")
outFile.WriteLine("a,5")
outFile.WriteLine("a,6")
outFile.WriteLine("c,8")
outFile.Close()
printfn "Finished Writing to File"
let data = System.IO.File.ReadLines(fileName)
|> Seq.map (fun i -> let parts = i.Split(',')
(parts.[0], parts.[1]))
printfn "Finished Reading File"
let s2 data =
[
let mutable prevKey = None
let mutable values = System.Collections.Generic.List<_>()
let init key value =
prevKey <- Some key
values.Clear()
values.Add value
for (key, value) in data do
match prevKey with
| None -> init key value
| Some k when k = key -> values.Add value
| Some k ->
yield (k, List.ofSeq values)
init key value
match prevKey with
| Some key -> yield (key, List.ofSeq values)
| _ -> ()
]
let groupByAdjacentElements aseq =
let alist = LazyList.ofSeq aseq
let rec group alist groupAcc prevElement adjacentAcc =
match alist with
| Cons((b, c), tail) ->
if b = prevElement then
group tail groupAcc prevElement (c::adjacentAcc)
else
group tail (LazyList.consDelayed (prevElement, List.rev adjacentAcc) (fun () -> groupAcc)) b [c]
| Nil ->
match adjacentAcc with
| [] -> groupAcc
| _ -> LazyList.consDelayed (prevElement, List.rev adjacentAcc) (fun () -> groupAcc)
group alist LazyList.empty (fst (alist.Head())) []
let groupByAdjacentElementsList aseq =
let alist = aseq |> Seq.toList
let rec group a groupAcc prevElement adjacentAcc =
match a with
| [] -> match adjacentAcc with
| [] -> groupAcc
| _ -> (prevElement, List.rev adjacentAcc)::groupAcc
| (b, c)::tail -> if b = prevElement then
group tail groupAcc prevElement (c::adjacentAcc)
else
group tail ((prevElement, List.rev adjacentAcc)::groupAcc) b [c]
group alist [] (fst alist.Head) []
|> List.rev
[<EntryPoint>]
let main argv =
let stopwatch = new Stopwatch()
stopwatch.Start()
let b = s2 data
printfn "The result is: %A" b
stopwatch.Stop()
printfn "It took %A ms." stopwatch.ElapsedMilliseconds
System.GC.WaitForFullGCComplete() |> ignore
stopwatch.Reset()
stopwatch.Start()
let b = groupByAdjacentElements data
printfn "The result is: %A" b
stopwatch.Stop()
printfn "It took %A ms." stopwatch.ElapsedMilliseconds
System.GC.WaitForFullGCComplete() |> ignore
stopwatch.Reset()
stopwatch.Start()
let b = groupByAdjacentElementsList data
printfn "The result is: %A" b
stopwatch.Stop()
printfn "It took %A ms." stopwatch.ElapsedMilliseconds
0
I when using files of around 300MB in size, LazyList was slightly slower (83s to 94s) than the seq solution. That said LazyList has the major advantage that iterating over it is cached, unlike the sequence solution. The normal list solution was faster than both even when doing List.rev (without it was around 73s).
Grouping by adjacent keys can be also done without mutable bindings. With Seq.scan, it's possible to generate a lazy sequence with eager chunk. It already provides for one of the special cases, the first element of the sequence; by wrapping the input sequence as options followed by None we can take care of the other. Afterwards, we skip over intermediate results and strip out the state with Seq.choose.
For maximum versatility, I'd like to suggest a signature similar to Seq.groupBy,
f:('T -> 'Key) -> xs:seq<'T> -> seq<'Key * 'T list> when 'Key : equality
which takes a key projection function as first argument.
let chunkBy (f : 'T-> 'Key) xs =
// Determine key and wrap in option
seq{for x in xs -> Some(f x, x)
// Indicates end of sequence
yield None }
|> Seq.scan (fun (_, acc, previous) current ->
match previous, current with
| Some(pKey, _), Some(key, value) when pKey = key ->
// No intermediate result, but add to accumulator
None, value::acc, current
| _ ->
// New state is 3-tuple of previous key and completed chunk,
// accumulator from current element, and new previous element
Option.map (fun (k, _) -> k, List.rev acc) previous,
Option.map snd current |> Option.toList, current )
(None, [], None)
|> Seq.choose (fun (result, _, _) -> result)
This can be adopted to OP's requirements by providing also a result projection function.
let chunkBy2 (f : 'T-> 'Key) (g : 'T->'Result) =
chunkBy f >> Seq.map (fun (k, gs) -> k, List.map g gs)
// val chunkBy2 :
// f:('T -> 'Key) -> g:('T -> 'Result) -> (seq<'T> -> seq<'Key * 'Result list>)
// when 'Key : equality
["a", 1; "a", 2; "a", 111; "b", 3; "bb", 1; "bb", -1]
|> chunkBy2 fst snd
// val it : seq<string * int list> =
// seq [("a", [1; 2; 111]); ("b", [3]); ("bb", [1; -1])]
Seq.initInfinite (fun x ->
if (x / 2) % 2 = 0 then "a", x else "b", x)
|> chunkBy2 fst snd
|> Seq.skip 50000
// val it : seq<string * int list> =
// seq
// [("a", [100000; 100001]); ("b", [100002; 100003]); ("a", [100004; 100005]);
// ("b", [100006; 100007]); ...]
I am trying F# and trying to make a map reduce for a list of words to a word, count.
Here's what I have so far,
let data1 = ["Hello"; "Hello"; "How"; "How"; "how"; "are"]
let map = data1 |> List.map (fun x -> (x, 1))
printfn "%A" map
which gives the following output:
val map : (string * int) list =
[("Hello", 1); ("Hello", 1); ("How", 1); ("How", 1); ("how", 1); ("are", 1)]
but
let reduce = ...???
Now I am confused to how design a reduce function so that it has the word, count pair list. Any suggestions? I appreciate your help! Thanks
There's a built-in function for that:
data1 |> Seq.countBy id
which will give you a sequence of tuples:
val it : seq<string * int> =
seq [("Hello", 2); ("How", 2); ("how", 1); ("are", 1)]
The id function is another built-in function that takes a value and returns the same value, so in this case it means that you count by the strings themselves.
If you rather want a list than a seq, you can use Seq.toList:
> data1 |> Seq.countBy id |> Seq.toList;;
val it : (string * int) list =
[("Hello", 2); ("How", 2); ("how", 1); ("are", 1)]
If you want a map, this is also easy:
> data1 |> Seq.countBy id |> Map.ofSeq;;
val it : Map<string,int> =
map [("Hello", 2); ("How", 2); ("are", 1); ("how", 1)]
You don't actually need the map list. It's simpler to directly put the list into an associative map:
let reduce x =
x |> List.fold (fun m x -> match Map.tryFind x m with
| None -> Map.add x 1 m
| Some c -> Map.add x (c+1) m)
Map.empty
Let's try it in the interpreter:
> reduce data1
val it : Map<string,int> = map [("Hello", 2); ("How", 2); ("are", 1); ("how", 1)]
There is a good explanation of how to use the reducing function fold here, and a good explanation of how to use the associative map data structure Map<'Key,'T> here.
Here is a relatively inefficient but easy to understand solution:
data1 |> Seq.groupBy id |> Seq.map (fun (a,b) -> a,Seq.length b)
essentially, do the grouping and then see how many elements are in each group.
#ildjarn pointed out an improvement, which is probably the most efficient and also even simpler:
data1 |> Seq.countBy id
I have an array like this:
let items = ["A";"B";"C";"D"]
I want to transform it into an array like this:
let result = ["AB";"AC";"AD";"BC";"BD";"CD"]
I can't find anything in the language spec that does this - though I might be searching incorrectly. I thought of Seq.Fold like this:
let result = items |> Seq.fold(fun acc x -> acc+x) ""
but I am getting "ABCD"
Does anyone know how to do this? Will a modified CartesianProduct work?
Thanks in advance
What you have there are lists, not arrays -- lists use the [...] syntax, arrays use the [|...|] syntax.
That said, here's a simple implementation:
let listProduct (items : string list) =
items
|> List.collect (fun x ->
items
|> List.choose (fun y ->
if x < y then Some (x + y)
else None))
If you put it into F# interactive:
> let items = ["A"; "B"; "C"; "D"];;
val items : string list = ["A"; "B"; "C"; "D"]
> items |> listProduct |> Seq.toList;;
val it : string list = ["AB"; "AC"; "AD"; "BC"; "BD"; "CD"]
Something like this should do it:
items
|> List.map (fun x -> items |> List.map (fun y -> (x, y)))
|> List.concat
|> List.filter (fun (x, y) -> x < y)
|> List.map (fun (x, y) -> x + y)
|> List.sort
I don't know if it's efficient for large lists, but it does produce this output:
["AB"; "AC"; "AD"; "BC"; "BD"; "CD"]
Breakdown
The first step produces a list of list of tuples, by mapping items twice:
[[("A", "A"); ("A", "B"); ("A", "C"); ("A", "D")];
[("B", "A"); ("B", "B"); ("B", "C"); ("B", "D")];
[("C", "A"); ("C", "B"); ("C", "C"); ("C", "D")];
[("D", "A"); ("D", "B"); ("D", "C"); ("D", "D")]]
Second, List.concat turns the list of list into a single list:
[("A", "A"); ("A", "B"); ("A", "C"); ("A", "D"); ("B", "A"); ("B", "B");
("B", "C"); ("B", "D"); ("C", "A"); ("C", "B"); ("C", "C"); ("C", "D");
("D", "A"); ("D", "B"); ("D", "C"); ("D", "D")]
Third, List.filter removes the tuples where the first element is equal to or larger than the second element:
[("A", "B"); ("A", "C"); ("A", "D"); ("B", "C"); ("B", "D"); ("C", "D")]
Fourth, List.map produces a list of concatenated strings:
["AB"; "AC"; "AD"; "BC"; "BD"; "CD"]
Finally, List.sort sorts the list, although in this case it's not necessary, as the list already has the correct order.
You might also consider using Seq.distinct to remove duplicates, if there are any.
You could create a function to create a list of all head/tail pairs in a list:
let rec dec = function
| [] -> []
| (x::xs) -> (x, xs) :: dec xs
or a tail-recursive version:
let dec l =
let rec aux acc = function
| [] -> acc
| (x::xs) -> aux ((x, xs)::acc) xs
aux [] l |> List.rev
you can then use this function to create your list:
let strs (l: string list) = l |> dec |> List.collect (fun (h, t) -> List.map ((+)h) t)
I'd do it like this:
let rec loop = function
[] -> []
| x :: xs -> List.map ((^) x) xs # loop xs
This has the advantage of not building every pair of elements from the list only to discard half. (I'll leave getting rid of the append as an exercise :-)
For me, it is a bit easier to tell what's going on here compared some of the other proposed solutions. For this kind of problem, where to process an element x you need also access to the rest of the list xs, standard combinators won't always make solutions clearer.
let items = ["A";"B";"C";"D"]
let rec produce (l: string list) =
match l with
// if current list is empty or contains one element - return empty list
| [] | [_] -> []
// if current list is not empty - match x to head and xs to tail
| x::xs ->
[
// (1)
// iterate over the tail, return string concatenation of head and every item in tail
for c in xs -> x + c
// apply produce to tail, concat return values
yield! produce xs
]
1st iteration: l = [A, B, C, D] - is not empty, in second match case we'll have x = A, xs = [B, C, D]. 'for' part of the list expression will yield [AB, AC, AD] and result of applying produce to xs.
2nd iteration:l = [B, C, D] is not empty so second match case we'll have x = B, xs = [C, D]. 'for' part of the list expression will yield [BC, BD] and result of applying produce to xs.
3rd iteration:l = [C, D] is not empty in second match case we'll have x = C, xs = [D]. 'for' part of the list expression will yield [CD] and result of applying produce to xs.
4th iteration:l = [D] contains one element -> return empty list.
Final result will be concatenation of [AB, AC, AD] ++ [BC, BD] ++ [CD]
This is an apt motivating example for implementing a List monad in F#. Using F# computation expressions, we get:
type ListMonadBuilder() =
member b.Bind(xs, f) = List.collect f xs
member b.Delay(f) = fun () -> f()
member b.Let(x, f) = f x
member b.Return(x) = [x]
member b.Zero() = []
let listM = new ListMonadBuilder()
Now, to solve the original problem we simply use our List monad.
let run = listM {
let! x = ['A' .. 'D']
let! y = List.tail [ x .. 'D']
return string x + string y
}
run();; in F# Interactive will return the desired result.
For another example of using the List monad, we can get the Pythagorean triples <= n.
let pythagoreanTriples n = listM {
let! c = [1 .. n]
let! b = [1 .. c]
let! a = [1 .. b]
if a*a + b*b = c*c then return (a, b, c)
}
Running pythagoreanTriples 10 ();; in F# interactive returns:
val it : (int * int * int) list = [(3, 4, 5); (6, 8, 10)]
#light
let a1 = [| (1, 1); (2, 1); (3, 1) |]
let a2 = [| (1, 1); (2, 3); (3, 1) |]
let aa = Array.zip a1 a2
|> Array.filter(fun (x, y) -> x <> y)
I want to write a function to do this: it will return the different tuple from two arrays, but I also want to return the index of the different tuple in the second array and the corresponding tuple in the second array. (My code did not work totally yet!)
For my above example, I want to return: 1 and (2, 3)
Another example:
let a1 = [| (1, 1); (2, 1); (3, 1); (4, 1) |]
let a2 = [| (1, 1); (2, 3); (3, 1); (4, 2) |]
I want to return: 1 and (2, 3); 3 and (4, 2)
If you have any idea, please show me your code.
Besides, I am not used to the new place for F#, the format makes me feel difficult to find a good place to post my questions, therefore, I still post my question here.
let a1 = [| (1, 1); (2, 1); (3, 1) |]
let a2 = [| (1, 1); (2, 3); (3, 1) |]
let diff =
(a1, a2)
||> Array.mapi2(fun i t1 t2 -> (i, t1, t2))
|> Array.choose(fun (i, a, b) -> if a <> b then Some (i, b) else None)
Here's one way to do it:
let diff a b =
let s = Set.ofSeq a
b
|> Seq.mapi (fun i x -> i, x)
|> Seq.filter (fun (_, x) -> not (Set.contains x s))
Example
let a1 = [| (1, 1); (2, 1); (3, 1) |]
let a2 = [| (1, 1); (2, 3); (3, 1) |]
diff a1 a2 //output: seq [(1, (2, 3))]
This works for any collection (list, array, seq<_>, set, etc) and the sequences may be of different lengths. If you know you'll always be using arrays of equal length, you can optimize accordingly (see desco's answer).
let diff (a:(int * int)[]) b =
b
|> Array.mapi (fun i tp -> if a.[i] <> tp then (i, tp) else (-1, tp))
|> Array.filter (fun (x, _) -> x >= 0)
DEMO
> diff a1 a2;;
val it : (int * (int * int)) [] = [|1, (2, 3)); (3, (4, 2))|]