Map state transitions with Deedle - f#

Suppose I have a table with products and events (processing steps), like
type Event = | E1 | E2
let events = Series.ofValues [ E1;E2;E2 ]
let products = Series.ofValues [ "A";"A";"B"]
let df = Frame(["Product"; "Event"], [products; events])
df.Print()
     Product Event
0 -> A       E1
1 -> A       E2
2 -> B       E2
and a transition function which determines a new state given the old state and the event
type State = S0 | S1 | S2

let evolve (s:State) (e:Event) : State =
    match s, e with
    | _, E1 -> S1
    | S0, E2 -> S0
    | _, E2 -> S2
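For instance, evolve S0 E1 = S1, evolve S1 E2 = S2 and evolve S0 E2 = S0, which is what the expected result below relies on.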
How can the state transitions be mapped?
The result should be something like
let stateTransitions = df |> ???
stateTransitions.Print()
     Product Event NewState
0 -> A       E1    S1
1 -> A       E2    S2
2 -> B       E2    S0
Update: I know how to get the final state of every product but the aggregate function does not show the transitions.
let finalStates =
    df
    |> Frame.aggregateRowsBy ["Product"] ["Event"]
        (fun s -> s.Values |> Seq.fold evolve S0)
finalStates.Print()
     Product Event
0 -> A       S2
1 -> B       S0

I guess there is no existing function for this. I did grouping/nesting by product, a fold that stores all intermediate states, built a new series/column from the results, and unnested.
let stateTransitions =
    df
    |> Frame.groupRowsByString "Product"
    |> Frame.nest
    |> Series.mapValues (fun nf ->
        let events = nf.Columns.["Event"].As<Event>()
        let values' =
            events.Values
            |> Seq.fold (fun acc e ->
                let s = acc |> List.head
                let s' = evolve s e
                s' :: acc) [S0]
            |> Seq.rev
            |> Seq.tail
        let states' =
            Seq.zip events.Keys values'
            |> Series.ofObservations
        nf.AddColumn("NewState", states')
        nf)
    |> Frame.unnest
    |> Frame.indexRowsOrdinally
stateTransitions.Print()
     Product Event NewState
0 -> A       E1    S1
1 -> A       E2    S2
2 -> B       E2    S0
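A slightly shorter variant of the same idea, as a sketch: if your Deedle version has Series.scanValues (a running fold that keeps the row keys), the per-group work shrinks to one call. Verify against the version you have installed.

let stateTransitions' =
    df
    |> Frame.groupRowsByString "Product"
    |> Frame.nest
    |> Series.mapValues (fun nf ->
        let events = nf.Columns.["Event"].As<Event>()
        // running state after each event, one value per original row
        nf.AddColumn("NewState", events |> Series.scanValues evolve S0)
        nf)
    |> Frame.unnest
    |> Frame.indexRowsOrdinally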


F# list group by running total?

I have the following list of tuples, ordered by the first item. I want to cluster the items as follows:
If the second item of the tuple is greater than 50, it goes into its own cluster.
Otherwise, cluster consecutive items while their sum stays under 50.
The order cannot be changed.
code:
let values =
    [ ("ACE", 78);
      ("AMR", 3);
      ("Aam", 6);
      ("Acc", 1);
      ("Adj", 23);
      ("Aga", 12);
      ("All", 2);
      ("Ame", 4);
      ("Amo", 60);
      //....
    ]
values |> Seq.groupBy(fun (k,v) -> ???)
The expected value will be
[["ACE"] // 78
["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"] // 47
["Ame"] // 4
["Amo"] // 60
....]
Ideally, I want to evenly distribute the second group (["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"], which sums to 47) and the third one (["Ame"], which sums to only 4).
How to implement it in F#?
I had the following solution. It uses a mutable variable. Is it not idiomatic F#? Is for ... do imperative in F#, or is it syntactic sugar for some functional construct?
seq {
    let mutable c = []
    for v in values |> Seq.sortBy (fun (k, _) -> k) do
        let sum = c |> Seq.map (fun (_, v) -> v) |> Seq.sum
        if not (c = []) && sum + (snd v) > 50 then
            yield c
            c <- [v]
        else
            c <- List.append c [v]
}
I think I got it. Not the nicest code ever, but it works and is immutable.
let foldFn (acc: (string list * int) list) (name, value) =
    let addToLast last =
        let withoutLast = acc |> List.filter ((<>) last)
        let newLast = [((fst last) @ [name]), (snd last) + value]
        newLast |> List.append withoutLast
    match acc |> List.tryLast with
    | None -> [[name], value]
    | Some l ->
        if (snd l) + value <= 50 then addToLast l
        else [[name], value] |> List.append acc

values |> List.fold foldFn [] |> List.map fst
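Traced by hand on the sample values above, the last line evaluates to the expected clusters:
// [["ACE"]; ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"]; ["Ame"]; ["Amo"]]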
Update: Since append can be quite an expensive operation, I added a prepend-only version (it still fulfills the original requirement to keep the order).
let foldFn (acc: (string list * int) list) (name, value) =
    let addToLast last =
        let withoutLast = acc |> List.filter ((<>) last) |> List.rev
        let newLast = ((fst last) @ [name]), (snd last) + value
        (newLast :: withoutLast) |> List.rev
    match acc |> List.tryLast with
    | None -> [[name], value]
    | Some l ->
        if (snd l) + value <= 50 then addToLast l
        else ([name], value) :: (List.rev acc) |> List.rev
Note: There is still an @ operator on line 4 (when creating the new list of names in the cluster), but since the theoretical maximum number of names in a cluster is 50 (if all of them were equal to 1), the performance impact here is negligible.
If you remove List.map fst from the last line, you also get the sum for each cluster in the list.
Append operations are expensive. A straightforward fold with prepended intermediate results is cheaper, even if the lists need to be reversed after processing.
["ACE", 78; "AMR", 3; "Aam", 6; "Acc", 1; "Adj", 23; "Aga", 12; "All", 2; "Ame", 4; "Amd", 6; "Amo", 60]
|> List.fold (fun (r, s1, s2) (t1, t2) ->
if t2 > 50 then [t1]::s1::r, [], 0
elif s2 + t2 > 50 then s1::r, [t1], t2
else r, t1::s1, s2 + t2 ) ([], [], 0)
|> fun (r, s1, _) -> s1::r
|> List.filter (not << List.isEmpty)
|> List.map List.rev
|> List.rev
// val it : string list list =
// [["ACE"]; ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"]; ["Ame"; "Amd"];
// ["Amo"]]
Here is a recursive version, working much the same way as the fold versions:
let groupBySums data =
    let rec group cur sum acc lst =
        match lst with
        | [] -> ((cur |> List.rev) :: acc) |> List.where (not << List.isEmpty) |> List.rev
        | (name, value) :: tail when value > 50 -> group [] 0 ([(name, value)] :: (cur |> List.rev) :: acc) tail
        | (name, value) :: tail ->
            match sum + value with
            | x when x > 50 -> group [(name, value)] 0 ((cur |> List.rev) :: acc) tail
            | _ -> group ((name, value) :: cur) (sum + value) acc tail
    (data |> List.sortBy (fun (name, _) -> name)) |> group [] 0 []
values |> groupBySums |> List.iter (printfn "%A")
Yet another solution using Seq.mapFold and Seq.groupBy:
let group values =
    values
    |> Seq.mapFold (fun (group, total) (name, count) ->
        let newTotal = count + total
        let newGroup = group + if newTotal > 50 then 1 else 0
        (newGroup, name), (newGroup, if newGroup = group then newTotal else count)
    ) (0, 0)
    |> fst
    |> Seq.groupBy fst
    |> Seq.map (snd >> Seq.map snd >> Seq.toList)
Invoke it like this:
[ "ACE", 78
"AMR", 3
"Aam", 6
"Acc", 1
"Adj", 23
"Aga", 12
"All", 2
"Ame", 4
"Amo", 60
]
|> group
|> Seq.iter (printfn "%A")
// ["ACE"]
// ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"]
// ["Ame"]
// ["Amo"]

Deedle - Weighted Average after filtering FilterRowValues

I am new to F#. I am attempting to calculate a weighted average after filtering my Frame by two timestamps and an instrument_id.
example data:
| trade_qty | trade_price | trade_timestamp    | instrument_id |
| 1000      | 100.59      | 1/26/2018 16:00:00 | 1             |
| 2000      | 105.10      | 1/26/2018 15:59:30 | 1             |
| 3000      | 97.59       | 1/26/2018 15:59:00 | 1             |
I found I can filter easily: e.g. instrument 1 between two times
frameVolume
|> Frame.filterRowValues (fun c ->
    c.GetAs<DateTime>("trade_timestamp") > DateTime(2018, 1, 27, 15, 31, 0))
|> Frame.filterRowValues (fun c ->
    c.GetAs<DateTime>("trade_timestamp") < DateTime(2018, 1, 27, 16, 0, 0))
|> Frame.filterRowValues (fun c ->
    c.GetAs<int>("instrument_id") = 1)
I am stuck here. I haven't figured out how to compute 1/sum(trade_qty) * sum(trade_price * trade_qty).
I have tried:
|>Frame.GetColumn<float>("trade_qty") *
Frame.GetColumn<float>("trade_price")
For context, I'd like to use this as a function to be fed into another function in order to calculate the weighted average price over several intervals.
Any Thoughts? Thank you!
It's nice that Deedle provides higher-order functions similar to the built-in higher-order functions for F# lists, arrays, and sequences. This makes the task simpler. Here is an implementation of the function you are describing:
#I "..\packages\Deedle.1.2.5"
#load "Deedle.fsx"
open System
open Deedle
let weightedAverage after before frame: float =
let filteredFrame =
frame
|> Frame.filterRowValues (fun r -> r.GetAs<DateTime>("trade_timestamp") < before)
|> Frame.filterRowValues (fun r -> r.GetAs<DateTime>("trade_timestamp") > after)
|> Frame.filterRowValues (fun r -> r.GetAs<int>("instrument_id") = 1)
let quantities: Series<int, float> = filteredFrame |> Frame.getCol "trade_qty"
let tradePrices: Series<int, float> = filteredFrame |> Frame.getCol "trade_price"
let weightedSum =
(quantities, tradePrices)
||> Series.zip
|> Series.mapValues (fun (q, p) -> (OptionalValue.get q * OptionalValue.get p))
|> Series.reduceValues (fun acc curr -> acc + curr)
let total =
quantities
|> Series.reduceValues (fun acc curr -> acc + curr)
weightedSum / total
let path = __SOURCE_DIRECTORY__ + "\data.csv"
let df = Frame.ReadCsv(path, separators = "|")
let ans = df |> weightedAverage (DateTime(2017, 1, 1)) (DateTime(2019, 1, 1))
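For the three sample rows above (they all pass the filters), this works out to (1000 * 100.59 + 2000 * 105.10 + 3000 * 97.59) / (1000 + 2000 + 3000) = 603560 / 6000 ≈ 100.59, so ans should land at roughly 100.59.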

F# find all elements present in another array

this solution
d1 |> Array.filter (fun t -> d2 |> Array.exists (fun t2 -> t=t2))
from this SO answer
Finding the difference between two arrays in FSharp
gives this error
Severity Code Description Project File Line Suppression State
Error Type mismatch. Expecting a
unit -> bool
but given a
'a [] -> bool
The type 'unit' does not match the type ''a []' ParseLibs
Program.fs 25
Full code:
// Learn more about F# at http://fsharp.org
// See the 'F# Tutorial' project for more help.
open System
open System.IO
open FSharp.Collections
[<EntryPoint>]
let main argv =
let path = "data1.txt"
let lines = use reader = new StreamReader(path) in reader.ReadToEnd().Split('\n')
let n = 5
let d1 = lines
|> Array.takeWhile (fun e -> not (e.Equals "\r"))
let d2 = lines
|> Array.skipWhile (fun e -> not (e.Equals "\r"))
|> Array.skip 1
|> Array.mapi (fun i e -> e, i)
|> Array.filter (fun (e, i) -> i % n = 0)
|> Array.iter (fun (e, i) -> printfn "%s" e)
d1 |> Array.filter (fun t -> d2 |> Array.exists (fun t2 -> t=t2))
//let writer = new StreamWriter(path)
ignore (Console.ReadKey())
0 // return an integer exit code
Is the answer there wrong? What is the real answer? I am simply trying to filter all the elements that are in both arrays. In most functional languages this is as trivial as they come.
d1 |> Array.filter (fun t -> d2.contains(t))
The problem is that d2 has type unit, because Array.iter returns (). I would change it to:
let d2 =
    lines
    |> Array.skipWhile (fun e -> not (e.Equals "\r"))
    |> Array.skip 1
    |> Array.mapi (fun i e -> e, i)
    |> Array.filter (fun (e, i) -> i % n = 0)

d1
|> Array.filter (fun t -> d2 |> Array.exists (fun (e, _) -> t = e))
|> Array.iter (fun e -> printfn "%s" e)
Using the actual answer from the above link: https://stackoverflow.com/a/28682277/5514938 and adding the information at https://en.wikipedia.org/wiki/Set_theory#Basic_concepts_and_notation
the following code is an example of the first three concepts, with the same sets/values as in the wiki article.
let d1= [|"1";"2";"3";|] //pretend this to be the filtered/skipped/mapped
//whatever but otherwise "clean" structure/datatypes
let d2 = [|"2";"3";"4";|] //pretend this to be the filtered/skipped/mapped
//whatever but otherwise "clean" structure/datatypes
//equal to d1
let s1 = d1
|> Set.ofArray
let s2 = d2
|> Set.ofArray
let all = s1 + s2 //Union
let inBoth = Set.intersect s1 s2 //Intersection
let onlyIn_d1 = s1 - s2 //Set difference
let onlyIn_d2 = s2 - s1 //Set difference the other way ;-)
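Evaluated in FSI, these bindings come out as:
// all       : set ["1"; "2"; "3"; "4"]
// inBoth    : set ["2"; "3"]
// onlyIn_d1 : set ["1"]
// onlyIn_d2 : set ["4"]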
I have removed your other code to simplify the concepts, so the initial filtering, skipping and mapping you are doing must of course be done ahead of the above code. You must also "realign" the types so they are equal again:
|> Array.mapi (fun i e -> e, i)
|> Array.filter (fun (e, i) -> i % n = 0)
|> Array.map (fun (e,i) -> e)
|> Set.ofArray

More volatile sequence than "classical"

For Cartesian products there is a good enough function, sequence, which is defined like this:
let rec sequence = function
    | [] -> Seq.singleton []
    | (l :: ls) -> seq { for x in l do for xs in sequence ls do yield (x :: xs) }
but look at its result:
sequence [[1..2];[1..10000]] |> Seq.skip 1000 ;;
val it : seq<int list> = seq [[1; 1001]; [1; 1002]; [1; 1003]; [1; 1004]; ...]
As we can see, the first "coordinate" of the product changes very slowly: it only moves to the next value once the second list has been exhausted.
I wrote my own sequence as follows (comments below):
/// Sum of all produced indices = n
let rec hyper'plane'indices indexsum maxlengths =
    match maxlengths with
    | [x] -> if indexsum < x then [[indexsum]] else []
    | (i :: is) -> [ for x in [0 .. min indexsum (i - 1)] do for xs in hyper'plane'indices (indexsum - x) is do yield (x :: xs) ]
    | [] -> [[]]

let finite'sequence = function
    | [] -> Seq.singleton []
    | ns ->
        let ars = [ for n in ns -> Seq.toArray n ]
        let length'list = List.map Array.length ars
        let nmax = List.max length'list
        seq {
            for n in [0 .. nmax] do
                for ixs in hyper'plane'indices n length'list do
                    yield (List.map2 (fun (a: 'a[]) i -> a.[i]) ars ixs)
        }
The key idea is to treat the (two) lists as (two) orthogonal dimensions, where every element is marked by its index in the list. Then we can enumerate all elements by walking every section of the Cartesian product cut by a hyperplane (in the 2D case a hyperplane is a line). In other words, imagine an Excel sheet where the first column contains values from [1;1] to [1;10000] and the second from [2;1] to [2;10000]. The "hyperplane" with number 1 is then the line that connects cell A2 and cell B1. For our example:
hyper'plane'indices 0 [2;10000];; val it : int list list = [[0; 0]]
hyper'plane'indices 1 [2;10000];; val it : int list list = [[0; 1]; [1; 0]]
hyper'plane'indices 2 [2;10000];; val it : int list list = [[0; 2]; [1; 1]]
hyper'plane'indices 3 [2;10000];; val it : int list list = [[0; 3]; [1; 2]]
hyper'plane'indices 4 [2;10000];; val it : int list list = [[0; 4]; [1; 3]]
Well, if we have the indices and the arrays produced from the given lists, we can now define the sequence as { all elements in plane 0; then all elements in plane 1; and so on } and get a more volatile function than the original sequence.
But finite'sequence turned out to be a very gluttonous function. And now the question: how can I improve it?
With best wishes, Alexander. (and sorry for poor English)
Can you explain what exactly the problem is - time or space complexity, or performance? Do you have a specific benchmark in mind? I am not sure how to improve on the time complexity here, but I edited your code a bit to remove the intermediate lists, which might help a bit with memory allocation behavior.
Do not do this (it builds an intermediate list just to iterate over it):
for n in [0 .. nmax] do
Do this instead:
for n in 0 .. nmax do
Here is the code:
let rec hyper'plane'indices indexsum maxlengths =
    match maxlengths with
    | [] -> Seq.singleton []
    | [x] -> if indexsum < x then Seq.singleton [indexsum] else Seq.empty
    | i :: is ->
        seq {
            for x in 0 .. min indexsum (i - 1) do
                for xs in hyper'plane'indices (indexsum - x) is do
                    yield x :: xs
        }

let finite'sequence xs =
    match xs with
    | [] -> Seq.singleton []
    | ns ->
        let ars = [ for n in ns -> Seq.toArray n ]
        let length'list = List.map Array.length ars
        let nmax = List.max length'list
        seq {
            for n in 0 .. nmax do
                for ixs in hyper'plane'indices n length'list do
                    yield List.map2 Array.get ars ixs
        }
Does this fare any better? Beautiful problem by the way.
UPDATE: Perhaps you are more interested in mixing the sequences fairly than in maintaining the exact formula in your algorithm. Here is some Haskell code that mixes a finite number of possibly infinite sequences fairly, where fairness means that for every input element there is a finite prefix of the output sequence that contains it. You mention in the comment that you have a 2D incremental solution that is hard to generalize to N dimensions; the Haskell code does exactly that:
merge :: [a] -> [a] -> [a]
merge [] y = y
merge x [] = x
merge (x:xs) (y:ys) = x : y : merge xs ys
prod :: (a -> b -> c) -> [a] -> [b] -> [c]
prod _ [] _ = []
prod _ _ [] = []
prod f (x:xs) (y:ys) = f x y : a `merge` b `merge` prod f xs ys where
  a = [f x y | x <- xs]
  b = [f x y | y <- ys]
prodN :: [[a]] -> [[a]]
prodN [] = [[]]
prodN (x:xs) = prod (:) x (prodN xs)
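As a small hand-worked check (traced by hand, so re-run it in GHCi to confirm): prodN [[1,2],[3,4]] should come out as [[1,3],[2,3],[2,4],[1,4]], i.e. both coordinates start changing right away instead of the first one being held fixed until the second list is exhausted.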
I have not ported this to F# yet - it requires some thought as sequences do not match to head/tail very well.
UPDATE 2:
A fairly mechanical translation to F# follows.
type Node<'T> =
    | Nil
    | Cons of 'T * Stream<'T>

and Stream<'T> = Lazy<Node<'T>>

let ( !! ) (x: Lazy<'T>) = x.Value
let ( !^ ) x = Lazy.CreateFromValue(x)

let rec merge (xs: Stream<'T>) (ys: Stream<'T>) : Stream<'T> =
    lazy
        match !!xs, !!ys with
        | Nil, r | r, Nil -> r
        | Cons (x, xs), Cons (y, ys) -> Cons (x, !^ (Cons (y, merge xs ys)))

let rec map (f: 'T1 -> 'T2) (xs: Stream<'T1>) : Stream<'T2> =
    lazy
        match !!xs with
        | Nil -> Nil
        | Cons (x, xs) -> Cons (f x, map f xs)

let ( ++ ) = merge

let rec prod f xs ys =
    lazy
        match !!xs, !!ys with
        | Nil, _ | _, Nil -> Nil
        | Cons (x, xs), Cons (y, ys) ->
            let a = map (fun x -> f x y) xs
            let b = map (fun y -> f x y) ys
            Cons (f x y, a ++ b ++ prod f xs ys)

let ofSeq (s: seq<'T>) =
    lazy
        let e = s.GetEnumerator()
        let rec loop () =
            lazy
                if e.MoveNext()
                then Cons (e.Current, loop ())
                else e.Dispose(); Nil
        !! (loop ())

let toSeq stream =
    stream
    |> Seq.unfold (fun stream ->
        match !!stream with
        | Nil -> None
        | Cons (x, xs) -> Some (x, xs))

let empty<'T> : Stream<'T> = !^ Nil
let cons x xs = !^ (Cons (x, xs))
let singleton x = cons x empty

let rec prodN (xs: Stream<Stream<'T>>) : Stream<Stream<'T>> =
    match !!xs with
    | Nil -> singleton empty
    | Cons (x, xs) -> prod cons x (prodN xs)

let test () =
    ofSeq [
        ofSeq [1; 2; 3]
        ofSeq [4; 5; 6]
        ofSeq [7; 8; 9]
    ]
    |> prodN
    |> toSeq
    |> Seq.iter (fun xs ->
        toSeq xs
        |> Seq.map string
        |> String.concat ", "
        |> stdout.WriteLine)

write records just in time when the value (for id of something) is changing

How can I write out records just at the moment when the value for an id changes? That is, for each id in some list, emit the record whenever its value flips (true -> false or false -> true)?
For example, given the table
id value
1  0
2  0
2  0
2  0
1  0
2  1   --> the value changes here
2  1
1  0
2  0   --> and here (node with id 2 changed 1 -> 0)
1  1   --> node with id 1 changed 0 -> 1
result table
2 1
2 0
1 1
My idea is not functional and a bit weird; I'm thinking about a functional or LINQ way of doing the same.
let oop = ref (filteredsq
               |> Seq.distinctBy (fun (node, v, k) -> k)
               |> Seq.map (fun (node, v, k) -> k, false)
               |> Array.ofSeq)

[ for (node, value, key) in filteredsq do
    let i = ref 0
    for (k, v) in !oop do
        if key = k && value <> v then
            (!oop).[!i] <- (k, value)
            yield node
        i := !i + 1 ]
Thank you
I think if you define a function like the following:
let getChanges f items =
    items
    |> Seq.map (fun x -> f x, x)
    |> Seq.pairwise
    |> Seq.choose (fun ((a, _), (b, x)) -> if a <> b then Some x else None)
Then you can do:
filteredsq
|> Seq.groupBy (fun (_, _, k) -> k)
|> Seq.collect (fun (_, items) ->
    items
    |> getChanges (fun (_, value, _) -> value)
    |> Seq.map (fun (node, _, _) -> node))
|> Seq.toList
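For instance, on a single key's rows getChanges keeps just the elements whose projected value differs from the previous one (a quick sketch with made-up values):

[ (2, 0); (2, 0); (2, 1); (2, 1); (2, 0) ]
|> getChanges snd
|> Seq.toList
// [(2, 1); (2, 0)]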
I'm not sure if I fully understand your question, but the following gives the right output according to your sample. The idea is to first filter out values that don't have the right key and then use Seq.pairwise (as in jpalmer's solution) to find the places where the value changes:
let input = [ (1, 0); (2, 0); (2, 0); (2, 0); (1, 0); (2, 1); (2, 1); (1, 0); (2, 0) ]

let findValueChanges key input =
    input
    |> Seq.filter (fun (k, v) -> k = key)                       // Get values with the right key
    |> Seq.pairwise                                             // Make tuples with previous & next value
    |> Seq.filter (fun ((_, prev), (_, next)) -> prev <> next)  // Find changing points
    |> Seq.map snd                                              // Return the new key-value pair (after change)
If you wanted to find changes for all different keys, then you could use Seq.groupBy to find all possible keys (then you wouldn't need the first line in findValueChanges):
input
|> Seq.groupBy fst
|> Seq.map (fun (k, vals) -> findValueChanges k vals)
(For your input, there are no changes in values for the key 1, because its value is always 0.)
I would do something like
List
|> List.toSeq
|> Seq.pairwise
|> Seq.pick (fun ((fa,fb),(sa,sb)) -> if fb <> sb then Some(sa,sb) else None)
I'd just use an internal mutable dictionary to keep track of the last-seen values for each key and yield (key,value) when any value is different from the last value at that key:
let filterChanges (s: ('a * 'b) seq) =
    let dict = new System.Collections.Generic.Dictionary<'a, 'b>()
    seq {
        for (key, value) in s do
            match dict.TryGetValue(key) with
            | false, _ -> dict.[key] <- value
            | true, lastValue ->
                if lastValue <> value then
                    yield (key, value)
                dict.[key] <- value
    }
Test:
> filterChanges [(1,0);(2,0);(2,0);(2,0);(1,0);(2,1);(2,1);(1,0);(2,0);(1,1)];;
val it : seq<int * int> = seq [(2, 1); (2, 0); (1, 1)]
Updated
open System.Collections.Generic

let filter (acc: 'a) (f: ('a -> 'b -> bool * 'a)) (s: 'b seq) =
    let rec iter (acc: 'a) (e: IEnumerator<'b>) =
        match e.MoveNext() with
        | false -> Seq.empty
        | true ->
            match f acc e.Current with
            | (true, newAcc) -> seq { yield e.Current; yield! iter newAcc e }
            | (false, newAcc) -> seq { yield! iter newAcc e }
    iter acc (s.GetEnumerator())

let skipUntilChange (f : 'a -> 'b) (s : 'a seq) =
    s
    |> Seq.skip 1
    |> filter (s |> Seq.head |> f)
        (fun a b -> if a = f b then false, f b else true, f b)

[ (1, 0); (2, 0); (2, 0); (2, 0); (1, 0); (2, 1); (2, 1); (1, 0); (2, 0) ]
|> Seq.mapi (fun c (i, v) -> (i, v, c))
|> Seq.groupBy (fun (i, v, c) -> i)
|> Seq.map (snd >> skipUntilChange (fun (_, v, _) -> v))
|> Seq.concat
|> Seq.sortBy (fun (i, v, c) -> c)
|> Seq.map (fun (i, v, c) -> (i, v))
|> printfn "%A"
