I'm trying to create an adjacency list with linked nodes that is currently defined like this:
/// A graph node that stores its neighbors as direct references to other nodes.
type Node =
    { /// Name identifying the node.
      Name: string
      /// Nodes this node has an edge to.
      Neighbors: Node list }
/// Adjacency list exposing a fixed list of nodes through IHasNodes.
type AdjacencyList(nodes: Node list) =
    interface IHasNodes with
        /// Gets the number of nodes in the adjacency list.
        member self.NodeCount = List.length nodes
        /// Gets a list of all nodes in the adjacency list.
        member self.Nodes = nodes
The input from which i want to create the list is a sequence of strings in the format
node_name neighbor_name_1 ... neighbor_name_n
So, basically this should be a simple task, but I can't think of a way to update nodes without running into a cycle when one node, e.g. A, has an edge to B and B has an edge to A. In this case I have to update the neighbors of B when creating its node object and in turn update the neighbor reference in node A to node B, which in turn leaves me with updating node B again and so on.
module List =
    /// <summary>
    /// Replaces the first item in a list that matches the given predicate.
    /// </summary>
    /// <param name="predicate">The predicate for the item to replace.</param>
    /// <param name="item">The replacement item.</param>
    /// <param name="list">The list in which to replace the item.</param>
    /// <returns>A new list with the first item that matches <paramref name="predicate"/> replaced by <paramref name="item"/>;
    /// a list equal to the input when nothing matches.</returns>
    let replace predicate item list =
        // `resultList` accumulates the already-visited prefix in reverse order.
        let rec replaceItem remainingItems resultList =
            match remainingItems with
            | [] -> List.rev resultList
            | head :: tail ->
                if predicate head then
                    // Restore the prefix order, then append the replacement and the
                    // untouched tail with the list-append operator `@`.
                    List.rev resultList @ (item :: tail)
                else
                    replaceItem tail (head :: resultList)
        replaceItem list []
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module AdjacencyList =
    /// Builds an AdjacencyList from rows of the form "node_name neighbor_name_1 ... neighbor_name_n".
    /// Blank rows and rows starting with "//" are skipped.
    /// A node name that appears as the first token of more than one row makes
    /// Dictionary.Add throw.
    /// NOTE(review): Node is an immutable record, so each Neighbors list holds snapshots of
    /// the neighbor nodes; with mutual edges (A -> B and B -> A) the snapshots cannot all be
    /// up to date at the same time.
    let create (rows: seq<string>) =
        let mutable preBuiltNodes: Node list = []
        // A row is kept only when it is BOTH non-blank AND not a comment. Joining the two
        // negations with `||` would accept every row and dereference null rows.
        let rowsToEnumerate =
            rows
            |> Seq.where (fun str -> not (System.String.IsNullOrWhiteSpace(str)) && not (str.StartsWith("//")))
        let neighborsForNodes = Dictionary<string, string array>()
        // Build the base nodes and get the neighbors of each node.
        for row in rowsToEnumerate do
            let rowData = row.Split(' ')
            neighborsForNodes.Add(rowData.[0], rowData |> Array.skip 1)
            preBuiltNodes <- { Name = rowData.[0]; Neighbors = [] } :: preBuiltNodes
        // Build the final nodes from the pre-built nodes.
        let rec buildAdjacencyList remainingNodes (builtNodes: Node list) =
            match remainingNodes with
            | [] -> builtNodes
            | head :: tail ->
                let neighbors =
                    preBuiltNodes
                    |> List.where (fun node -> neighborsForNodes.[head.Name] |> Array.exists (fun name -> name = node.Name))
                let newNode = { head with Neighbors = neighbors }
                // Update nodes referencing an old version of the new node. Folding with a
                // prepend yields the same (reversed) order as an index loop that prepends,
                // without the O(n) cost of indexing into a list.
                let newBuiltNodes =
                    builtNodes
                    |> List.fold
                        (fun acc built ->
                            if built.Neighbors |> List.exists (fun node -> node.Name = head.Name) then
                                // Cycle here when updating newNode
                                // if it has an edge to `built`.
                                let updatedNode =
                                    { built with
                                        Neighbors = built.Neighbors |> List.replace (fun n -> n.Name = newNode.Name) newNode }
                                updatedNode :: acc
                            else
                                built :: acc)
                        []
                preBuiltNodes <- preBuiltNodes |> List.replace (fun n -> n.Name = newNode.Name) newNode
                buildAdjacencyList tail (newNode :: newBuiltNodes)
        buildAdjacencyList preBuiltNodes [] |> AdjacencyList
I've implemented that algorithm in C# before (using mutable lists), so I might be missing a point here. Of course I could use mutable lists as well, but I wanted to try to use immutable ones.
I don't think there's any way to do what you want with your exact representation. One simple alternative is to make the set of neighbors lazy:
/// A graph node whose neighbor list is only computed when it is first forced.
type node<'a> =
    { /// Identifier of the node.
      id: 'a
      /// Deferred list of the neighboring nodes.
      neighbors: Lazy<node<'a> list> }
/// Turns a map from node id to neighbor ids into a list of nodes, one per map entry.
/// Each node's neighbor list is lazy: the lookup into the finished node list only runs
/// when `neighbors` is forced, which is what allows cyclic references.
/// Forcing a neighbor list that mentions an id missing from the map makes List.find throw.
let convert (m: Map<'a, 'a list>) =
    // `nodes` is a recursive value; it is only referenced inside the delayed `lazy` body.
    let rec nodes =
        m
        |> Map.toList
        |> List.map (fun (key, neighborIds) ->
            { id = key
              neighbors =
                lazy
                    (neighborIds
                     |> List.map (fun wanted -> List.find (fun candidate -> candidate.id = wanted) nodes)) })
    nodes
convert (Map [ (1, [ 2; 3 ]); (2, [ 3 ]); (3, [ 1 ]) ])
The key is that by making the neighbor list lazy, we can first create the set of all nodes that we care about before populating the set of neighbors (the neighbor computation is specified at the same time as the node is created, but not run until later).
In C# you would include a reference to the neighbor Nodes. But with your definition you have a Neighbor node instead, that makes it an impossible task.
Apart from the solution by @kvb there are other options:
Option 1: use a string list
To me it would make much more sense to make the list reference the Node name and not the node itself:
/// A graph node that refers to its neighbors by name rather than by reference.
type Node =
    { /// Name identifying the node.
      Name: string
      /// Names of the neighboring nodes.
      Neighbors: string list }
First some helper functions:
/// Adds the pair (node1, node2) to the relation set in both directions.
let addRelation relations (node1, node2) =
    let forward = Set.add (node1, node2) relations
    Set.add (node2, node1) forward
/// Folds a list of node pairs into one set of relations, using addRelation for each pair.
let allRelations nodes =
    (Set.empty, nodes) ||> List.fold addRelation
This would be the way to create it:
/// Builds one Node per distinct first component of the relations derived from the
/// given name pairs; its Neighbors are the second components paired with that name.
let getNodes nodes =
    allRelations nodes
    |> Seq.groupBy fst
    |> Seq.map (fun (name, pairs) ->
        { Name = name
          Neighbors = [ for (_, neighbor) in pairs -> neighbor ] })
    |> List.ofSeq
and basically you feed it a list with the pair of names:
// Feed a list of name pairs to getNodes and print each resulting node.
[ ("1", "2"); ("3", "4"); ("1", "3"); ("4", "5") ]
|> getNodes
|> List.iter (fun node -> printfn "%A" node)
produces:
{Name = "1";
Neighbors = ["2"; "3"];}
{Name = "2";
Neighbors = ["1"];}
{Name = "3";
Neighbors = ["1"; "4"];}
{Name = "4";
Neighbors = ["3"; "5"];}
{Name = "5";
Neighbors = ["4"];}
Option 2: ref to Node list
You could have a reference to a list of neighbor nodes:
/// A graph node whose neighbor list lives in a reference cell, so it can be filled in
/// after the node itself has been created.
type Node =
    { /// Name identifying the node.
      Name: string
      /// Mutable cell holding the neighboring nodes.
      Neighbors: Node list ref }
This way you can first create the nodes and then add them to the lists:
/// Builds the nodes for a list of name pairs in two phases: first one node per name with
/// an empty neighbor cell, then each cell is filled with references to the created nodes.
/// Returns the nodes as a list ordered by name (the order of Map.toList).
let getNodes nodePairs =
    let rels = allRelations nodePairs
    // Create exactly one node per distinct name instead of one throwaway node per relation.
    let nodes =
        rels
        |> Seq.map fst
        |> Seq.distinct
        |> Seq.map (fun name -> name, { Name = name; Neighbors = ref [] })
        |> Map.ofSeq
    let getNode nm = Map.find nm nodes
    rels
    |> Seq.groupBy fst
    |> Seq.iter (fun (name, neighbors) ->
        // `.Value <-` replaces the deprecated `:=` operator on reference cells.
        (getNode name).Neighbors.Value <- neighbors |> Seq.map (snd >> getNode) |> Seq.toList)
    nodes |> Map.toList |> List.map snd
Test it like this:
// Feed a list of name pairs to getNodes and print each resulting node.
[ ("1", "2"); ("3", "4"); ("1", "3"); ("4", "5") ]
|> getNodes
|> List.iter (fun node -> printfn "%A" node)
output:
{Name = "1";
Neighbors =
{contents =
[{Name = "2";
Neighbors = {contents = [...];};};
{Name = "3";
Neighbors =
{contents =
[...;
{Name = "4";
Neighbors = {contents = [...; {Name = "5";
Neighbors = {contents = [...];};}];};}];};}];};}
{Name = "2";
Neighbors =
{contents =
[{Name = "1";
Neighbors =
{contents =
[...;
{Name = "3";
Neighbors =
{contents =
[...;
{Name = "4";
Neighbors =
{contents = [...; {Name = "5";
Neighbors = {contents = [...];};}];};}];};}];};}];};}
{Name = "3";
Neighbors =
{contents =
[{Name = "1";
Neighbors = {contents = [{Name = "2";
Neighbors = {contents = [...];};}; ...];};};
{Name = "4";
Neighbors = {contents = [...; {Name = "5";
Neighbors = {contents = [...];};}];};}];};}
{Name = "4";
Neighbors =
{contents =
[{Name = "3";
Neighbors =
{contents =
[{Name = "1";
Neighbors = {contents = [{Name = "2";
Neighbors = {contents = [...];};}; ...];};};
...];};}; {Name = "5";
Neighbors = {contents = [...];};}];};}
{Name = "5";
Neighbors =
{contents =
[{Name = "4";
Neighbors =
{contents =
[{Name = "3";
Neighbors =
{contents =
[{Name = "1";
Neighbors =
{contents = [{Name = "2";
Neighbors = {contents = [...];};}; ...];};}; ...];};};
...];};}];};}
The problem with your approach is the Node type. I propose a different approach like this:
/// A graph vertex, identified by a string.
type Node =
    | Node of string
/// A graph stored as a map from each node to the set of its adjacent nodes.
type Graph =
    | Graph of Map<Node, Set<Node>>
/// The graph without any nodes.
let emptyGraph: Graph = Graph(Map.empty)
/// Returns a graph in which nodeB is adjacent to nodeA and nodeA is adjacent to nodeB.
/// Nodes that are not yet present in the graph are added.
let addEdge nodeA nodeB g =
    // Adds `toNode` to the adjacency set of `fromNode`, starting from an empty set
    // when `fromNode` has no entry yet.
    let link fromNode toNode adjacency =
        let known = defaultArg (Map.tryFind fromNode adjacency) Set.empty
        Map.add fromNode (Set.add toNode known) adjacency
    match g with
    | Graph adjacency ->
        let oneWay = link nodeA nodeB adjacency
        Graph(link nodeB nodeA oneWay)
/// Returns the graph unchanged when the node is already present; otherwise returns a
/// graph that additionally contains the node with an empty adjacency set.
let addIsolatedNode node g =
    match g with
    | Graph adjacency ->
        if Map.containsKey node adjacency then
            g
        else
            Graph(Map.add node Set.empty adjacency)
/// Looks up the adjacency set of a node: Some set when the node is in the graph,
/// None otherwise.
let getAdjs node g =
    match g with
    | Graph adjacency -> Map.tryFind node adjacency
// TEST
// Five edges among A..D, added in order, plus the isolated node E.
let myGraph =
    [ ("A", "B"); ("A", "C"); ("A", "D"); ("B", "C"); ("C", "D") ]
    |> List.fold (fun graph (a, b) -> addEdge (Node a) (Node b) graph) emptyGraph
    |> addIsolatedNode (Node "E")
getAdjs (Node "A") myGraph // result: Some (set [Node "B"; Node "C"; Node "D"])
getAdjs (Node "E") myGraph // result: Some (set [])
getAdjs (Node "F") myGraph // result: None
Related
Let's say I have a discriminated union consisting of three cases. Case A and C each takes a constructor to type X and Y respectively. I have a list consisting of different DU types and I want to filter that list down to a single DU type. Currently, I have a list consisting each of A, B and C. Now, if I want to filter the DU list only to type case A, how can I do that without having to pass the constructor to case A? (or pass a default constructor, I don't know how to do that either)
/// Payload carried by union case A.
type X = { Id: string; Name: string }
/// Payload carried by union case C.
type Y = { Id: int }
/// Three-case union; only A and C carry data.
type DU =
    | A of a: X
    | B
    | C of b: Y
/// Keeps the items of listDU that are structurally equal to typ. Because the comparison
/// is by full equality, an item of the same case with a different payload is not kept.
let extractDUTypesFromList (listDU: List<DU>) (typ: DU) : List<DU> =
    List.filter (fun candidate -> candidate = typ) listDU
// Sample values: two of case A, one of case B and one of case C.
let a = (A {Id = "1"; Name = "Test"})
let aa = (A {Id = "2"; Name = "Test 2"})
let b = B
let c = (C {Id = 1})
let listDU: List<DU> = [a; b; c; aa]
// NOTE: this is the question's intentionally failing attempt. `A _` is a pattern, not an
// expression, so it cannot be passed as the `typ` argument.
let filteredDUList: List<DU> = // this list will only contain case A
    extractDUTypesFromList listDU (A _) // doesn't work
There's no way to make such a filter generically. What I would do is
// Keep only the items that are case A, whatever their payload.
let filteredDUList =
    List.filter
        (fun item ->
            match item with
            | A _ -> true
            | _ -> false)
        listDU
If you want to extract all the Xs, you can do the following instead:
listDU |> List.choose (function A x -> Some(x) | _ -> None)
In order to filter like that we need the opposite of the DU constructor, which is an active recognizer.
Unfortunately you'll have to create them by hand. I did make a suggestion to have the F# compiler derive them automatically, and this is a good example of why such a suggestion matters.
// Active recognizers (ideally autogenerated)
// Each one yields Some payload (or Some () for the payload-free case B) when the value is
// of the corresponding union case, and None otherwise.
let (|A|_|) value =
    match value with
    | A payload -> Some payload
    | _ -> None
let (|B|_|) value =
    match value with
    | B -> Some()
    | _ -> None
let (|C|_|) value =
    match value with
    | C payload -> Some payload
    | _ -> None
/// Keeps the items of listDU for which the recognizer `typ` returns Some; the value inside
/// the Some is ignored and the original items are returned.
let inline extractDUTypesFromList (listDU: List<DU>) (typ: DU -> Option<'t>) : List<DU> =
    List.filter (fun candidate -> Option.isSome (typ candidate)) listDU
// Sample values: two of case A, one of case B and one of case C.
let a = A { Id = "1"; Name = "Test" }
let aa = A { Id = "2"; Name = "Test 2" }
let b = B
let c = C { Id = 1 }
let listDU: List<DU> = [ a; b; c; aa ]
// Pass the recognizer for case A as a first-class function.
let filteredDUList: List<DU> = // this list will only contain case A
    extractDUTypesFromList listDU (|A|_|)
results in
val filteredDUList : List<DU> = [A { Id = "1"
Name = "Test" }; A { Id = "2"
Name = "Test 2" }]
No need to say that you can make normal functions instead of active recognizers, since in this usage alone we're not using pattern matching at all, I mean you can name the function tryA as suggested, instead of (|A|_|).
Another version: in contrast to @brianberns' solution (which I think is a good one), this does not use reflection. It requires the creation of dummy values to define the filter criteria, as in the OP.
this and all other solutions are not really nice f# code, there should be a better way for what you want to accomplish.
/// Payload of union case A, with a placeholder value for use as a filter criterion.
type X =
    { Id: string
      Name: string }
    /// Placeholder X with empty strings in both fields.
    static member Empty: X = { Id = ""; Name = "" }
/// Payload of union case C, with a placeholder value for use as a filter criterion.
type Y =
    { Id: int }
    /// Placeholder Y with Id 0.
    static member Empty: Y = { Id = 0 }
/// Three-case union; only A and C carry data.
type DU =
    | A of a: X
    | B
    | C of b: Y
    /// True when both values are of the same union case; payloads are not compared.
    static member IsSameCase a b =
        match a with
        | A _ ->
            (match b with
             | A _ -> true
             | _ -> false)
        | B ->
            (match b with
             | B -> true
             | _ -> false)
        | C _ ->
            (match b with
             | C _ -> true
             | _ -> false)
/// Keeps the items of listDU that are of the same union case as `case`, per DU.IsSameCase.
let extractDUTypesFromList (listDU: List<DU>) (case: DU) : List<DU> =
    List.filter (fun item -> DU.IsSameCase case item) listDU
// Sample values: two of case A, one of case B and one of case C.
let a = (A {Id = "1"; Name = "Test"})
let aa = (A {Id = "2"; Name = "Test 2"})
let b = B
let c = (C {Id = 1})
let listDU: List<DU> = [a; b; c; aa]
// The dummy value A X.Empty only selects the case; its payload is ignored by IsSameCase.
let filteredDUList: List<DU> = // this list will only contain case A
    extractDUTypesFromList listDU ((A (X.Empty)))
// Stand-alone calls, one per union case.
// NOTE(review): the first call repeats the binding above; presumably these were meant to be
// evaluated interactively — confirm.
extractDUTypesFromList listDU ((A (X.Empty)))
extractDUTypesFromList listDU B
extractDUTypesFromList listDU (C (Y.Empty))
@torbonde's suggestion is good if you know the union case you want to filter by at compile time, but if you want a general solution that works for any union case, I think you'll need F# reflection:
open FSharp.Reflection
/// Keeps the items of listDU whose union case name, obtained through reflection, equals
/// unionCaseName.
let extractDUTypesFromList (listDU: List<DU>) (unionCaseName : string) : List<DU> =
    // Name of the union case a value was built with.
    let caseNameOf (value: DU) =
        let caseInfo = fst (FSharpValue.GetUnionFields(value, typeof<DU>))
        caseInfo.Name
    List.filter (fun item -> caseNameOf item = unionCaseName) listDU
let filteredDUList: List<DU> = // this list will only contain case A
extractDUTypesFromList listDU "A"
Note that you'll pay a runtime cost and lose some of the type-checking benefits of the compiler (e.g. the code will silently break if case A's name is subsequently modified), but it will do what you want.
I have these records:
// Aliases that name the role of each client field.
type Name = string
type PhoneNumber = int
type Sex =
    | Male
    | Female
type YearOfBirth = int
type Interests = string list
/// One client record.
type Client =
    { name: Name
      phone: PhoneNumber
      sex: Sex
      birth: YearOfBirth
      interests: Interests }
let client1 =
    { name = "Jon"
      phone = 37613498
      sex = Male
      birth = 1980
      interests = [ "Cars"; "Boats"; "Airplanes" ] }
let client2 =
    { name = "Jonna"
      phone = 31852654
      sex = Female
      birth = 1970
      interests = [ "Makeup"; "Sewing"; "Bananas" ] }
Which I put into a list:
let file1 = [client1;client2]
I then attempt to create a function using Map which should be able to filter file1 and only return the client, which has the same birthday as the one given in the function.
Example:
requestMap 1980
Would return map [("Jon", (37613498, Male, 1980, ["Cars"; "Boats"; "Airplanes"]))] in this case.
I've stumbled into a function, but I've got a bit stuck now.
let requestMap yob =
Map.ofList [for f in file1 do yield f.name,(f.phone,f.sex,f.birth,f.interests)] |>
Map.filter (fun key value -> )
I have trouble figuring out how I can get to birth in the current map's value? Because as it is right now, it's hidden inside value which is a PhoneNumber * Sex * YearOfBirth * Interests tuple currently.
Any hints?
To access elements of a tuple, you can use pattern matching:
Map.filter (fun key (phone, sex, birth, interests) -> birth = yob)
Or, if you're not interested in anything except birth year, you can ignore all other fields using underscore:
Map.filter (fun _ (_, _, birth, _) -> birth = yob)
That said, I would recommend filtering first and creating the map after, this would be less expensive:
/// Maps the name of every client in file1 born in year `yob` to a tuple of that client's
/// phone, sex, birth year and interests.
let requestMap yob =
    [ for client in file1 do
        if client.birth = yob then
            yield client.name, (client.phone, client.sex, client.birth, client.interests) ]
    |> Map.ofList
And while we're on the subject: why do you need to create that huge tuple in the first place? Can't you make the original records be values in your map? Like this:
/// Maps the name of every client in file1 born in year `yob` to the whole client record.
let requestMap yob =
    Map.ofList
        [ for client in file1 do
            if client.birth = yob then
                yield client.name, client ]
Here are some more options not mentioned in other answers:
Store the whole Client in the Map value:
[for f in file1 do yield f.name, f]
|> Map.ofList
|> Map.filter (fun _ f -> f.birth = yob)
Do a conditional yield with an if:
[ for f in file1 do
if f.birth = yob then
yield f.name, f ]
|> Map.ofList
I think it would be better to filter the file1 list before transforming it into a map rather than creating the map and then filtering it.
/// Filters file1 down to the clients born in year `yob` first, then maps each remaining
/// client's name to a tuple of phone, sex, birth year and interests.
let requestMap yob =
    file1
    |> List.filter (fun client -> client.birth = yob)
    |> List.map (fun client -> client.name, (client.phone, client.sex, client.birth, client.interests))
    |> Map.ofList
I'm a beginner in F#, and this is my first attempt at programming something serious. I'm sorry the code is a bit long, but there are some issues with mutability that I don't understand.
This is an implementation of the Karger MinCut Algorithm to calculate the mincut on a non-directed graph component. I won't discuss here how the algo works,
for more info https://en.wikipedia.org/wiki/Karger%27s_algorithm
What is important is it's a randomized algorithm, which is running a determined number of trial runs, and taking the "best" run.
I realize now that I could avoid a lot of the problems below if I did construct a specific function for each random trial, but I'd like to understand EXACTLY what is wrong in the implementation below.
I'm running the code on this simple graph (the mincut is 2 when we cut the graph
into 2 components (1,2,3,4) and (5,6,7,8) with only 2 edges between those 2 components)
3--4-----5--6
|\/| |\/|
|/\| |/\|
2--1-----7--8
the file simplegraph.txt should encode this graph as follow
(1st column = node number, other columns = links)
1 2 3 4 7
2 1 3 4
3 1 2 4
4 1 2 3 5
5 4 6 7 8
6 5 7 8
7 1 5 6 8
8 5 6 7
This code may look too much as imperative programming yet, I'm sorry for that.
So There is a main for i loop calling each trial.
the first execution, (when i=1) looks smooth and perfect,
but I get a runtime error when i=2, because it looks like some variables,
like WG, are not reinitialized correctly, causing out-of-bounds errors.
WG, WG1 and WGmin are type wgraphobj, which are a record of Dictionary objects
WG1 is defined outside the main loop and i make no new assignments to WG1.
[but its type is mutable though, alas]
I defined first WG with the instruction
let mutable WG = WG1
then at the beginning of the for i loop,
i write
WG <- WG1
and then later, i modify the WG object in each trial to make some calculations.
when the trial is finished and we go to the next trial (i is increased) i want to reset WG to its initial state being like WG1.
but it seems it's not working, and I don't get why...
Here is the full code
MyModule.fs [some functions not necessary for execution]
namespace MyModule
/// Conversions between key/value dictionaries and F# collections of (key, value) tuples.
module Dict =
    open System.Collections.Generic
    /// Views a sequence of key/value pairs as a sequence of tuples.
    let toSeq d =
        Seq.map (fun (pair: KeyValuePair<_, _>) -> pair.Key, pair.Value) d
    /// Copies the entries of a dictionary into an array of tuples.
    let toArray (d: IDictionary<_, _>) = Seq.toArray (toSeq d)
    /// Copies the entries of a dictionary into a list of tuples.
    let toList (d: IDictionary<_, _>) = Seq.toList (toSeq d)
    /// Creates a new Dictionary from an F# Map and returns it as IDictionary.
    let ofMap (m: Map<'k, 'v>) : IDictionary<'k, 'v> =
        upcast new Dictionary<'k, 'v>(m)
    // The remaining constructors go through Map first, then build the dictionary from it.
    let ofList (l: ('k * 'v) list) : IDictionary<'k, 'v> = ofMap (Map.ofList l)
    let ofSeq (s: ('k * 'v) seq) : IDictionary<'k, 'v> = ofMap (Map.ofSeq s)
    let ofArray (a: ('k * 'v) []) : IDictionary<'k, 'v> = ofMap (Map.ofArray a)
Karger.fs
open MyModule.Dict
open System.IO
let x = File.ReadAllLines "\..\simplegraph.txt";;
// val x : string [] =
/// Splits the text at every tab and every space character.
let splitAtTab (text: string) =
    let separators = [| '\t'; ' ' |]
    text.Split separators
/// Pairs the first element of the sequence with the sequence of all remaining elements.
let splitIntoKeyValue (s: seq<'T>) =
    let key = Seq.head s
    let rest = Seq.tail s
    (key, rest)
/// Parses one input line into (first number, remaining numbers). The line is split at tabs
/// and spaces, empty tokens are dropped and every remaining token is converted with `int`.
let parseLine (line: string) =
    let numbers =
        splitAtTab line
        |> Array.filter (fun token -> not (token = ""))
        |> Array.map int
    splitIntoKeyValue (Array.toSeq numbers)
// One (node, links) pair per input line.
let y = Array.map parseLine x
open System.Collections.Generic
// let graph = new Map <int, int array>
// Mutable adjacency table filled from the parsed lines.
let graphD = new Dictionary<int,int seq>()
for (node, links) in y do
    graphD.Add(node, links)
let graphM = Map.ofArray y //immutable
let N = Array.length y // number of nodes
let Nruns = 2
// Removal flag per node 1..N, initially false.
let remove_table = new Dictionary<int,bool>()
for i in 1..N do
    remove_table.Add(i, false)
// let remove_table = seq [|for a in 1 ..N -> false|] // shorter
// Every node 1..N starts out as its own label head.
let label_head_table = new Dictionary<int,int>()
for i in 1..N do
    label_head_table.Add(i, i)
// Every node 1..N starts out labelled with just itself.
let label = new Dictionary<int,int seq>()
for i in 1..N do
    label.Add(i, [i])
let mutable min_cut = 1000000
/// Bundle of the four mutable dictionaries that make up the working graph state.
type wgraphobj =
    { /// Node -> linked nodes.
      Graph: Dictionary<int, int seq>
      /// Node -> removal flag.
      RemoveTable: Dictionary<int, bool>
      /// Node -> labels gathered under that node.
      Label: Dictionary<int, int seq>
      /// Node -> label head of that node.
      LabelHead: Dictionary<int, int> }
// Initial working state. The record stores references to the module-level dictionaries
// (graphD, remove_table, label, label_head_table); nothing is copied.
let WG1 = {Graph = graphD;
           RemoveTable = remove_table;
           Label = label;
           LabelHead = label_head_table}
// Starts out bound to the very same record as WG1, so it shares WG1's dictionaries.
let mutable WGmin = WG1
/// True when the flag (second component) of the pair is false.
let IsNotRemoved x =
    let (_, removed) = x
    not removed
/// True when node i is not flagged in WG.RemoveTable. The indexer throws when i has no entry.
let IsNotRemoved1 WG i =
    IsNotRemoved (i, WG.RemoveTable.[i])
/// Returns the keys of the dictionary whose flag is false, in the dictionary's
/// enumeration order.
let GetLiveNode d =
    [ for entry in toList d do
        if IsNotRemoved entry then
            yield fst entry ]
let rand = System.Random()
// Builds a new dictionary holding the entries of `dict` whose keys occur in `sub_list`.
// Keys absent from `dict` are skipped; `dict` itself is not modified.
// A key listed twice makes Dictionary.Add throw.
let D_Subset (dict:Dictionary<'T,'U>) (sub_list:list<'T>) =
    let selected = Dictionary<'T,'U>()
    for key in sub_list do
        if dict.ContainsKey key then
            selected.Add(key, dict.[key])
    selected
// Returns a NEW dictionary holding the entries of `dict` whose keys do not occur in
// `sub_list`. `dict` itself is left untouched: binding it to another name would only
// alias it, and removing through the alias would also remove from the caller's dictionary.
// Keys of `sub_list` that are absent from `dict` are ignored.
let D_SubsetC (dict:Dictionary<'T,'U>) (sub_list:list<'T>) =
    // Copy the entries and keep the source's key comparer.
    let z = Dictionary<'T,'U>(dict, dict.Comparer)
    for k in sub_list do
        z.Remove k |> ignore
    z
// Lazily keeps the elements of S that also occur in sub_list.
let S_Subset (S:seq<'T>)(sub_list:seq<'T>) =
    Seq.filter (fun candidate -> Seq.contains candidate sub_list) S
// Lazily keeps the elements of S that do NOT occur in sub_list.
let S_SubsetC (S:seq<'T>)(sub_list:seq<'T>) =
    Seq.filter (fun candidate -> not (Seq.contains candidate sub_list)) S
/// Runs Nruns randomized contraction trials on the graph and records the smallest cut
/// count in min_cut / WGmin. Always returns exit code 0.
[<EntryPoint>]
let main argv =
    // Copies the four dictionaries of a working state. `WG <- WG1` alone only rebinds the
    // reference, so every trial would keep mutating the same dictionaries. The copy is
    // shallow: the stored `int seq` values are shared, which is fine because the trial
    // only replaces dictionary entries and never mutates a stored sequence.
    let cloneState (source: wgraphobj) =
        { Graph = Dictionary<int, int seq>(source.Graph)
          RemoveTable = Dictionary<int, bool>(source.RemoveTable)
          Label = Dictionary<int, int seq>(source.Label)
          LabelHead = Dictionary<int, int>(source.LabelHead) }
    let mutable u = 0
    let mutable v = 0
    let mutable r = 0
    let mutable N_cut = 1000000
    let mutable cluster_A_min = seq [0]
    let mutable cluster_B_min = seq [0]
    let mutable WG = WG1
    let mutable LiveNodeList = [0]
    for i in 1 .. Nruns do
        // Start every trial from a private copy of the pristine state, so WG1 is never
        // modified and each run sees the original graph.
        WG <- cloneState WG1
        printfn "%d" i
        for k in 1..(N-2) do
            LiveNodeList <- GetLiveNode WG.RemoveTable
            r <- rand.Next(0,N-k)
            u <- LiveNodeList.[r] //selecting a live node
            // Label heads of u's links, restricted to live nodes, without duplicates.
            let uuu = WG.Graph.[u] |> Seq.map (fun s -> WG.LabelHead.[s] )
                                   |> Seq.filter (IsNotRemoved1 WG)
                                   |> Seq.distinct
            let n_edge = uuu |> Seq.length
            let x = rand.Next(1,n_edge)
            let mutable ok = false //maybe we can take this out
            while not(ok) do
                // selecting the edge from node u
                v <- WG.LabelHead.[Array.get (uuu |> Seq.toArray) (x-1)]
                let vvv = WG.Graph.[v] |> Seq.map (fun s -> WG.LabelHead.[s] )
                                       |> Seq.filter (IsNotRemoved1 WG)
                                       |> Seq.distinct
                // Merge v into u: u gets the union of both link sets minus u and v.
                let zzz = S_SubsetC (Seq.concat [uuu;vvv] |> Seq.distinct) [u;v]
                WG.Graph.[u] <- zzz
                let lab_u = WG.Label.[u]
                let lab_v = WG.Label.[v]
                WG.Label.[u] <- Seq.concat [lab_u;lab_v] |> Seq.distinct
                if (k<N-1) then
                    WG.RemoveTable.[v]<-true
                    //updating Label_head for all members of Label.[v]
                    WG.LabelHead.[v]<- u
                    for j in WG.Label.[v] do
                        WG.LabelHead.[j]<- u
                ok <- true
                printfn "u= %d v=%d" u v
        // end of for k in 1..(N-2)
        // counting cuts
        // u,v contain the 2 indexes of groupings
        let cluster_A = WG.Label.[u]
        let cluster_B = S_SubsetC (seq[for i in 1..N do yield i]) cluster_A // defined as complementary of A
        // let WG2 = {Graph = D_Subset WG1.Graph (cluster_A |> Seq.toList)
        //            RemoveTable = remove_table
        //            Label = D_Subset WG1.Graph (cluster_A |> Seq.toList)
        //            LabelHead = label_head_table}
        let cross_edge = // returns keyvalue pair (k,S')
            let IsInCluster cluster (k,S) =
                (k,S_Subset S cluster)
            graphM |> toSeq |> Seq.map (IsInCluster cluster_B)
        N_cut <-
            cross_edge |> Seq.map (fun (k:int,v:int seq)-> Seq.length v)
                       |> Seq.sum
        // Remember the best trial so far.
        if (N_cut<min_cut) then
            min_cut <- N_cut
            WGmin <- WG
            cluster_A_min <- cluster_A
            cluster_B_min <- cluster_B
    // end of for i in 1..Nruns
    0 // return an integer exit code
Description of the algo: (i don't think its too essential to solve my problem)
at each trial, there are several steps. at each step, we merge 2 nodes into 1, (removing effectively 1) updating the graph. we do that 6 times until there are only 2 nodes left, which we define as 2 clusters, and we look at the number of cross edges between those 2 clusters. if we are "lucky" those 2 clusters would be (1,2,3,4) and (5,6,7,8) and find the right number of cuts.
at each step, the object WG is updated with the effects of merging 2 nodes
with only LiveNodes (the ones which are not eliminated as a result of merging 2 nodes) being perfectly kept up to date.
WG.Graph is the updated graph
WG.Label contains the labels of the nodes which have been merged into the current node
WG.LabelHead contains the label of the node into which that node has been merged
WG.RemoveTable says if the node has been removed or not.
Thanks in advance for anyone willing to take a look at it !
"It seems not working", because wgraphobj is a reference type, which is allocated on the heap, which means that when you're mutating the innards of WG, you're also mutating the innards of WG1, because they're the same innards.
This is precisely the kind of mess you get yourself into if you use mutable state. This is why people recommend to not use it. In particular, your use of mutable dictionaries undermines the robustness of your algorithm. I recommend using the F#'s own efficient immutable dictionary (called Map) instead.
Now, in response to your comment about WG.Graph <- GraphD giving compile error.
WG is mutable, but WG.Graph is not (but the contents of WG.Graph are again mutable). There is a difference, let me try to explain it.
WG is mutable in the sense that it points to some object of type wgraphobj, but you can make it, in the course of your program, to point at another object of the same type.
WG.Graph, on the other hand, is a field packed inside WG. It points to some object of type Dictionary<_,_>. And you cannot make it point to another object. You can create a different wgraphobj, in which the field Graph point to a different dictionary, but you cannot change where the field Graph of the original wgraphobj points.
In order to make the field Graph itself mutable, you can declare it as such:
type wgraphobj = {
mutable Graph: Dictionary<int, int seq>
...
Then you will be able to mutate that field:
WG.Graph <- GraphD
Note that in this case you do not need to declare the value WG itself as mutable.
However, it seems to me that for your purposes you can actually go the way of creating a new instance wgraphobj with the field Graph changed, and assigning it to the mutable reference WG:
// Rebind the mutable reference WG to a copy of the record in which only the Graph field
// points to GraphD. The target is WG itself: the Graph field is not mutable and could not
// hold a whole record anyway.
WG <- { WG with Graph = GraphD }
I have the following code:
open System
open System.Linq
/// A child entry that refers to its parent by id.
type Child =
    { id: Guid
      name: int
      parent: Guid }
/// A parent entry that refers to its children by id.
type Parent =
    { id: Guid
      name: int
      children: seq<Guid> }
/// Creates a Child with a freshly generated id, the given name and the given parent id.
let makeChild name parentId : Child =
    let childId = Guid.NewGuid()
    { id = childId
      name = name
      parent = parentId }
/// Creates the Parent for one group together with one Child per group element.
/// Returns (parent, children); parent.children lists the ids of exactly those children.
let makeParent (group: IGrouping<int, int>) =
    let id = Guid.NewGuid()
    // Materialise the children once. A lazy Seq.map would call makeChild (and therefore
    // Guid.NewGuid) again on every enumeration, so the ids stored in the parent would
    // never match the ids of the children handed back to the caller.
    let children = group |> Seq.map (fun x -> makeChild x id) |> Seq.toList
    let ids = children |> List.map (fun x -> x.id)
    ({
        Parent.id = id
        name = group.Key
        children = Seq.ofList ids
     }, Seq.ofList children)
/// Creates parents and children for all groups.
/// Returns (parents, children), with the children of all groups concatenated in group order.
let makeAll (groups: seq<IGrouping<int, int>>) =
    // Run makeParent exactly once per group; both projections below read the same results.
    let result = groups |> Seq.map makeParent |> Seq.toList
    let children = result |> Seq.collect snd
    let parents = result |> Seq.map fst
    (parents, children)
(I accept IGrouping<int, int> instead of seq<int * seq<int>> because this code needs to interoperate with C#.)
However, when I run with the following:
let parents, children = makeAll(Enumerable.Range(0, 100).GroupBy(fun x -> x % 10))
then none of the children.[i].parent guids correlate with the parents.[j].children.[k] guids for i, j, k.
Why is this not the case? How can I get it to be so?
Didn't test that, but it seems the problem is in the fact you enumerate the result seq twice, once in the let children, once in the let parents line. And since guid generation is side-effecting, you get two different results for each of the enumerations.
If you cache the seq in the let result line (or materialize it by turning it into an array or a list in the same line), you should get what you're looking for:
let result = groups |> Seq.map (fun x -> makeParent x) |> Seq.cache
The same in the makeParent function. The ids seq needs to be cached as well.
"Traps" like this are why I find it preferable to use concrete collection types rather than seqs on the boundaries of functions or interfaces. And if you're looking for laziness, you can make it explicit by using Lazy type.
I have defined a record type for some client data in F# as follows:-
/// One row of client data.
type DataPoint =
    { date: string
      dr: string
      Group: string
      Product: string
      Book: int
      Revenue: int }
    /// Lazily reads the given file, skips the first line (the header), splits every other
    /// line at ',' and builds one record per line from the first six columns.
    /// Columns 4 and 5 are converted with `int`.
    static member fromFile file =
        file
        |> File.ReadLines
        |> Seq.skip 1 //skip the header
        |> Seq.map (fun line ->
            // split the line into its columns, then create a record from them
            let a = line.Split ','
            { date = string a.[0]
              dr = string a.[1]
              Group = string a.[2]
              Product = string a.[3]
              Book = int a.[4]
              Revenue = int a.[5] })
// creates a record for each line
let pivot (file) = DataPoint.fromFile file
|> ??????????
For the rows where date, dr, Group and Product are all equal, I want to then sum all of the Book and Revenue entries, producing a pivoted row. So some kind of if else statement should be fine. I suspect I need to start at the first data point and recursively add each matching row and then delete the matching row to avoid duplicates in the output.
Once I have done this I will be easily able to write these pivoted rows to another csv file.
Can anyone get me started?
Seq.groupBy and Seq.reduce are what you're looking for:
/// Reads the data points of the file and collapses all rows that share date, dr, Group and
/// Product into one row whose Book and Revenue are the sums over those rows.
let pivot file =
    let sumInto acc dp =
        { acc with
            Book = acc.Book + dp.Book
            Revenue = acc.Revenue + dp.Revenue }
    DataPoint.fromFile file
    |> Seq.groupBy (fun dp -> dp.date, dp.dr, dp.Group, dp.Product)
    |> Seq.map (fun (_, rows) -> Seq.reduce sumInto rows)
Quickly hacked up, should give you some idea:
// Sample data
// Sample rows: two identical rows for group "A" and one row for group "B".
let data =
    [ { date = "2012-01-01"; dr = "Test"; Group = "A"; Product = "B"; Book = 123; Revenue = 123 }
      { date = "2012-01-01"; dr = "Test"; Group = "A"; Product = "B"; Book = 123; Revenue = 123 }
      { date = "2012-01-01"; dr = "Test"; Group = "B"; Product = "B"; Book = 11; Revenue = 123 } ]
// For every (date, dr, Group, Product) key: the key, the summed Book and the summed Revenue.
let grouped =
    data
    |> Seq.groupBy (fun d -> (d.date, d.dr, d.Group, d.Product))
    |> Seq.map (fun (key, rows) ->
        let bookTotal = rows |> Seq.sumBy (fun row -> row.Book)
        let revenueTotal = rows |> Seq.sumBy (fun row -> row.Revenue)
        (key, bookTotal, revenueTotal))
// Print both totals for every group.
grouped
|> Seq.iter (fun (g, books, revs) ->
    printfn "Books %A: %d" g books
    printfn "Revenues %A: %d" g revs)
prints
Books ("2012-01-01", "Test", "A", "B"): 246
Revenues ("2012-01-01", "Test", "A", "B"): 246
Books ("2012-01-01", "Test", "B", "B"): 11
Revenues ("2012-01-01", "Test", "B", "B"): 123