I have the following code and I had to add Threading.Thread.Sleep 5000 to wait for five seconds before shutdown the actor system. Otherwise, some of the Serilog loggings which write to a database will not save. Is there a way to make sure all the logging is done before exit the program?
open System
open Akka.FSharp
open Akka
type CRange = CRange of low: string * high: string
type ErrorMessage = string
type CError =
| LoginFailed of string // Optional reason
| CTimeOut of CRange list // new range
| CException of CRange list * exn
| CMessage of CRange list * ErrorMessage
type CTask =
CTask of
c: string *
username: string *
password: string *
proxyAddress: string *
cycle: DateTime *
issuerRange: CRange list *
filterTable: bool
type DoWork = Map<string, CTask -> Result<int * string, CError>>
type PartKeyCount = PartKeyCount of Key: string * Count: int
type PartGroupCount = PartGroupCount of Group: string list * Count: int
let system = System.create "ASystem" <| Configuration.load ()
let scheduler (actors: Actor.IActorRef list) task (mailbox: Actor<Result<int, CError>>) =
let newTask task range =
let (CTask(c, username, password, proxy, cycle, _, filter)) = task
CTask(c, username, password, proxy, cycle, range, filter)
let rec loop (list: CRange list list) running = actor {
let akkaName = mailbox.Self.Path.Name
printfn "%s scheduler loop (Running: %d Todo:%d)" akkaName running list.Length
let! m = mailbox.Receive ()
let sender = mailbox.Sender ()
printfn "%s received message %A from %A" akkaName m sender
match m with
| Ok _ ->
printfn "finished one"
match list with
| [] ->
if running = 1 then
//Log.Information("....")
Threading.Thread.Sleep 5000 // Wait for 5 seconds
mailbox.Context.System.Terminate() |> ignore
else
return! loop [] (running - 1)
| x :: xs ->
printfn "Finished one. Todo %d, running %d - %A. New task %A to %A" xs.Length running sender x sender
let t = newTask task x
sender.Tell(t, mailbox.Self) // <!
return! loop xs running
| _ -> ()
}
let groups = [(CRange ("A","A"), 1); (CRange ("B","B"), 1); (CRange ("C","C"), 1);
(CRange ("D","D"), 1); (CRange ("zzz","zzz"), 1)]
let n = if actors.Length < groups.Length then actors.Length else groups.Length
[0..n-1] |> List.iter(fun i ->
let t = newTask task [fst groups.[i]]
actors.[i].Tell(t, mailbox.Self))
let todo = groups |> Seq.skip n |> Seq.toList |> List.map(fun (x, _) -> [x])
Console.WriteLine("Groups {0}; Running {1}; Todo: {2}", groups.Length, n, todo.Length)
loop todo n
let processor (mailbox: Actor<CTask>) =
let rec loop () = actor {
let! m = mailbox.Receive ()
let sender = mailbox.Sender ()
let akkaName = mailbox.Self.Path.Name
printfn "* %s received message %A from %A" akkaName m sender
sender <! (Ok DateTime.Now.Millisecond :> Result<int, CError>)
printfn "* %s sent to %A." akkaName sender
return! loop()
}
loop ()
let spawnSystems n =
[1..n]
|> List.map(fun i ->
spawn system (sprintf "Processor%d" i) (processor)
)
let startAkka task actorNumber =
let actors = spawnSystems actorNumber
Threading.Thread.Sleep 1000
scheduler actors task
|> spawn system "Scheduler"
|> ignore
system.WhenTerminated.Wait()
[<EntryPoint>]
let main argv =
let c = "priv"
let cycle = new DateTime(2020, 1, 1)
let username, password = "username", "password"
let task = CTask(c, username, password, "", cycle, [], false)
startAkka task 2
0
Before shutting down the Akka.net actor system, you need to tell Serilog to "flush" any buffered messages. There are different ways of doing that depending on how you're using Serilog.
The easiest way to use Serilog is via the global Log class:
Log.Logger = new LoggerConfiguration()
.WriteTo.File(#"myapp\log.txt")
.CreateLogger();
Log.Information("Hello!");
// Your application runs, then:
Log.CloseAndFlush();
If don't use the static Log class, you will use LoggerConfiguration to create an ILogger and dispose that instance.
using (var log = new LoggerConfiguration()
.WriteTo.File(#"myapp\log.txt")
.CreateLogger())
{
log.Information("Hello again!");
// Your app runs, then disposal of `log` flushes any buffers
}
You can see more details on the Lifecycle of Loggers.
Related
I've coded the "classical" bank account kata with F# MailboxProcessor to be thread safe. But when I try to parallelize adding a transaction to an account, it's very slow very quick: 10 parallel calls are responsive (2ms), 20 not (9 seconds)! (See last test Account can be updated from multiple threads beneath)
Since MailboxProcessor supports 30 million messages per second (see theburningmonk's article), where the problem comes from?
// -- Domain ----
type Message =
| Open of AsyncReplyChannel<bool>
| Close of AsyncReplyChannel<bool>
| Balance of AsyncReplyChannel<decimal option>
| Transaction of decimal * AsyncReplyChannel<bool>
type AccountState = { Opened: bool; Transactions: decimal list }
type Account() =
let agent = MailboxProcessor<Message>.Start(fun inbox ->
let rec loop (state: AccountState) =
async {
let! message = inbox.Receive()
match message with
| Close channel ->
channel.Reply state.Opened
return! loop { state with Opened = false }
| Open channel ->
printfn $"Opening"
channel.Reply (not state.Opened)
return! loop { state with Opened = true }
| Transaction (tran, channel) ->
printfn $"Adding transaction {tran}, nb = {state.Transactions.Length}"
channel.Reply true
return! loop { state with Transactions = tran :: state.Transactions }
| Balance channel ->
let balance =
if state.Opened then
state.Transactions |> List.sum |> Some
else
None
balance |> channel.Reply
return! loop state
}
loop { Opened = false; Transactions = [] }
)
member _.Open () = agent.PostAndReply(Open)
member _.Close () = agent.PostAndReply(Close)
member _.Balance () = agent.PostAndReply(Balance)
member _.Transaction (transaction: decimal) =
agent.PostAndReply(fun channel -> Transaction (transaction, channel))
// -- API ----
let mkBankAccount = Account
let openAccount (account: Account) =
match account.Open() with
| true -> Some account
| false -> None
let closeAccount (account: Account option) =
account |> Option.bind (fun a ->
match a.Close() with
| true -> Some a
| false -> None)
let updateBalance transaction (account: Account option) =
account |> Option.bind (fun a ->
match a.Transaction(transaction) with
| true -> Some a
| false -> None)
let getBalance (account: Account option) =
account |> Option.bind (fun a -> a.Balance())
// -- Tests ----
let should_equal expected actual =
if expected = actual then
Ok expected
else
Error (expected, actual)
let should_not_equal expected actual =
if expected <> actual then
Ok expected
else
Error (expected, actual)
let ``Returns empty balance after opening`` =
let account = mkBankAccount() |> openAccount
getBalance account |> should_equal (Some 0.0m)
let ``Check basic balance`` =
let account = mkBankAccount() |> openAccount
let openingBalance = account |> getBalance
let updatedBalance =
account
|> updateBalance 10.0m
|> getBalance
openingBalance |> should_equal (Some 0.0m),
updatedBalance |> should_equal (Some 10.0m)
let ``Balance can increment or decrement`` =
let account = mkBankAccount() |> openAccount
let openingBalance = account |> getBalance
let addedBalance =
account
|> updateBalance 10.0m
|> getBalance
let subtractedBalance =
account
|> updateBalance -15.0m
|> getBalance
openingBalance |> should_equal (Some 0.0m),
addedBalance |> should_equal (Some 10.0m),
subtractedBalance |> should_equal (Some -5.0m)
let ``Account can be closed`` =
let account =
mkBankAccount()
|> openAccount
|> closeAccount
getBalance account |> should_equal None,
account |> should_not_equal None
#time
let ``Account can be updated from multiple threads`` =
let account =
mkBankAccount()
|> openAccount
let updateAccountAsync =
async {
account
|> updateBalance 1.0m
|> ignore
}
let nb = 10 // 👈 10 is quick (2ms), 20 is so long (9s)
updateAccountAsync
|> List.replicate nb
|> Async.Parallel
|> Async.RunSynchronously
|> ignore
getBalance account |> should_equal (Some (decimal nb))
#time
Your problem is that your code don't uses Async all the way up.
Your Account class has the method Open, Close, Balance and Transaction and you use a AsyncReplyChannel but
you use PostAndReply to send the message. This means: You send a message to the MailboxProcessor with a channel to reply. But, at this point, the method waits Synchronously to finish.
Even with Async.Parallel and multiple threads it can mean a lot of threads lock themsels. If you change
all your Methods to use PostAndAsyncReply then your problem goes away.
There are two other performance optimization that can speed up performance, but are not critical in your example.
Calling the Length of a list is bad. To calculate the length of a list, you must go through the whole list. You only
use this in Transaction to print the length, but consider if the transaction list becomes longer. You alway must go through
the whole list, whenever you add a transaction. This will be O(N) of your transaction list.
The same goes for calling (List.sum). You have to calculate the current Balance whenever you call Balance. Also O(N).
As you have a MailboxProcessor, you also could calculate those two values instead of completly recalculating those values again and again.Thus, they become O(1) operations.
On top, i would change the Open, Close and Transaction messages to return nothing, as in my Opinion, it doesn't make sense that they return anything. Your examples even makes me confused of what the bool return
values even mean.
In the Close message you return state.Opened before you set it to false. Why?
In the Open message you return the negated state.Opened. How you use it later it just looks wrong.
If there is more meaning behind the bool please make a distinct Discriminated Union out of it, that describes the purpose of what it returns.
You used an option<Acount> throughout your code, i removed it, as i don't see any purpose of it.
Anyway, here is a full example, of how i would write your code that don't have the speed problems.
type Message =
| Open
| Close
| Balance of AsyncReplyChannel<decimal option>
| Transaction of decimal
type AccountState = {
Opened: bool
Transactions: decimal list
TransactionsLength: int
CurrentBalance: decimal
}
type Account() =
let agent = MailboxProcessor<Message>.Start(fun inbox ->
let rec loop (state: AccountState) = async {
match! inbox.Receive() with
| Close ->
printfn "Closing"
return! loop { state with Opened = false }
| Open ->
printfn "Opening"
return! loop { state with Opened = true }
| Transaction tran ->
let l = state.TransactionsLength + 1
printfn $"Adding transaction {tran}, nb = {l}"
if state.Opened then
return! loop {
state with
Transactions = tran :: state.Transactions
TransactionsLength = l
CurrentBalance = state.CurrentBalance + tran
}
else
return! loop state
| Balance channel ->
if state.Opened
then channel.Reply (Some state.CurrentBalance)
else channel.Reply None
return! loop state
}
let defaultAccount = {
Opened = false
Transactions = []
TransactionsLength = 0
CurrentBalance = 0m
}
loop defaultAccount
)
member _.Open () = agent.Post(Open)
member _.Close () = agent.Post(Close)
member _.Balance () = agent.PostAndAsyncReply(Balance)
member _.Transaction transaction = agent.Post(Transaction transaction)
(* Test *)
let should_equal expected actual =
if expected = actual then
Ok expected
else
Error (expected, actual)
(* --- API --- *)
let mkBankAccount = Account
(* Opens the Account *)
let openAccount (account: Account) =
account.Open ()
(* Closes the Account *)
let closeAccount (account: Account) =
account.Close ()
(* Updates Account *)
let updateBalance transaction (account: Account) =
account.Transaction(transaction)
(* Gets the current Balance *)
let getBalance (account: Account) =
account.Balance ()
#time
let ``Account can be updated from multiple threads`` =
let account = mkBankAccount ()
openAccount account
let updateBalanceAsync = async {
updateBalance 1.0m account
}
let nb = 50
List.replicate nb updateBalanceAsync
|> Async.Parallel
|> Async.RunSynchronously
|> ignore
Async.RunSynchronously (async {
let! balance = getBalance account
printfn "Balance is %A should be (Some %f)" balance (1.0m * decimal nb)
})
#time
When I have a runtime error like this one
type Msg = Any
type Agent() =
let agent =
MailboxProcessor.Start(fun inbox ->
let rec messageLoop (oldState) =
async {
let! msg = inbox.Receive()
printfn "1"
match msg with
| Any ->
printfn "2"
let neverFound=
oldState
|> List.find (fun x -> x = 42)
printfn "3" // <- never happens, because I tried to find something that does not exists
return! messageLoop (oldState # [neverFound])
}
printfn "0"
messageLoop ([ 1; 2; 3 ]))
member __.Post a = agent.Post a
let agent = Agent()
agent.Post ( Any)
it doest crash, the error is completely silent, but if I do explicitly try .. with:
type Msg = Any
type Agent() =
let agent =
MailboxProcessor.Start(fun inbox ->
let rec messageLoop (oldState) =
async {
let! msg = inbox.Receive()
printfn "1"
match msg with
| Any ->
printfn "2"
try
let neverFound=
oldState
|> List.find (fun x -> x = 42)
printfn "3"
return! messageLoop (oldState # [neverFound])
with e ->
printfn "%A" e <-- does print
printfn "4"
}
printfn "0"
messageLoop ([ 1; 2; 3 ]))
member __.Post a = agent.Post a
let agent = Agent()
agent.Post ( Any)
if does catches the error.
This is not the only place this happens, apparently, errors that happen inside async are silent? How can I prevent this? Is there a flag one can run with no silent errors? or maybe a global async error handler?
This is by design. You don't expect actors to crash on receiving a message.
Start is essentially:
Async.Start(async { try do! body x with exn -> trigger exn })
So if you expose MailboxProcessor.Error with
member _.Error = agent.Error
and listen to it:
agent.Error
|> Observable.subscribe(fun (err) -> printfn "Oh no, an error: %s" err.Message)
|> ignore
you will indeed see:
Oh no, an error: An index satisfying the predicate was not found in the collection.
With Suave 2.4.0 supporting TransferEncoding.chunked and HttpOutput.writeChunk I have written the below code to stream out data over HTTP.
let sendStrings getStringsFromProducer : WebPart =
Writers.setStatus HTTP_200 >=>
TransferEncoding.chunked (fun conn -> socket {
let refConn = ref conn
for str in getStringsFromProducer do
let! (_, conn) = (str |> stringToBytes |> HttpOutput.writeChunk) !refConn
refConn := conn
return! HttpOutput.writeChunk [||] !refConn
}
)
While this works, I question the reliability of using ref and hoping there are better way out there to do the same in a more functional manner. Are there better way to do this? Assuming I cannot change getStringsFromProducer?
I think you cannot avoid all mutation in this case - writing chunks one by one is a fairly imperative operation and iterating over a lazy sequence also requires (mutable) iterator, so there is no way to avoid all mutation. I think your sendStrings function does a nice job at hiding the mutation from the consumer and provides a nice functional API.
You can avoid using ref cells and replace them with local mutable variable, which is a bit safer - because the mutable variable cannot escape the local scope:
TransferEncoding.chunked (fun conn -> socket {
let mutable conn = conn
for str in getStringsFromProducer do
let! _, newConn = HttpOutput.writeChunk (stringToBytes str) conn
conn <- newConn
return! HttpOutput.writeChunk [||] conn
}
You could avoid the mutable conn variable by using recursion, but this requires you to work with IEnumerator<'T> rather than using a nice for loop to iterate over the sequence, so I think this is actually less nice than the version using a mutable variable:
TransferEncoding.chunked (fun conn -> socket {
let en = getStringsFromProducer.GetEnumerator()
let rec loop conn = socket {
if en.MoveNext() then
let! _, conn = HttpOutput.writeChunk (stringToBytes en.Current) conn
return! loop conn }
do! loop conn
return! HttpOutput.writeChunk [||] conn })
I was looking for a way to replace refs/mutables in F# in a general way, and while I came up with a solution, it might be overkill in your case. It looks like the ref is a local that is only updated from within a single thread, so it's probably fairly safe. However, if you want to replace it, here's how I solved the problem:
type private StateMessage<'a> =
| Get of AsyncReplyChannel<'a>
| GetOrSet of 'a * AsyncReplyChannel<'a>
| GetOrSetResult of (unit -> 'a) * AsyncReplyChannel<'a>
| Set of 'a
| Update of ('a -> 'a) * AsyncReplyChannel<'a>
type Stateful<'a>(?initialValue: 'a) =
let agent = MailboxProcessor<StateMessage<'a>>.Start
<| fun inbox ->
let rec loop state =
async {
let! message = inbox.Receive()
match message with
| Get channel ->
match state with
| Some value -> channel.Reply(value)
| None -> channel.Reply(Unchecked.defaultof<'a>)
return! loop state
| GetOrSet (newValue, channel) ->
match state with
| Some value ->
channel.Reply(value)
return! loop state
| None ->
channel.Reply(newValue)
return! loop (Some newValue)
| GetOrSetResult (getValue, channel) ->
match state with
| Some value ->
channel.Reply(value)
return! loop state
| None ->
let newValue = getValue ()
channel.Reply(newValue)
return! loop (Some newValue)
| Set value ->
return! loop (Some value)
| Update (update, channel) ->
let currentValue =
match state with
| Some value -> value
| None -> Unchecked.defaultof<'a>
let newValue = update currentValue
channel.Reply(newValue)
return! loop (Some newValue)
}
loop initialValue
let get () = agent.PostAndReply Get
let asyncGet () = agent.PostAndAsyncReply Get
let getOrSet value = agent.PostAndReply <| fun reply -> GetOrSet (value, reply)
let asyncGetOrSet value = agent.PostAndAsyncReply <| fun reply -> GetOrSet (value, reply)
let getOrSetResult getValue = agent.PostAndReply <| fun reply -> GetOrSetResult (getValue, reply)
let asyncGetOrSetResult getValue = agent.PostAndAsyncReply <| fun reply -> GetOrSetResult (getValue, reply)
let set value = agent.Post <| Set value
let update f = agent.PostAndReply <| fun reply -> Update (f, reply)
let asyncUpdate f = agent.PostAndAsyncReply <| fun reply -> Update (f, reply)
member __.Get () = get ()
member __.AsyncGet () = asyncGet ()
member __.GetOrSet value = getOrSet value
member __.AsyncGetOrSet value = asyncGetOrSet value
member __.GetOrSetResult getValue = getOrSetResult getValue
member __.AsyncGetOrSetResult getValue = asyncGetOrSetResult getValue
member __.Set value = set value
member __.Update f = update f
member __.AsyncUpdate f = asyncUpdate f
This basically uses a MailboxProcessor to serialize updates to state that's managed by a tail-recursive function, similar to Tomas' second example. However, this allows you to call Get/Set/Update in a way that's more like traditional mutable state, even though it's not actually doing mutation. You can use it like this:
let state = Stateful(0)
state.Get() |> printfn "%d"
state.Set(1)
state.Get() |> printfn "%d"
state.Update(fun x -> x + 1) |> printfn "%d"
This will print:
0
1
2
I'm trying to learn about the Observable module in F# by writing a program that connects to a web socket, listens for messages, and then handles them in some set of streams based on Observables. However, I'm having a hard time understanding the actual behavior.
First, I set up a web socket like this:
open System
open System.Net.WebSockets
open System.Threading
let connectFeed =
let feedUrl = "blah blah"
let buffer : byte array = Array.zeroCreate 1024
let segment = ArraySegment(buffer)
let socketEvent = new Event<string>()
let task = async {
let random = Random(DateTime.Now.Millisecond)
use socket = new ClientWebSocket()
let! token = Async.CancellationToken
do! Async.AwaitTask (socket.ConnectAsync(Uri(feedUrl), token))
while not token.IsCancellationRequested do
let! result = Async.AwaitTask (socket.ReceiveAsync(segment, token))
socketEvent.Trigger (Encoding.UTF8.GetString(buffer))
Array.fill buffer 0 buffer.Length 0uy
}
(task, socketEvent.Publish)
let deserializeMsg (raw:string) =
// returns a MsgType based on the received message
let tryGetData (msg:MsgType) =
// returns Some data for specific kind of message; None otherwise
[<EntryPoint>]
let main argv =
let feedProc, feedStream = connectFeed
let msgStream = feedStream |> Observable.map deserializeMsg
msgStream |> Observable.subscribe (fun m -> printfn "got msg: %A" m) |> ignore
let dataStream = feedStream |> Observable.choose tryGetData
dataStream |> Observable.subscribe (fun d -> printfn "got data: %A" d) |> ignore
Async.RunSynchronously feedProc
0
I'm expecting to see a printout like:
got msg: { some: "field" }
got msg: { some: "other" }
got msg: { some: "data" }
got data: { // whatever }
got msg: ...
...
Instead, only the "got msg" messages appear, even though there are messages that would cause tryGetData to return Some.
What's going on here? How do I set up multiple Observable streams from a single event?
Update: I've updated my code with this:
let isMsgA msg =
printfn "isMsgA"
match msg with
| MsgA -> true // where MsgA is a member of a DU defined elsewhere, and is the result of deserializeMsg
| _ -> false
let isStringMsgA msg =
printfn "isStringMsgA"
if msg.StartsWith("{ \"type\": \"msga\"") then true else false
[<EntryPoint>]
let main argv =
let feedProc, feedStream = connectFeed
let msgStream = feedStream |> Observable.map deserializeMsg
msgStream
|> Observable.filter isMsgA
|> Observable.subscribe (fun m -> printfn "got msg MsgA")
|> ignore
feedStream
|> Observable.filter isStringMsgA
|> Observable.subscribe (fun m -> printfn "got string MsgA")
|> ignore
And I get a screen full of "isStringMsgA" and "got string MsgA" messages, but exactly one each of "isMsgA" and "got msg MsgA".
I am baffled.
Here is a trimmed-down, reproducible example for anyone interesting in fiddling with it:
https://github.com/aggieben/test-observable
Update 2: looks like I may be seeing this behavior due to an exception being thrown in the deserializeMsg function. Still digging...
I do not see any obvious reason why this should be happening - can you add some logging to tryGetData to check what inputs it gets and what results it returns?
When using the Observable module, you construct a description of the processing pipeline and Observable.subscribe creates a concrete chain of listeners that do the work and attach handlers to the primary event source. However, the events do not get "consumed" - they should be sent to all the observers.
For example, try playing with the following minimal demo:
let evt = Event<int>()
let e1 = evt.Publish |> Observable.choose (fun n ->
if n % 2 = 0 then Some "woop!" else None)
let e2 = evt.Publish |> Observable.map (fun n -> n * 10)
e1 |> Observable.subscribe (printfn "E1: %s")
e2 |> Observable.subscribe (printfn "E2: %d")
evt.Trigger(1)
evt.Trigger(2)
If you run this, it prints the expected result:
E2: 10
E1: woop!
E2: 20
After playing with F# agents I tried to do a map reduce using them.
The basic structure I use is:
map supervisor which queues up all the work to do in its state and receives work request from map workers
reduce supervisor does the same thing as map supervisor for reduce work
a bunch of map and reduce workers that map and reduce, if one fails its work it sends it back to the respective supervisr to be reprocessed.
The questions I wonder about is:
does this make any sense compared to a more traditional (yet very nice) map reduce like (http://tomasp.net/blog/fsharp-parallel-aggregate.aspx) that uses PSeq ?
the way I implemented the map and reduce workers seems ugly is there a better way ?
it seems like I can create a 1000 000 map workers and 1000 0000 reduce workers lol, how should I choose these numbers, the more the better ?
Thanks a lot,
type Agent<'T> = MailboxProcessor<'T>
//This is the response the supervisor
//gives to the worker request for work
type 'work SupervisorResponse =
| Work of 'work //a piece of work
| NoWork//no work left to do
//This is the message to the supervisor
type 'work WorkMsg =
| ToDo of 'work //piles up work in the Supervisor queue
| WorkReq of AsyncReplyChannel<SupervisorResponse<'work>> //'
//The supervisor agent can be interacted with
type AgentOperation =
| Stop //stop the agent
| Status //yield the current status of supervisor
type 'work SupervisorMsg =
| WorkRel of 'work WorkMsg
| Operation of AgentOperation
//Supervises Map and Reduce workers
module AgentSupervisor=
let getNew (name:string) =
new Agent<SupervisorMsg<'work>>(fun inbox -> //'
let rec loop state = async {
let! msg = inbox.Receive()
match msg with
| WorkRel(m) ->
match m with
| ToDo(work) ->
let newState = work:state
return! loop newState
| WorkReq(replyChannel) ->
match state with
| [] ->
replyChannel.Reply(NoWork)
return! loop []
| [item] ->
replyChannel.Reply(Work(item))
return! loop []
| (item::remaining) ->
replyChannel.Reply(Work(item))
return! loop remaining
| Operation(op) ->
match op with
| Status ->
Console.WriteLine(name+" current Work Queue "+
string (state.Length))
return! loop state
| Stop ->
Console.WriteLine("Stoppped SuperVisor Agent "+name)
return()
}
loop [] )
let stop (agent:Agent<SupervisorMsg<'work>>) = agent.Post(Operation(Stop))
let status (agent:Agent<SupervisorMsg<'work>>) =agent.Post(Operation(Status))
//Code for the workers
type 'success WorkOutcome =
| Success of 'success
| Fail
type WorkerMsg =
| Start
| Stop
| Continue
module AgentWorker =
type WorkerSupervisors<'reduce,'work> =
{ Map:Agent<SupervisorMsg<'work>> ; Reduce:Agent<SupervisorMsg<'reduce>> }
let stop (agent:Agent<WorkerMsg>) = agent.Post(Stop)
let start (agent:Agent<WorkerMsg>) = agent.Start()
agent.Post(Start)
let getNewMapWorker( map, supervisors:WorkerSupervisors<'reduce,'work> ) =
new Agent<WorkerMsg>(fun inbox ->
let rec loop () = async {
let! msg = inbox.Receive()
match msg with
| Start -> inbox.Post(Continue)
return! loop ()
| Continue ->
let! supervisorOrder =
supervisors.Map.PostAndAsyncReply(
fun replyChannel ->
WorkRel(WorkReq(replyChannel)))
match supervisorOrder with
| Work(work) ->
let! res = map work
match res with
| Success(toReduce) ->
supervisors.Reduce
.Post(WorkRel(ToDo(toReduce)))
| Fail ->
Console.WriteLine("Map Fail")
supervisors.Map
.Post(WorkRel(ToDo(work)))
inbox.Post(Continue)
| NoWork ->
inbox.Post(Continue)
return! loop ()
| Stop ->
Console.WriteLine("Map worker stopped")
return ()
}
loop () )
let getNewReduceWorker(reduce,reduceSupervisor:Agent<SupervisorMsg<'work>>)=//'
new Agent<WorkerMsg>(fun inbox ->
let rec loop () = async {
let! msg = inbox.Receive()
match msg with
| Start -> inbox.Post(Continue)
return! loop()
| Continue ->
let! supervisorOrder =
reduceSupervisor.PostAndAsyncReply(fun replyChannel ->
WorkRel(WorkReq(replyChannel)))
match supervisorOrder with
| Work(work) ->
let! res = reduce work
match res with
| Success(toReduce) -> inbox.Post(Continue)
| Fail ->
Console.WriteLine("ReduceFail")
reduceSupervisor.Post(WorkRel(ToDo(work)))
inbox.Post(Continue)
| NoWork -> inbox.Post(Continue)
return! loop()
|Stop ->Console.WriteLine("Reduce worker stopped"); return ()
}
loop() )
open AgentWorker
type MapReduce<'work,'reduce>( numberMap:int ,
numberReduce: int,
toProcess:'work list,
map:'work->Async<'reduce WorkOutcome>,
reduce:'reduce-> Async<unit WorkOutcome>) =
let mapSupervisor= AgentSupervisor.getNew("MapSupervisor")
let reduceSupervisor = AgentSupervisor.getNew("ReduceSupervisor")
let workerSupervisors = {Map = mapSupervisor ; Reduce = reduceSupervisor }
let mapWorkers =
[for i in 1..numberMap ->
AgentWorker.getNewMapWorker(map,workerSupervisors) ]
let reduceWorkers =
[for i in 1..numberReduce ->
AgentWorker.getNewReduceWorker(reduce,workerSupervisors.Reduce) ]
member this.Start() =
//Post work to do
toProcess
|>List.iter(fun elem -> mapSupervisor.Post( WorkRel(ToDo(elem))))
//Start supervisors
mapSupervisor.Start()
reduceSupervisor.Start()
//start workers
List.iter( fun mapper -> mapper |>start) mapWorkers
List.iter( fun reducer ->reducer|>start) reduceWorkers
member this.Status() = (mapSupervisor|>AgentSupervisor.status)
(reduceSupervisor|>AgentSupervisor.status)
member this.Stop() =
List.map2(fun mapper reducer ->
mapper |>stop; reducer|>stop) mapWorkers reduceWorkers
//Run some tests
let map = function (n:int64) -> async{ return Success(n) }
let reduce = function (toto: int64) -> async{ return Success() }
let mp = MapReduce<int64,int64>( 1,1,[for i in 1L..1000000L->i],map,reduce)
mp.Start()
mp.Status()
mp.Stop()
I like to use MailboxProcessor for the reduce part of the algorithm, and async block that's invoked with Async.Parallel for the map part. It makes things more explicit, giving you finer control over exception handling, timeouts, and cancellation.
The following code was designed with Brian's help, and with the help of his excellent F# block highlighting "F# Depth Colorizer" plug-in for VS2010.
This code is meant to pull RSS feeds from yahoo weather server in a map-reduce pattern. It demonstrates how we can control execution flow from the outside of actual algorithm.
fetchWeather is the map part, and mailboxLoop is the reduce part of the algorithm.
#r "System.Xml.Linq.dll"
#r "FSharp.PowerPack.dll"
open System
open System.Diagnostics
open System.IO
open System.Linq
open System.Net
open System.Xml.Linq
open Microsoft.FSharp.Control.WebExtensions
type Weather (city, region, temperature) = class
member x.City = city
member x.Region = region
member x.Temperature : int = temperature
override this.ToString() =
sprintf "%s, %s: %d F" this.City this.Region this.Temperature
end
type MessageForActor =
| ProcessWeather of Weather
| ProcessError of int
| GetResults of (Weather * Weather * Weather list) AsyncReplyChannel
let parseRss woeid (rssStream : Stream) =
let xn str = XName.Get str
let yweather elementName = XName.Get(elementName, "http://xml.weather.yahoo.com/ns/rss/1.0")
let channel = (XDocument.Load rssStream).Descendants(xn "channel").First()
let location = channel.Element(yweather "location")
let condition = channel.Element(xn "item").Element(yweather "condition")
// If the RSS server returns error, condition XML element won't be available.
if not(condition = null) then
let temperature = Int32.Parse(condition.Attribute(xn "temp").Value)
ProcessWeather(new Weather(
location.Attribute(xn "city").Value,
location.Attribute(xn "region").Value,
temperature))
else
ProcessError(woeid)
let fetchWeather (actor : MessageForActor MailboxProcessor) woeid =
async {
let rssAddress = sprintf "http://weather.yahooapis.com/forecastrss?w=%d&u=f" woeid
let webRequest = WebRequest.Create rssAddress
use! response = webRequest.AsyncGetResponse()
use responseStream = response.GetResponseStream()
let weather = parseRss woeid responseStream
//do! Async.Sleep 1000 // enable this line to see amplified timing that proves concurrent flow
actor.Post(weather)
}
let mailboxLoop initialCount =
let chooseCityByTemperature op (x : Weather) (y : Weather) =
if op x.Temperature y.Temperature then x else y
let sortWeatherByCityAndState (weatherList : Weather list) =
weatherList
|> List.sortWith (fun x y -> x.City.CompareTo(y.City))
|> List.sortWith (fun x y -> x.Region.CompareTo(y.Region))
MailboxProcessor.Start(fun inbox ->
let rec loop minAcc maxAcc weatherList remaining =
async {
let! message = inbox.Receive()
let remaining = remaining - 1
match message with
| ProcessWeather weather ->
let colderCity = chooseCityByTemperature (<) minAcc weather
let warmerCity = chooseCityByTemperature (>) maxAcc weather
return! loop colderCity warmerCity (weather :: weatherList) remaining
| ProcessError woeid ->
let errorWeather = new Weather(sprintf "Error with woeid=%d" woeid, "ZZ", 99999)
return! loop minAcc maxAcc (errorWeather :: weatherList) remaining
| GetResults replyChannel ->
replyChannel.Reply(minAcc, maxAcc, sortWeatherByCityAndState weatherList)
}
let minValueInitial = new Weather("", "", Int32.MaxValue)
let maxValueInitial = new Weather("", "", Int32.MinValue)
loop minValueInitial maxValueInitial [] initialCount
)
let RunSynchronouslyWithExceptionAndTimeoutHandlers computation =
let timeout = 30000
try
Async.RunSynchronously(Async.Catch(computation), timeout)
|> function Choice1Of2 answer -> answer |> ignore
| Choice2Of2 (except : Exception) -> printfn "%s" except.Message; printfn "%s" except.StackTrace; exit -4
with
| :? System.TimeoutException -> printfn "Timed out waiting for results for %d seconds!" <| timeout / 1000; exit -5
let main =
// Should have script name, sync/async select, and at least one woeid
if fsi.CommandLineArgs.Length < 3 then
printfn "Expecting at least two arguments!"
printfn "There were %d arguments" (fsi.CommandLineArgs.Length - 1)
exit -1
let woeids =
try
fsi.CommandLineArgs
|> Seq.skip 2 // skip the script name and sync/async select
|> Seq.map Int32.Parse
|> Seq.toList
with
| except -> printfn "One of supplied arguments was not an integer: %s" except.Message; exit -2
let actor = mailboxLoop woeids.Length
let processWeatherItemsConcurrently woeids =
woeids
|> Seq.map (fetchWeather actor)
|> Async.Parallel
|> RunSynchronouslyWithExceptionAndTimeoutHandlers
let processOneWeatherItem woeid =
woeid
|> fetchWeather actor
|> RunSynchronouslyWithExceptionAndTimeoutHandlers
let stopWatch = new Stopwatch()
stopWatch.Start()
match fsi.CommandLineArgs.[1].ToUpper() with
| "C" -> printfn "Concurrent execution: "; processWeatherItemsConcurrently woeids
| "S" -> printfn "Synchronous execution: "; woeids |> Seq.iter processOneWeatherItem
| _ -> printfn "Unexpected run options!"; exit -3
let (min, max, weatherList) = actor.PostAndReply GetResults
stopWatch.Stop()
assert (weatherList.Length = woeids.Length)
printfn "{"
weatherList |> List.iter (printfn " %O")
printfn "}"
printfn "Coldest place: %O" min
printfn "Hottest place: %O" max
printfn "Completed in %d millisec" stopWatch.ElapsedMilliseconds
main