How to deal with null values in F# - f#

I am trying to fetch data(type:double) from MS access below. There are number of null values stored in A&B below. is there a way to change those null values to zeros?
let query sql w=
seq{
let conn = new OleDbConnection( #"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source=Portfolio.accdb;
Persist Security Info=False;" )
conn.Open()
let DAdapter = new OleDbDataAdapter(sql,conn)
let DTable = new DataSet()
let i= DAdapter.Fill(DTable)
let rowCol = DTable.Tables.[0].Rows
let rowCount = rowCol.Count
for i in 0 .. (rowCount - 1) do
yield w (rowCol.[i])
}
type Table1= {
A:double;
B:double}
let cf=query "SELECT * FROM T" (fun row ->
{
A=unbox(row.["A"]);
B=unbox(row.["B"]);})

Define a function
let toFloat = function
| null -> 0.0
| obj -> unbox obj
And then use it as follows
let cf = query "SELECT * FROM T" (fun row ->
{ A = toFloat row.["A"]
B = toFloat row.["B"] } )

Maybe, your columns in DB have different type (for example, int and double). Or try check return value with DBNull type:
let toDouble x =
if System.Convert.IsDBNull(x) then 0.0
else System.Double.Parse(x.ToString())
To check I create that table:
And with your code:
open System.Data
open System.Data.OleDb
let toDouble x =
if System.Convert.IsDBNull(x) then 0.0
else System.Double.Parse(x.ToString())
let query sql w=
seq{
let conn = new OleDbConnection( #"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source=F:/Portfolio.accdb;Persist Security Info=False;" )
conn.Open()
let DAdapter = new OleDbDataAdapter(sql,conn)
let DTable = new DataSet()
let i = DAdapter.Fill(DTable)
let rowCol = DTable.Tables.[0].Rows
let rowCount = rowCol.Count
for i in 0 .. (rowCount - 1) do
yield w (rowCol.[i])
conn.Close()
}
type Table1= { A:double; B:double }
let cf = query "SELECT * FROM T" (fun row -> { A = toDouble row.["A"]; B = toDouble row.["B"] } )
cf |> Seq.iter(fun x -> printfn "%A" x)
Result:
{A = 1.0;
B = 2.2;}
{A = 3.0;
B = 0.0;}
{A = 4.0;
B = 0.0;}

Related

How to initialize a repeated field (Google.Protocol.ProtoBuf) from F#?

I have a Google.Protocol.ProtoBuf message defined as:
message Rectangle {
int32 X = 1;
int32 Y = 2;
int32 Width = 3;
int32 Height =4;
}
message Word {
google.protobuf.Int32Value VocabularyId = 1;
bytes Strokes = 2;
string Text = 3;
string Confidence = 4;
Rectangle BoundingBox =5;
}
message SaveVocabularyRequest {
repeated Word Words =1 ;
}
In F#, how do I initialize the repeated field "Words" ?
( using: https://github.com/Arshia001/FSharp.GrpcCodeGenerator )
This fails:
let h = words |> Seq.map InkWordToWord |> Seq.toList
{ Protocol.ProtoBuf.SaveVocabularyRequest.empty() with Words = { Words = {h}} }
Error Message: No Assignment given for field '_UnknownFields' of type 'Protocol.ProtoBuf.SaveVocabularyRequest'
Thank you in advance.
let InkWordToWord (w:Models.InkWord) : Protocol.ProtoBuf.InkWord =
let rectangle (dr: Drawing.Rectangle) : Protocol.ProtoBuf.Rectangle =
{Protocol.ProtoBuf.Rectangle.empty() with X = ValueSome dr.X; Y = ValueSome dr.Y; Width = ValueSome dr.Width; Height = ValueSome dr.Height }
let gg (i:int) : Int32Value =
let g = Int32Value.empty()
g.Value <- ValueSome i
g
let toBytes (ss:System.Windows.Ink.StrokeCollection) : byte[] =
let ms = new MemoryStream()
ss.Save(ms)
ms.ToArray()
{ Protocol.ProtoBuf.InkWord.empty() with VocabularyId = match w.VocabularyId with
| None -> ValueNone
| Some s -> ValueSome (gg s) ;
Strokes = ValueSome (Google.Protobuf.ByteString.CopyFrom(toBytes w.Strokes));
Text = ValueSome w.Text;
Confidence = ValueSome w.Confidence;
BoundingBox = ValueSome (rectangle w.BoundingBox);
}
let SaveVocabularyRequestToGrpc (words:seq<Models.InkWord>) : SaveVocabularyRequest =
let v =
let h = words |> Seq.map InkWordToWord |> Seq.toList
let r = Protocol.ProtoBuf.Vocabulary.empty()
// InkWords is a Collection (i.e RepeatedField) in Protobuf.
r.InkWords.Add(h)
r
{Protocol.ProtoBuf.SaveVocabularyRequest.empty() with Vocabulary = ValueSome v }

With Elmish.wpf/F#, how to display in WPF a string option as the string or null, not "Some(string)"?

I am a newbie to F#. In WPF, I am using DisplayMemberBinding within a Datagrid as:
<DataGridTemplateColumn.CellTemplate>
<DataTemplate>
<local:AppointmentListView ItemsSource="{Binding Columns[0].AppointmentKeys}" Height="140" Background="Bisque">
<ListView.View>
<GridView>
<GridViewColumn Header="First" DisplayMemberBinding="{Binding FirstName}" Width="100"/>
<GridViewColumn Header="Last" DisplayMemberBinding="{Binding LastName}" Width="120"/>
<GridViewColumn Header="BirthDate" DisplayMemberBinding="{Binding BirthDate, StringFormat=d}" Width="100"/>
</GridView>
</ListView.View>
</local:AppointmentListView>
</DataTemplate>
</DataGridTemplateColumn.CellTemplate>
The (complete) backing F# module (in Elmish.wpf) is:
module MyDataGrid.DataGrid
open Elmish
open Elmish.WPF
open System
type Visit =
{ ServiceTime: DateTime option
DoNotSee: Boolean option
ChartNumber: int option
LastName: string option
FirstName: string option
Mi: string option
BirthDate: DateTime option
PostingTime: DateTime option
AppointmentTime: DateTime option }
type Cell =
{RowNumber: int
ColumnNumber: int
AppointmentKeys: Visit list
ColumnTime: TimeSpan
AppointmentCount: int
AppointmentTime: DateTime option // all lines in the cell have the same appointment time.
}
let SetCell (rowNumber: int, columnNumber: int) =
let AppointmentsPerCell = 4
{RowNumber = rowNumber
ColumnNumber = columnNumber
AppointmentKeys = [for x in 1 .. AppointmentsPerCell ->
{
ServiceTime = Some System.DateTime.Now
DoNotSee = Some false
ChartNumber = Some 8812
LastName= Some ("LastName" + string x)
FirstName= Some ("FirstName" + string x)
Mi = Some "J"
BirthDate = Some(DateTime(2020,09,14))
PostingTime = Some DateTime.Now
AppointmentTime = Some DateTime.Now
}]
ColumnTime = System.TimeSpan.FromMinutes(float(columnNumber * 15))
AppointmentCount = 4
AppointmentTime = Some(DateTime.Now)
}
type Row =
{RowTime: string
Columns: Cell list}
let SetRow (rowNumber: int, startTime: System.TimeSpan)=
let columnCount = 4
let hr = System.TimeSpan.FromHours(1.0)
let rowTime = startTime + System.TimeSpan.FromTicks(hr.Ticks * int64(rowNumber))
{ RowTime = rowTime.ToString("h':00'")
Columns = [for columnNumber in 1 .. columnCount -> SetCell(rowNumber, columnNumber) ]
}
type Model =
{ AppointmentDate: DateTime
Rows: Row list
SelectedRow: Row option}
type Msg =
| SetAppointmentDate of DateTime
| SetSelectedRow of Row option
let init =
let rowCount = 9
let startTime = TimeSpan.FromHours(float(8))
{ AppointmentDate = DateTime.Now
Rows = [for rowNumber in 0 .. rowCount -> SetRow(rowNumber, startTime)]
SelectedRow = None
}
let update msg m =
match msg with
| SetAppointmentDate d -> {m with AppointmentDate = d}
| SetSelectedRow r -> {m with SelectedRow = r}
let bindings () : Binding<Model, Msg> list = [
"SelectedAppointmentDate" |> Binding.twoWay( (fun m -> m.AppointmentDate), SetAppointmentDate)
"Rows" |> Binding.oneWay( fun m -> m.Rows)
"SelectedRow" |> Binding.twoWay( (fun m -> m.SelectedRow), SetSelectedRow)
]
let designVm = ViewModel.designInstance init (bindings ())
let main window =
Program.mkSimpleWpf (fun () -> init) update bindings
|> Program.withConsoleTrace
|> Program.runWindowWithConfig
{ ElmConfig.Default with LogConsole = true; Measure = true }
window
The DisplayMememberBindings show LastName as "Some(LastName1)" and BirthDate as "Some(09/14/2020 00:00:00)".
How can I get the LastName: string option to return either null or the value of the string so the display shows "LastName1" and not "Some(LastName1)?
The same goes for the birth date, how to show BirthDate as "9/14/2020" and not "Some(09/14/2020 00:00:00)?
TIA
Full source code at: Example DataGrid
Your code only has three bindings. You should have a binding for every individual piece of data. Specifically, you should change your Rows binding from a OneWay binding to a SubModel binding. Then repeat this for all your other types.
Then, the question you specifically asked about is how to display LastName1 instead of Some(LastName1) and 9/14/2020 instead of Some(09/14/2020 00:00:00). Create the bindings for these individual pieces of optional data with Binding methods that ends in Opt like Binding.oneWayOpt or Binding.twoWayOpt.
For newbies like me, here is my full F# working solution in Elmish.WPF/F#:
module MyDataGrid.DataGrid
open Elmish
open Elmish.WPF
open System
module Visit =
type Model =
{ ServiceTime: DateTime option
DoNotSee: Boolean option
ChartNumber: int option
LastName: string option
FirstName: string option
Mi: string option
BirthDate: DateTime option
PostingTime: DateTime option
AppointmentTime: DateTime option
Id: int}
let SetVisits appointmentsPerCell = [for x in 1 .. appointmentsPerCell ->
{
ServiceTime = Some System.DateTime.Now
DoNotSee = Some false
ChartNumber = Some 8812
LastName= Some ("LastName" + string x)
FirstName= Some ("FirstName" + string x)
Mi = Some "J"
BirthDate = Some(DateTime(2020,09,14))
PostingTime = Some DateTime.Now
AppointmentTime = Some DateTime.Now
Id = x
}]
let bindings() = [
"FirstName" |> Binding.oneWayOpt( fun (_, m) -> m.FirstName)
"LastName" |> Binding.oneWayOpt( fun (_, m) -> m.LastName)
"BirthDate" |> Binding.oneWayOpt( fun (_, m) -> m.BirthDate)
"ServiceTime" |> Binding.oneWayOpt( fun (_, m) -> m.ServiceTime)
]
module Cell =
type Model =
{ RowNumber: int
ColumnNumber: int
AppointmentKeys: Visit.Model list
ColumnTime: TimeSpan
AppointmentCount: int
AppointmentTime: DateTime option // all lines in the cell have the same appointment time.
Id: int
}
let SetCell (rowNumber: int, columnNumber: int) =
let AppointmentsPerCell = 4
{RowNumber = rowNumber
ColumnNumber = columnNumber
AppointmentKeys = Visit.SetVisits AppointmentsPerCell
ColumnTime = System.TimeSpan.FromMinutes(float(columnNumber * 15))
AppointmentCount = 4
AppointmentTime = Some(DateTime.Now)
Id=rowNumber*10 + columnNumber
}
let bindings() =[
"AppointmentKeys" |> Binding.subModelSeq(
(fun (_, m) -> m.AppointmentKeys),
(fun v -> v.Id),
Visit.bindings
)
]
module Row =
type Model =
{ RowTime: string
Columns: Cell.Model list
Id: int }
let SetRow (rowNumber: int, startTime: System.TimeSpan)=
let columnCount = 4
let hr = System.TimeSpan.FromHours(1.0)
let rowTime = startTime + System.TimeSpan.FromTicks(hr.Ticks * int64(rowNumber))
{ RowTime = rowTime.ToString("h':00'")
Columns = [for columnNumber in 1 .. columnCount -> Cell.SetCell(rowNumber, columnNumber) ]
Id = rowNumber
}
let bindings () = [
"RowTime" |> Binding.oneWay( fun (_,r) -> r.RowTime)
"Columns" |> Binding.subModelSeq(
(fun (_, m) -> m.Columns),
(fun c -> c.Id),
Cell.bindings
)
]
type Model =
{ AppointmentDate: DateTime
Rows: Row.Model list
SelectedRow: Row.Model option}
type Msg =
| SetAppointmentDate of DateTime
| SetSelectedRow of Row.Model option
let init () =
let rowCount = 9
let startTime = TimeSpan.FromHours(float(8))
{ AppointmentDate = DateTime.Now
Rows = [for rowNumber in 0 .. rowCount -> Row.SetRow(rowNumber, startTime)]
SelectedRow = None
}
let update msg m =
match msg with
| SetAppointmentDate d -> {m with AppointmentDate = d}
| SetSelectedRow r -> {m with SelectedRow = r}
let bindings () : Binding<Model, Msg> list = [
"SelectedAppointmentDate" |> Binding.twoWay( (fun m -> m.AppointmentDate), SetAppointmentDate)
"Rows" |> Binding.subModelSeq(
(fun m -> m.Rows),
(fun r -> r.Id),
Row.bindings
)
"SelectedRow" |> Binding.twoWay( (fun m -> m.SelectedRow), SetSelectedRow)
]
let main window =
Program.mkSimpleWpf init update bindings
|> Program.withConsoleTrace
|> Program.runWindowWithConfig
{ ElmConfig.Default with LogConsole = true; Measure = true }
window
enter code here

How can I link a label to a mutable variable in F#?

I want to create a label in F# which uses a mutable variable to return a value. Unfortunately F# sets this label to a constant value. If the value of the mutable changes, the value of the label remains. Isn't it a bit inconsistent? Is there a way to get the label ("a") being dependent of the mutable ("x")?
let mutable x = 0;
let a = x + 2; // I want not to set a to a constant value
let b two = x + two;
x <- 1;
let c = b 2;
let isConsistent = a = c;
val mutable x : int = 1
val a : int = 2
val b : two:int -> int
val c : int = 3
val isConsistent : bool = false
From your own comment you want a to be a function returning x + 2
Direct translation of that is :
let mutable x = 0
let a () = x + 2
let b two = x + two
x <- 1
let c = b 2
let isConsistent = a () = c // don't forget to call the function 'a'
(*
val mutable x : int = 1
val a : unit -> int
val b : two:int -> int
val c : int = 3
val isConsistent : bool = true
*)

Return a list from recursive function

I'm trying to return a list from a function, but I'm getting an error that says that an unit was expected instead. Also, I would like to know if this code appears to be structured correctly in general.
code:
let rec calculateVariants (attList: NewProductAttributeInfo list) (activeCount: int)
(currentList: (int * NewProductAttributeInfo) list) =
// group attribute list by category id
let attGrouped = attList |> List.groupBy (fun x -> x.AttributeCategoryId)
// define mutable list
let mutable stageList = currentList
// begin iteration
for catId,details in attGrouped do
for d in details do
if activeCount = 0
then stageList <- (activeCount,d) :: stageList
let groupLength = attGrouped.Length
if (activeCount + 1) <= groupLength
then
let selectCat,selectDetails = attGrouped.[activeCount + 1]
selectDetails
|> List.filter (fun x ->
stageList
|> List.exists (fun (x') ->
not(x' = (activeCount,x))))
|> (fun x ->
match x with
| [] -> ()
| head :: tail ->
stageList <- (activeCount, head) :: stageList
let currentCategory = activeCount + 1
calculateVariants attList currentCategory stageList
)
stageList // <-- error Unit expected
if .. then .. else should return the same type on both branches. If you omit else branch then compiler assuming that it returns unit. Add else branch returning list.
Edit:
Given your problem description, the easiest way would be something like this:
type NewProductAttributeInfo = {AttributeCategoryId: string; AttributeId: string}
let products = [ { AttributeCategoryId = "Size"; AttributeId = "S"};
{ AttributeCategoryId = "Mat"; AttributeId = "Linen" };
{ AttributeCategoryId = "Mat"; AttributeId = "Poliester" };
{ AttributeCategoryId = "Color"; AttributeId = "White" };
{ AttributeCategoryId = "Color"; AttributeId = "Green" };
{ AttributeCategoryId = "Mat"; AttributeId = "Linen" };
{ AttributeCategoryId = "Mat"; AttributeId = "Cotton" };
{ AttributeCategoryId = "Mat"; AttributeId = "Poliester" };
{ AttributeCategoryId = "Size"; AttributeId = "XL" } ]
let group list =
list
|> Set.ofList // Provides uniqueness of attribute combinations
|> Seq.groupBy (fun x -> x.AttributeCategoryId) // Grouping by CatId
|> List.ofSeq
let res = group products
Result:
val it : (string * seq<NewProductAttributeInfo>) list =
[("Color", seq [{AttributeCategoryId = "Color";
AttributeId = "Green";}; {AttributeCategoryId = "Color";
AttributeId "White";}]);
("Mat",
seq
[{AttributeCategoryId = "Mat";
AttributeId = "Cotton";}; {AttributeCategoryId = "Mat";
AttributeId = "Linen";};
{AttributeCategoryId = "Mat";
AttributeId = "Poliester";}]);
("Size", seq [{AttributeCategoryId = "Size";
AttributeId = "S";}; {AttributeCategoryId = "Size";
AttributeId = "XL";}])]
This is the solution that I came with. It works, but I'm sure it can be optimized quite a bit. I have a duplicate issue that is solved with the Set.ofList function externally after this code runs, which I'm still working on.
type NewProductAttributeInfo = {
AttributeId : string;
AttributeCategoryId : string
}
let rec private returnVariant (curIdx: int) (listLength: int)
(attList: (int * NewProductAttributeInfo * NewProductAttributeInfo) list)
(curList: NewProductAttributeInfo list) =
match curList with
| x when x.Length = listLength -> curList
| x ->
let attTup =
attList
|> List.filter (fun x' ->
let idx1,att1,att2' = x'
idx1 >= curIdx && not(curList
|> List.exists (fun x'' ->
x'' = att2'))
)
let idx1,att1,att2 = attTup |> List.head
let newList = curList # [att2]
returnVariant idx1 newList.Length attList newList
let rec calculateVariants (attList: NewProductAttributeInfo list)
(currentList: (int * NewProductAttributeInfo * NewProductAttributeInfo) list) =
// group attribute list by category id
let attGrouped = attList |> List.groupBy (fun x -> x.AttributeCategoryId)
let (firstGroupCatId,firstGroupDetails) = attGrouped.[0]
match currentList with
| [] ->
let rawVariants = [for nxt in 0 .. (attGrouped.Length - 1) do
if nxt > 0
then
// begin iteration
for d in firstGroupDetails do
let _,det = attGrouped.[nxt]
for det' in det do
yield (nxt, d, det')
]
calculateVariants attList rawVariants
| x ->
let groupLength = x |> List.groupBy (fun (idx,d0,nxtD) -> idx)
|> List.length |> ((+)1)
let sortedGroup = x |> List.sortBy (fun (x,y,z) -> x)
if groupLength > 2
then // below is the block that generates the duplicates
[for att in sortedGroup do
for attCompare in sortedGroup do
let idx1,att1,att2 = att
let idx2,attC1,attC2 = attCompare
if idx2 > idx1 && att2 <> attC2
then
let idString =
returnVariant idx2 groupLength x [att1; att2; attC2]
|> List.map (fun nl -> nl.AttributeId)
yield String.concat "," idString
]
else
[
for att in sortedGroup do
let idx1,att1,att2 = att
let idString =
returnVariant idx1 groupLength x [att1; att2]
|> List.map (fun nl -> nl.AttributeId)
yield String.concat "," idString
]

How to execute a function, that creates a lot of objects, in parallel?

I am using Array.Parallel.map on a function but find that it is not executing at anywhere near full processor capacity. I am assuming this is because the function creates a lot of objects when running List.map and List.map2. Would this be causing a synchronization issue and is there a more appropriate way of doing this? At the moment the only way I can think of getting around this is by running each process as a separate executable using something like xargs under Linux.
I put together the script below to demonstrate the problem. It is a very basic data categorizer which relies on a field having a certain value as a rule to determine if this will predict a category:
open System
type CategoryAssessment =
{ fieldIndex: int
value: int
ruleAssessments: list<int> }
let InitAssessment categorizeFields rules =
let ruleAssessments = List.init (List.length rules) (fun x -> 0)
List.map (fun categorizeField ->
let fieldIndex, categoryValue = categorizeField
{ CategoryAssessment.fieldIndex = fieldIndex;
value = categoryValue;
ruleAssessments = ruleAssessments })
categorizeFields
let AssessCategory ruleMatches (row : int[]) categoryAssessment =
let fieldIndex = categoryAssessment.fieldIndex
let categoryValue = categoryAssessment.value
let categoryMatch = categoryValue = row.[fieldIndex]
let newRuleAssessments =
List.map2 (fun ruleAssessment ruleMatch ->
if ruleMatch = categoryMatch then
ruleAssessment + 1
else
ruleAssessment)
categoryAssessment.ruleAssessments
ruleMatches
{ categoryAssessment with ruleAssessments = newRuleAssessments }
let MatchRule (row : int[]) rule =
let fieldIndex, eqVal = rule
row.[fieldIndex] = eqVal
let Assess categorizeFields rules input =
printfn "START - Assess"
let d =
Array.fold (fun categoryAssessment row ->
let ruleMatches = List.map (MatchRule row) rules
List.map (AssessCategory ruleMatches row) categoryAssessment)
(InitAssessment categorizeFields rules)
input
printfn "END - Assess"
d
let JoinAssessments assessments =
let numAssessments = Array.length assessments
Array.fold (fun accAssessment assessment ->
List.map2 (fun accCategory category ->
let newRuleAssessments =
List.map2 (+)
accCategory.ruleAssessments
category.ruleAssessments
{ accCategory with
ruleAssessments = newRuleAssessments })
accAssessment
assessment)
assessments.[0]
assessments.[1..(numAssessments-1)]
let numRecords = 10000
let numFields = 20
let numSplits = 10
let numRules = 10000
let inputs = Array.create numSplits
[| for i in 1 .. (numRecords / numSplits) ->
[| for j in 1 .. numFields ->
(i % 10) + j |] |]
let categorizeFields = [ (1, 6); (2, 3); (2, 4); (3, 2) ]
let rules = [ for i in 1 .. numRules -> (i % numFields, i) ]
let assessments =
Array.Parallel.map (Assess categorizeFields rules) inputs
|> JoinAssessments
printfn "Assessments: %A" assessments
0
After a fair bit of investigation, the ultimate answer to my question seems to be to find a way of not creating lots of objects. The easiest change to do this is moving to using arrays instead of lists. I have written up my findings more fully in an article: Beware of Immutable Lists for F# Parallel Processing.
The above program when altered as follows, runs better between threads and runs much quicker even on a single thread. Further improvements can be made by making the ruleAssessments field mutable as demonstrated in the referenced article.
open System
type CategoryAssessment =
{ fieldIndex: int
value: int
ruleAssessments: int[] }
let InitAssessment categorizeFields rules =
let ruleAssessments = Array.create (Array.length rules) 0
Array.map (fun categorizeField ->
let fieldIndex, categoryValue = categorizeField
{ CategoryAssessment.fieldIndex = fieldIndex;
value = categoryValue;
ruleAssessments = ruleAssessments })
categorizeFields
let AssessCategory ruleMatches (row : int[]) categoryAssessment =
let fieldIndex = categoryAssessment.fieldIndex
let categoryValue = categoryAssessment.value
let categoryMatch = categoryValue = row.[fieldIndex]
let newRuleAssessments =
Array.map2 (fun ruleAssessment ruleMatch ->
if ruleMatch = categoryMatch then
ruleAssessment + 1
else
ruleAssessment)
categoryAssessment.ruleAssessments
ruleMatches
{ categoryAssessment with ruleAssessments = newRuleAssessments }
let MatchRule (row : int[]) rule =
let fieldIndex, eqVal = rule
row.[fieldIndex] = eqVal
let Assess categorizeFields rules input =
printfn "START - Assess"
let d =
Array.fold (fun categoryAssessment row ->
let ruleMatches = Array.map (MatchRule row) rules
Array.map (AssessCategory ruleMatches row) categoryAssessment)
(InitAssessment categorizeFields rules)
input
printfn "END - Assess"
d
let JoinAssessments assessments =
let numAssessments = Array.length assessments
Array.fold (fun accAssessment assessment ->
Array.map2 (fun accCategory category ->
let newRuleAssessments =
Array.map2 (+)
accCategory.ruleAssessments
category.ruleAssessments
{ accCategory with
ruleAssessments = newRuleAssessments })
accAssessment
assessment)
assessments.[0]
assessments.[1..(numAssessments-1)]
let numRecords = 10000
let numFields = 20
let numSplits = 10
let numRules = 10000
let inputs = Array.create numSplits
[| for i in 1 .. (numRecords / numSplits) ->
[| for j in 1 .. numFields ->
(i % 10) + j |] |]
let categorizeFields = [| (1, 6); (2, 3); (2, 4); (3, 2) |]
let rules = [| for i in 1 .. numRules -> (i % numFields, i) |]
let assessments =
Array.Parallel.map (Assess categorizeFields rules) inputs
|> JoinAssessments
printfn "Assessments: %A" assessments
0
This is a version of your program that doesn't require mutability and uses nearly all of the 4 cpus on my iMac.
To pull it off, it's driven by assessing each rule in parallel, not by processing records. That also required the input array to be transposed making it be fields by records.
open System
type CategoryAssessment =
{ fieldIndex: int
value: int
ruleAssessments: list<int> }
let MatchRule rVal fVal =
rVal = fVal
let AssessRule cMatches (inputs:int[][]) (rIndex, rVal) =
// printfn "START - Assess" // uses more cpu than the code itself
let matches = inputs.[rIndex] |>
Array.map2 (fun cVal fVal -> (MatchRule rVal fVal) = cVal) cMatches
let assessment = matches |>
Array.map ( fun v -> if v then 1 else 0 ) |>
Array.sum
// printfn "END - Assess"
assessment
let Assess categorizeFields rules (inputs:int[][]) =
categorizeFields |> List.map (fun (catIndex, catValue) ->
let catMatches = inputs.[catIndex] |> Array.map( fun v -> v = catValue )
let assessments = rules |> Array.Parallel.map
(AssessRule catMatches inputs)
|> Array.toList
{ CategoryAssessment.fieldIndex = catIndex;
value = catValue;
ruleAssessments = assessments }
)
let numRecords = 10000
let numFields = 20
let numRules = 10000
let inputs = [| for j in 1 .. numFields ->
[| for i in 1 .. numRecords -> (i % 10) + j |] |]
let categorizeFields = [ (1, 6); (2, 3); (2, 4); (3, 2) ]
let rules = [| for i in 1 .. numRules -> (i % numFields, i) |]
let assessments = Assess categorizeFields rules inputs
printfn "Assessments: %A" assessments
Assessing by rule allowed the summing of a single integer across all records for a given rule, avoiding mutable state and extra memory allocations.
I used a lot of array iteration to get the speed up but didn't remove all the lists.
I fear I changed the functionality while refactoring or made assumptions that can't be applied to your actual problem, however I do hope it's a useful example.

Resources