I am using mlr3proba package for machine learning survival analysis. My dataset contains factor, numeric and integer features.
I used 'scale' and 'encode' pipeops to preprocess my dataset for deephit and deepsurv neural network methods as following codes:
# Survival task built from `dataset`.
task.mlr <- TaskSurv$new(id = "id", backend = dataset, time = time, event = status)
# Tuning setup: 5-fold CV, C-index, random search, stop after 30 evaluations.
inner.rsmp <- rsmp("cv", folds = 5)
measure <- msr("surv.cindex")
tuner <- tnr("random_search")
terminator <- trm("evals", n_evals = 30)
deephit.learner <- lrn("surv.deephit", optimizer = "adam", epochs = 50)
# The search space uses the learner's own (un-prefixed) parameter ids.
nn.search_space <- ps(dropout = p_dbl(lower = 0, upper = 1),alpha = p_dbl(lower = 0, upper = 1))
# From here on `deephit.learner` is the whole encode -> scale -> learner
# pipeline, not the bare learner.
deephit.learner <- po("encode") %>>% po("scale") %>>% po("learner", deephit.learner)
deephit.instance <- TuningInstanceSingleCrit$new(
task = task.mlr,
learner = deephit.learner,
search_space = nn.search_space,
resampling = inner.rsmp,
measure = measure,
terminator = terminator
)
# This is the call that raises the "Parameter 'dropout' not available" error.
tuner$optimize(deephit.instance)
But when I run the last line, it shows the error below:
Error in self$assert(xs):
Assertion on 'xs' failed: Parameter 'dropout' not available. Did you mean 'encode.method'/'encode.affect_columns' / 'scale.center'?.
I really appreciate your help.
Hi, thanks for using mlr3proba! This happens because the parameter names change when the learner is wrapped in a pipeline, as you can see in the example below. There are a few ways to solve this: you could change the parameter ids to match the new names after wrapping in PipeOps (Option 1 below); you could specify the tuning ranges on the learner first and then wrap it in the PipeOps (Option 2 below); or you could use an AutoTuner and wrap that in the PipeOps. I use the final option in this tutorial.
library(mlr3proba)
library(mlr3)
library(paradox)
library(mlr3tuning)
library(mlr3extralearners)
library(mlr3pipelines)
# Shared setup for both options: the built-in "rats" task, a single holdout
# split, the C-index, and random search stopped after 2 evaluations.
task.mlr <- tsk("rats")
inner.rsmp <- rsmp("holdout")
measure <- msr("surv.cindex")
tuner <- tnr("random_search")
terminator <- trm("evals", n_evals = 2)
###########
# Option 1
###########
# Build the encode -> scale -> DeepHit pipeline in one expression, then list
# the parameter ids it exposes: every id is prefixed with the id of the PipeOp
# that owns it.
deephit.learner <- po("encode") %>>%
  po("scale") %>>%
  po("learner", lrn("surv.deephit", optimizer = "adam", epochs = 50))
deephit.learner$param_set$ids()
#> [1] "encode.method" "encode.affect_columns"
#> [3] "scale.center" "scale.scale"
#> [5] "scale.robust" "scale.affect_columns"
#> [7] "surv.deephit.frac" "surv.deephit.cuts"
#> [9] "surv.deephit.cutpoints" "surv.deephit.scheme"
#> [11] "surv.deephit.cut_min" "surv.deephit.num_nodes"
#> [13] "surv.deephit.batch_norm" "surv.deephit.dropout"
#> [15] "surv.deephit.activation" "surv.deephit.custom_net"
#> [17] "surv.deephit.device" "surv.deephit.mod_alpha"
#> [19] "surv.deephit.sigma" "surv.deephit.shrink"
#> [21] "surv.deephit.optimizer" "surv.deephit.rho"
#> [23] "surv.deephit.eps" "surv.deephit.lr"
#> [25] "surv.deephit.weight_decay" "surv.deephit.learning_rate"
#> [27] "surv.deephit.lr_decay" "surv.deephit.betas"
#> [29] "surv.deephit.amsgrad" "surv.deephit.lambd"
#> [31] "surv.deephit.alpha" "surv.deephit.t0"
#> [33] "surv.deephit.momentum" "surv.deephit.centered"
#> [35] "surv.deephit.etas" "surv.deephit.step_sizes"
#> [37] "surv.deephit.dampening" "surv.deephit.nesterov"
#> [39] "surv.deephit.batch_size" "surv.deephit.epochs"
#> [41] "surv.deephit.verbose" "surv.deephit.num_workers"
#> [43] "surv.deephit.shuffle" "surv.deephit.best_weights"
#> [45] "surv.deephit.early_stopping" "surv.deephit.min_delta"
#> [47] "surv.deephit.patience" "surv.deephit.interpolate"
#> [49] "surv.deephit.inter_scheme" "surv.deephit.sub"
# Search space keyed by the prefixed ids that the pipeline exposes.
nn.search_space <- ps(
  surv.deephit.dropout = p_dbl(0, 1),
  surv.deephit.alpha = p_dbl(0, 1)
)

# Tune the whole pipeline against that search space.
deephit.instance <- TuningInstanceSingleCrit$new(
  task = task.mlr,
  learner = deephit.learner,
  search_space = nn.search_space,
  resampling = inner.rsmp,
  measure = measure,
  terminator = terminator
)
tuner$optimize(deephit.instance)
#> INFO [08:15:29.770] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerRandomSearch>' and '<TerminatorEvals> [n_evals=2]'
#> INFO [08:15:29.841] [bbotk] Evaluating 1 configuration(s)
#> INFO [08:15:30.115] [mlr3] Running benchmark with 1 resampling iterations
#> INFO [08:15:30.314] [mlr3] Applying learner 'encode.scale.surv.deephit' on task 'rats' (iter 1/1)
#> INFO [08:15:39.997] [mlr3] Finished benchmark
#> INFO [08:15:40.296] [bbotk] Result of batch 1:
#> INFO [08:15:40.302] [bbotk] surv.deephit.dropout surv.deephit.alpha surv.harrell_c
#> INFO [08:15:40.302] [bbotk] 0.06494213 0.7109244 0.7516212
#> INFO [08:15:40.302] [bbotk] uhash
#> INFO [08:15:40.302] [bbotk] 27794d84-ba46-4900-8835-de24fcda8c7f
#> INFO [08:15:40.307] [bbotk] Evaluating 1 configuration(s)
#> INFO [08:15:40.395] [mlr3] Running benchmark with 1 resampling iterations
#> INFO [08:15:40.406] [mlr3] Applying learner 'encode.scale.surv.deephit' on task 'rats' (iter 1/1)
#> INFO [08:15:41.807] [mlr3] Finished benchmark
#> INFO [08:15:41.903] [bbotk] Result of batch 2:
#> INFO [08:15:41.905] [bbotk] surv.deephit.dropout surv.deephit.alpha surv.harrell_c
#> INFO [08:15:41.905] [bbotk] 0.05524693 0.2895437 0.7749676
#> INFO [08:15:41.905] [bbotk] uhash
#> INFO [08:15:41.905] [bbotk] 013795a3-766c-48f9-a3fe-2aae5d4cad48
#> INFO [08:15:41.918] [bbotk] Finished optimizing after 2 evaluation(s)
#> INFO [08:15:41.919] [bbotk] Result:
#> INFO [08:15:41.920] [bbotk] surv.deephit.dropout surv.deephit.alpha learner_param_vals x_domain
#> INFO [08:15:41.920] [bbotk] 0.05524693 0.2895437 <list[6]> <list[2]>
#> INFO [08:15:41.920] [bbotk] surv.harrell_c
#> INFO [08:15:41.920] [bbotk] 0.7749676
#> surv.deephit.dropout surv.deephit.alpha learner_param_vals x_domain
#> 1: 0.05524693 0.2895437 <list[6]> <list[2]>
#> surv.harrell_c
#> 1: 0.7749676
###########
# Option 2
###########
# Attach the tuning ranges to the learner *before* wrapping it, so the
# pipeline derives the (prefixed) search space itself.
#
# The ranges are passed straight to lrn() together with the fixed settings.
# Assigning `param_set$values <- list(...)` afterwards would replace *all*
# values and silently drop `optimizer = "adam"` and `epochs = 50`.
deephit.learner <- lrn(
  "surv.deephit",
  optimizer = "adam",
  epochs = 50,
  dropout = to_tune(0, 1),
  alpha = to_tune(0, 1)
)

# Preprocessing pipeline: encode -> scale -> learner, wrapped as a learner.
deephit.learner <- po("encode") %>>%
  po("scale") %>>%
  po("learner", deephit.learner)
deephit.learner <- GraphLearner$new(deephit.learner)

# Nested resampling: holdout for both the inner (tuning) and outer
# (evaluation) loops, random search limited to 2 evaluations.
tuned.deephit <- tune_nested(
  method = "random_search",
  task = task.mlr,
  learner = deephit.learner,
  inner_resampling = rsmp("holdout"),
  outer_resampling = rsmp("holdout"),
  measure = msr("surv.cindex"),
  term_evals = 2
)
#> INFO [08:15:43.167] [mlr3] Applying learner 'encode.scale.surv.deephit.tuned' on task 'rats' (iter 1/1)
#> INFO [08:15:43.477] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerRandomSearch>' and '<TerminatorRunTime> [secs=2]'
#> INFO [08:15:43.495] [bbotk] Evaluating 1 configuration(s)
#> INFO [08:15:43.565] [mlr3] Running benchmark with 1 resampling iterations
#> INFO [08:15:43.575] [mlr3] Applying learner 'encode.scale.surv.deephit' on task 'rats' (iter 1/1)
#> INFO [08:15:44.969] [mlr3] Finished benchmark
#> INFO [08:15:45.058] [bbotk] Result of batch 1:
#> INFO [08:15:45.064] [bbotk] surv.deephit.dropout surv.deephit.alpha surv.harrell_c
#> INFO [08:15:45.064] [bbotk] 0.3492627 0.2304623 0.6745362
#> INFO [08:15:45.064] [bbotk] uhash
#> INFO [08:15:45.064] [bbotk] 4ce96658-4d4a-4835-9d9f-a93398471aed
#> INFO [08:15:45.069] [bbotk] Evaluating 1 configuration(s)
#> INFO [08:15:45.127] [mlr3] Running benchmark with 1 resampling iterations
#> INFO [08:15:45.136] [mlr3] Applying learner 'encode.scale.surv.deephit' on task 'rats' (iter 1/1)
#> INFO [08:15:46.064] [mlr3] Finished benchmark
#> INFO [08:15:46.171] [bbotk] Result of batch 2:
#> INFO [08:15:46.176] [bbotk] surv.deephit.dropout surv.deephit.alpha surv.harrell_c
#> INFO [08:15:46.176] [bbotk] 0.1118406 0.7810053 0.6020236
#> INFO [08:15:46.176] [bbotk] uhash
#> INFO [08:15:46.176] [bbotk] 6a065d27-a7e0-4e72-8e1e-6151408510cf
#> INFO [08:15:46.186] [bbotk] Finished optimizing after 2 evaluation(s)
#> INFO [08:15:46.187] [bbotk] Result:
#> INFO [08:15:46.191] [bbotk] surv.deephit.dropout surv.deephit.alpha learner_param_vals x_domain
#> INFO [08:15:46.191] [bbotk] 0.3492627 0.2304623 <list[4]> <list[2]>
#> INFO [08:15:46.191] [bbotk] surv.harrell_c
#> INFO [08:15:46.191] [bbotk] 0.6745362
Created on 2021-04-26 by the reprex package (v0.3.0)
Related
How can one find the closest larger and closest smaller number from a maxima list? Which maxima functions should I explore?
Here's a solution based on finding the sublist of elements which are less than or greater than x, and returning the greatest or least such element. If there is no such element, false is returned.
(%i4) next_smaller (L, x) :=
(sort(L),
sublist (%%, lambda ([y], y < x)),
if %% # [] then last(%%)) $
(%i5) next_larger (L, x) :=
(sort(L),
sublist (%%, lambda ([y], y > x)),
if %% # [] then first(%%)) $
(%i6) list: [0, 0.014, 0.021, 0.028, 0.042, 0.056, 0.07, 0.084, 0.11, 0.17, 0.28, 0.42, 0.56] $
(%i7) next_smaller (list, 0.04);
(%o7) 0.028
(%i8) next_larger (list, 0.04);
(%o8) 0.042
(%i9) next_larger (list, 0.56);
(%o9) false
(%i10) next_smaller (list, 0.56);
(%o10) 0.42
(%i11) next_smaller (list, 0);
(%o11) false
(%i12) next_larger (list, 0);
(%o12) 0.014
(%i13) next_larger (list, -1);
(%o13) 0
(%i14) next_smaller (list, -1);
(%o14) false
(%i15) next_smaller (list, 1);
(%o15) 0.56
(%i16) next_larger (list, 1);
(%o16) false
These functions probably could be made more efficient, but you might notice the difference only if you're working with long lists. Maybe see if this works before trying to optimize.
I'm looking to optimize my solution for the maximum Collatz sequence problem in Erlang. Right now I've tried using ETS, and the following solution uses maps, but I'm getting worse performance than I feel I should. Is there perhaps some optimization I could do to improve it?
%% Longest Collatz chain over a range of start values, memoising chain
%% lengths in a map that is threaded through every call.
-module(collatzMaps).
-export([start/2, s/4]).
%% collatz(M, Map) -> {Length, Map1}
%% Returns the value stored for M together with the (possibly extended) map.
%% On a cache miss the value is 1 + the value of the next term (M div 2 for
%% even M, 3 * M + 1 otherwise) and is stored under M before returning.
%% The clause for 0 returns {0, Map} without touching the map.
collatz(0, Map) ->
{0, Map};
collatz(M, Map) ->
Exists = maps:is_key(M, Map),
case Exists of
false ->
case M rem 2 == 0 of
true ->
Result = collatz(M div 2, Map),
Val = (1 + element(1, Result)),
Map1 = maps:put(M, Val, element(2, Result)),
{maps:get(M, Map1), Map1};
false ->
Result = collatz((3 * M + 1), Map),
Val = (1 + element(1, Result)),
Map2 = maps:put(M, Val, element(2, Result)),
{maps:get(M, Map2), Map2}
end;
true ->
{maps:get(M, Map), Map}
end.
%% s(N, M, Max, Map) -> Max1
%% Walks the start values N..M (inclusive), carrying the map forward, and
%% returns the largest length seen; Max is returned unchanged once N > M.
s(N, M, Max, Map) ->
if
N =< M ->
Result = collatz(N, Map),
if
element(1, Result) > Max ->
NextMax = element(1, Result),
MapNext = element(2, Result),
s(N + 1, M, NextMax, MapNext);
true ->
MapNext = element(2, Result),
s(N + 1, M, Max, MapNext)
end;
true ->
Max
end.
%% start(N, M) -> Max
%% Runs s/4 over N..M with the map seeded with 1 => 1, prints the elapsed
%% wall-clock time in seconds and returns the result.
start(N, M)->
%% The first statistics/1 calls only reset the "since last call" counters.
statistics(runtime),
statistics(wall_clock),
Map = maps:new(),
Map1 = maps:put(1, 1, Map),
G = s(N, M, 0, Map1),
%% Second element is the wall-clock time in ms since the previous call.
{_, Time2} = statistics(wall_clock),
U2 = Time2 / 1000,
io:format("~p seconds~n", [U2]),
G.
Well, first let's tweak the invocation, which will allow us to gather some simple statistics and compare different approaches
-export([start/2, max_collatz/2]).
...
%% Longest chain length over N..M, without any timing or printing, so it can
%% be called directly from a benchmark harness.
max_collatz(N, M) ->
    s(N, M, 0, #{1 => 1}).

%% Times max_collatz/2 with timer:tc/1 (microseconds), prints the elapsed
%% time in seconds and returns the result.
start(N, M) ->
    Run = fun() -> max_collatz(N, M) end,
    {Micros, Result} = timer:tc(Run),
    io:format("~p seconds~n", [Micros / 1000000]),
    Result.
So let's write it in a more idiomatic Erlang way
%% Longest Collatz chain over a range of start values, with chain lengths
%% memoised in a map.
-module(collatz).
-export([start/2, max_collatz/2]).
%% Next term of the sequence: N div 2 for even N, 3 * N + 1 otherwise.
collatz_next(N) when N rem 2 =:= 0 ->
N div 2;
collatz_next(N) ->
3 * N + 1.
%% collatz_length(N, Map) -> {Length, Map1}
%% Returns the cached length for N if present; otherwise computes it as
%% 1 + the length of the next term and stores it under N.
collatz_length(N, Map) ->
case Map of
#{N := L} -> {L, Map};
_ ->
{L, Map2} = collatz_length(collatz_next(N), Map),
{L + 1, Map2#{N => L + 1}}
end.
%% Folds collatz_length/2 over N..M starting from #{1 => 1} and returns the
%% largest value in the resulting map.
max_collatz(N, M) ->
Map = lists:foldl(fun(X, Map) -> {_, Map2} = collatz_length(X, Map), Map2 end,
#{1 => 1}, lists:seq(N, M)),
lists:max(maps:values(Map)).
%% Times max_collatz/2 with timer:tc/1 (microseconds), prints the elapsed
%% time in seconds and returns the result.
start(N, M) ->
{T, Result} = timer:tc(fun() -> max_collatz(N, M) end),
io:format("~p seconds~n", [T / 1000000]),
Result.
Then we can compare speed using for example eministat.
Clone in
git clone https://github.com/jlouis/eministat.git
cd eministat
make
If you run into a problem like
DEPEND eministat.d
ERLC eministat.erl eministat_analysis.erl eministat_ds.erl eministat_plot.erl eministat_report.erl eministat_resample.erl eministat_ts.erl
compile: warnings being treated as errors
src/eministat_resample.erl:8: export_all flag enabled - all functions will be exported
erlang.mk:4940: recipe for target 'ebin/eministat.app' failed
make[1]: *** [ebin/eministat.app] Error 1
erlang.mk:4758: recipe for target 'app' failed
make: *** [app] Error 2
You can fix it
diff --git src/eministat_resample.erl src/eministat_resample.erl
index 1adf401..0887b2c 100644
--- src/eministat_resample.erl
+++ src/eministat_resample.erl
@@ -5,7 +5,7 @@
-include("eministat.hrl").
-export([resample/3, bootstrap_bca/3]).
--compile(export_all).
+-compile([nowarn_export_all, export_all]).
%% @doc resample/3 is the main resampler of eministat
%% @end
So then run it
$ erl -pa eministat/ebin/
Erlang/OTP 21 [erts-10.1] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:1] [hipe]
Eshell V10.1 (abort with ^G)
1> c(collatzMaps), c(collatz).
{ok,collatz}
2> eministat:x(95.0, eministat:s(orig, fun() -> collatzMaps:max_collatz(1, 100000) end, 30), eministat:s(new, fun() -> collatz:max_collatz(1, 100000) end, 30)).
x orig
+ new
+--------------------------------------------------------------------------+
|+ ++++++++ +++++ * + +x+**+xxxx**x xxx xx+x xxx *x x + x x|
| + + + x x xx x |
| + |
| |_______M___A__________| |
| |________M_____A______________| |
+--------------------------------------------------------------------------+
Dataset: x N=30 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 1.76982e+5
1st Qu. 1.81610e+5
Median: 1.82954e+5
3rd Qu. 1.87030e+5
Max: 1.94944e+5
Average: 1.84280e+5 [ 8.00350] ( 1.82971e+5 ‥ 1.85749e+5)
Std. Dev: 3999.87 [ -102.524] ( 3128.74 ‥ 5431.13)
Outliers: 0/0 = 0 (μ=1.84288e+5, σ=3897.35)
Outlier variance: 3.22222e-2 (slight)
------
Dataset: + N=30 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 1.69179e+5
1st Qu. 1.72501e+5
Median: 1.74614e+5
3rd Qu. 1.79850e+5
Max: 1.90638e+5
Average: 1.76517e+5 [ 3.11862] ( 1.74847e+5 ‥ 1.78679e+5)
Std. Dev: 5343.46 [ -147.802] ( 4072.99 ‥ 7072.53)
Outliers: 0/0 = 0 (μ=1.76520e+5, σ=5195.66)
Outlier variance: 9.43164e-2 (slight)
Difference at 95.0% confidence
-7762.60 ± 2439.69
-4.21240% ± 1.32391%
(Student's t, pooled s = 4719.72)
------
ok
So it seems to be about 4% faster now, which is not much. First, we can inline collatz_next/1, which is basically what you have in your collatz/2 function. I like to be specific, so I put the following between -export and the first function
-compile({inline, [collatz_next/1]}).
It has very little effect
Difference at 95.0% confidence
-9895.27 ± 5524.91
-5.24520% ± 2.92860%
(Student's t, pooled s = 1.06882e+4)
Then we can try to unroll lists:foldl/3, lists:seq/2 and lists:max/1, as in your s/4 function, but let's do it in a more idiomatic way.
%% max_collatz(N, M) -> Max
%% Longest Collatz chain length over the start values N..M (inclusive).
%% Returns 0 for an empty range (N > M).
max_collatz(N, M) ->
    max_collatz(N, M, 0, #{1 => 1}).

%% max_collatz(N, M, Max, Map) -> Max1
%% Tail-recursive worker. N is the next start value to measure, Max the
%% best length seen so far and Map the memo table threaded through
%% collatz_length/2. The current N is measured *before* advancing, so the
%% first start value is not skipped, and the `N > M` clause terminates the
%% loop for every range, including an empty one.
max_collatz(N, M, Max, _) when N > M ->
    Max;
max_collatz(N, M, Max, Map) ->
    case collatz_length(N, Map) of
        {L, Map2} when L > Max ->
            max_collatz(N + 1, M, L, Map2);
        {_, Map2} ->
            max_collatz(N + 1, M, Max, Map2)
    end.
Well it's better but still not much
Difference at 95.0% confidence
-1.78775e+4 ± 1980.35
-9.66832% ± 1.07099%
Now that we have removed all external function calls, it is worth trying native compilation (external function calls usually ruin any benefit of native compilation). We could also add a small type hint for HiPE, but it seems to have barely any effect (it is usually worth trying for floating-point arithmetic, which is not the case here, and the heavy use of maps is probably part of the problem as well).
%% Entry point with guards that double as type hints for the native
%% compiler: both bounds must be integers and N must be strictly below M.
%% NOTE(review): with this as the only max_collatz/2 clause, N >= M raises
%% function_clause instead of returning a value - confirm that is intended.
max_collatz(N, M) when N < M, is_integer(N), is_integer(M) ->
max_collatz(N, M, 1, #{1 => 1}).
Not much better
c(collatz, [native]).
...
Difference at 95.0% confidence
-2.26703e+4 ± 2651.32
-12.1721% ± 1.42354%
(Student's t, pooled s = 5129.13)
So it's time to try it the dirty way. The process dictionary is not the recommended place to store your data, but if it is inside a dedicated process it is an acceptable solution.
%% collatz_length(N) -> Length
%% Chain length for N, memoised in the calling process's dictionary: a hit
%% is returned as-is; a miss is computed as 1 + the length of the next
%% term, stored under N and returned.
collatz_length(N) ->
    case get(N) of
        undefined ->
            Len = collatz_length(collatz_next(N)) + 1,
            put(N, Len),
            Len;
        Cached ->
            Cached
    end.
%% max_collatz(N, M) -> Max
%% Longest Collatz chain length over the start values N..M (inclusive).
%% The work runs in a linked helper process so that the memo entries
%% written to the process dictionary do not end up in the caller's
%% dictionary. Both bounds must be integers with N =< M, so a
%% single-element range is accepted.
max_collatz(N, M) when is_integer(N), is_integer(M), N =< M ->
    P = self(),
    W = spawn_link(fun() ->
                       %% Seed the memo: the chain starting at 1 has length 1.
                       put(1, 1),
                       P ! {self(), max_collatz(N, M, 0)}
                   end),
    %% Match on the worker's pid so unrelated messages are left in the mailbox.
    receive {W, Max} -> Max end.

%% max_collatz(N, M, Max) -> Max1
%% Tail-recursive worker. The current N is measured *before* advancing, so
%% the first start value is not skipped; the loop stops once N passes M.
max_collatz(N, M, Max) when N > M ->
    Max;
max_collatz(N, M, Max) ->
    case collatz_length(N) of
        L when L > Max ->
            max_collatz(N + 1, M, L);
        _ ->
            max_collatz(N + 1, M, Max)
    end.
Yes, it's a dirty but working solution, and it's worth it (even without native compilation)
Difference at 95.0% confidence
-1.98173e+5 ± 5450.92
-80.9384% ± 2.22628%
(Student's t, pooled s = 1.05451e+4)
So here we are, from 3.6s down to 0.93s using some dirty tricks. But anyway, if you had to do this sort of task, you would probably use a NIF written in C. This is not the type of task where Erlang shines.
> collatzMaps:start(1, 1000000).
3.576669 seconds
525
> collatz:start(1, 1000000).
0.931186 seconds
525
At the moment I'm trying to learn Haskell by writing a parser for a certain type of log file.
If I execute the following singleBlock parser:
-- | Parse one block of the log and return @(increment, step)@ as strings.
-- Fails when the remaining input contains no "LOAD INCREMENT" marker.
singleBlock :: ReadP (String, String)
singleBlock = do
-- Peek at the remaining input without consuming it.
st <- look
if "LOAD INCREMENT" `isInfixOf` st then do
-- Consume everything that precedes the next "LOAD INCREMENT" marker.
fmap (head . splitOn "LOAD INCREMENT") look >>= string
-- Skip up to the first digit, then read the number that follows.
increment <- (munch PP.nonDigit >> munch PP.floatDot)
-- Consume everything that precedes the next "STEP" marker.
fmap (head. splitOn "STEP") look >>= string
munch PP.nonDigit
step <- munch PP.digit
return (increment, step)
else pfail
It produces the following output:
[("3.000E-01","1"),("3.000E-01","2"),("3.000E-01","3"),("1.000E-01","4"),("1","5")]
The problem is that from step 5 onwards the format of the file changes, and thus the Float isn't parsed anymore.
If I change the increment line to:
increment <- (munch PP.nonDigitOnLine >> munch PP.floatDot >> munch PP.nonDigitOnLine >> munch PP.floatDot)
[("","1"),("","2"),("","3"),("","4"),("2.500E-01","5")]
The fifth step gets parsed correctly. The first 4 steps produce an empty string, thus I thought I could change the increment line to this:
increment <- (munch PP.nonDigitOnLine >> munch PP.floatDot >> munch PP.nonDigitOnLine >> munch PP.floatDot) <++ (munch PP.nonDigit >> munch PP.floatDot)
Using the <++ left biased choice. It however doesn't change my output:
[("","1"),("","2"),("","3"),("","4"),("2.500E-01","5")]
Edit:
The log file is something like this (Focusing on the LOAD INCREMENT line):
STEP 4 INITIATED:
LOAD INCREMENT: START STEPS * 1.000E-01
SPARSE: DIM=272114 NNZ(MAT)=19119044
SOLVE: REDUCTION RES= 0.14E-12 (INIT. RES= 0.96E+06) NI= 1
ETA-ENERGY DIAGRAM 0 0.000E+00 3.182E+02
ETA-ENERGY DIAGRAM 1 1.000E+00 2.344E+00
STEP 4 : DISPLACEMENT NORM = 3.851E-03 TOLERANCE = 1.000E-02
STEP 4 : FORCE NORM = 6.558E+05 TOLERANCE = 1.000E-02
RELATIVE OUT OF BALANCE FORCE = 2.708E-01 CHECK = FALSE
SPARSE: DIM=272114 NNZ(MAT)=19119044
SOLVE: REDUCTION RES= 0.44E-14 (INIT. RES= 0.18E+06) NI= 1
ETA-ENERGY DIAGRAM 0 0.000E+00 2.239E+00
ETA-ENERGY DIAGRAM 1 1.000E+00 1.464E+00
...
...
RELATIVE DISPLACEMENT VARIATION = 6.156E-03 CHECK = TRUE
RELATIVE OUT OF BALANCE FORCE = 1.722E-01 CHECK = FALSE
STEP 4 TERMINATED, CONVERGENCE AFTER 2 ITERATIONS
EXECUTION STOPPED ON TOTAL LOAD CRITERION
CONTINUED ANALYSIS POSSIBLE.
TOTAL LOAD FACTOR: LOADING(12) * 1.000E+00
PLASTICITY LOGGING SUMMARY
GROUP NAME PLAST, PRV. PL, CRITIC, PLAST NEW, PRV.PL NEW, CRITIC NEW
TOTAL MODEL 0 0 0 0 0 0
CRACKING LOGGING SUMMARY
GROUP NAME CRACK, OPEN, CLOSED, ACTIVE, INACTI, ARISES, RE-OPENS, CLOSES
TOTAL MODEL 698 698 0 694 4 209 0 0
CUMULATIVE REACTION: FORCE X FORCE Y FORCE Z
0.89594D-09 0.11246D+02 -0.67820D-08
STEP 5 INITIATED:
LOAD INCREMENT: LOADING( 1) * 2.500E-01
SPARSE: DIM=272114 NNZ(MAT)=19119044
SOLVE: REDUCTION RES= 0.53E-10 (INIT. RES= 0.11E+06) NI= 1
STEP 5 : ENERGY NORM = 7.379E+02 TOLERANCE = 1.000E-04
SPARSE: DIM=272114 NNZ(MAT)=19119044
SOLVE: REDUCTION RES= 0.56E-14 (INIT. RES= 0.11E+06) NI= 1
...
...
STEP 5 TERMINATED, CONVERGENCE AFTER 13 ITERATIONS
TOTAL LOAD FACTOR: LOADING( 1) * 2.500E-01
Just 20 minutes later, it turns out I was too quick to ask the question.
The problem is that the munch function always succeeds, thus I needed to provide a helper function that fails when the parsed string is empty.
-- | Parse one or more characters accepted by 'floatDot' and return them.
-- Unlike @munch floatDot@, which succeeds with an empty string when nothing
-- matches, this parser fails in that case, so it can be used on the left of
-- '<++' to fall through to the alternative. 'munch1' provides exactly this
-- "at least one character" behaviour.
isFloatDot = munch1 floatDot
The following snippet produces the correct output:
-- | Parse one block of the log and return @(increment, step)@ as strings.
-- Fails when the remaining input contains no "LOAD INCREMENT" marker.
singleBlock :: ReadP (String, String)
singleBlock = do
-- Peek at the remaining input without consuming it.
st <- look
if "LOAD INCREMENT" `isInfixOf` st then do
-- Consume everything that precedes the next "LOAD INCREMENT" marker.
fmap (head . splitOn "LOAD INCREMENT") look >>= string
-- First try the two-number layout; PP.isFloatDot fails when the second number
-- is missing, so '<++' then falls back to the single-number layout.
increment <- (munch PP.nonDigitOnLine >> munch PP.floatDot >> munch PP.nonDigitOnLine >> PP.isFloatDot) <++ (munch PP.nonDigit >> munch PP.floatDot)
-- Consume everything that precedes the next "STEP" marker.
fmap (head. splitOn "STEP") look >>= string
munch PP.nonDigit
step <- munch PP.digit
return (increment, step)
else pfail
Lets say I have:
[ X || X<- L, some_expensive_boolean(X), some_expensive_boolean2(X)]
If, for any X in L, some_expensive_boolean(X) is false, is some_expensive_boolean2(X) executed?
TL;DR: No, some_expensive_boolean2/1 is not called.
There are a couple of ways you can verify this.
1. Having functions print something as they are called.
%% Demonstrates that filters in a list comprehension short-circuit: g/1 is
%% only called for elements that f/1 has already accepted.
-module(lc).
-export([lc/1]).

%% Keeps the elements of L for which both f/1 and g/1 return true.
lc(L) ->
    [X || X <- L, f(X), g(X)].

%% First filter: prints {f, X} and rejects only the value 2.
%% The first clause matches the literal directly; binding it to a variable
%% that is never used would only produce an "unused variable" warning.
f(2) ->
    erlang:display({f, 2}),
    false;
f(X) ->
    erlang:display({f, X}),
    true.

%% Second filter: prints {g, X} and accepts everything.
g(X) ->
    erlang:display({g, X}),
    true.
Then on the Erlang shell:
1> lc:lc(lists:seq(1, 4)).
{f,1}
{g,1}
{f,2} %% g is not called here
{f,3}
{g,3}
{f,4}
{g,4}
[1,3,4]
2. Check the generated Core Erlang code.
Compiling the module with the +to_core option will produce a lc.core file with the Core Erlang code, which looks a little bit like Erlang; it has its own syntax but very similar semantics.
erlc +to_core lc.erl
The code generated is quite verbose so I won't paste it here, but the gist is that there are two nested case expressions, one calling f/1 with the clause that matches on true containing the other case that calls g/1.
The answer is no. It is short-circuited.
1> [ X || X <-[1,2], begin io:format("Test 1: ~p~n", [X]), X rem 2 =:= 0 end, io:format("Test 2: ~p~n", [X]) =:= ok ].
Test 1: 1
Test 1: 2
Test 2: 2
[2]
Short-circuits based on the following:
%% Demonstrates filter short-circuiting in a list comprehension: the first
%% filter always returns false, so the second one is never reached.
-module(shortcircuit).
-export([test/0]).
%% Runs the comprehension over [1, 2, 3] with both filters.
test() ->
L = [1, 2, 3],
[ X || X <- L, some_expensive_boolean(X), some_expensive_boolean2(X)].
%% First filter: logs its argument and rejects every element.
some_expensive_boolean(X) ->
io:format("In some_expensive_boolean: ~p~n", [X]),
false.
%% Second filter: logs its argument and accepts every element.
some_expensive_boolean2(X) ->
io:format("In some_expensive_boolean2: ~p~n", [X]),
true.
Execute:
1> shortcircuit:test().
In some_expensive_boolean: 1
In some_expensive_boolean: 2
In some_expensive_boolean: 3
[]
2>
Let's create an example:
$ cat test.erl
%% Demonstrates filter short-circuiting in a list comprehension with two
%% filters that each log their argument.
-module(test).
-export([show/0]).
%% Keeps the elements of [1,2,3,4,5] accepted by bigger/1 and then smaller/1.
show() ->
[ X || X <- [1,2,3,4,5], bigger(X), smaller(X)].
%% First filter: logs X and accepts values greater than 2.
bigger(X) ->
io:format("bigger ~p~n", [X]),
X > 2.
%% Second filter: logs X and accepts values less than 4.
smaller(X) ->
io:format("smaller ~p~n", [X]),
X < 4.
and test it:
14> c(test).
{ok,test}
15> test:show().
bigger 1
bigger 2
bigger 3
smaller 3
bigger 4
smaller 4
bigger 5
smaller 5
[3]
So the answer is: NO.
I'm still working on my implementation of a siamese neural network in Torch, as mentioned in some of my previous questions.
I finally got a good working implementation of it, but now I'd like to add a mini-batch training. That is, I would like to train the siamese neural network with a set of training elements, instead of using just one.
Unfortunately, my implementation for 2 minibatches does not work. There's a problem in the back-propagation of the error, that I cannot solve.
Here's the main architecture:
th> perceptron_general
nn.Sequential {
[input -> (1) -> output]
(1): nn.ParallelTable {
input
|`-> (1): nn.Sequential {
| [input -> (1) -> (2) -> output]
| (1): nn.ParallelTable {
| input
| |`-> (1): nn.Sequential {
| | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> output]
| | (1): nn.Linear(6 -> 3)
| | (2): nn.Tanh
| | (3): nn.Dropout
| | (4): nn.Linear(3 -> 2)
| | (5): nn.Tanh
| | }
| |`-> (2): nn.Sequential {
| | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> output]
| | (1): nn.Linear(6 -> 3)
| | (2): nn.Tanh
| | (3): nn.Dropout
| | (4): nn.Linear(3 -> 2)
| | (5): nn.Tanh
| | }
| ... -> output
| }
| (2): nn.CosineDistance
| }
|`-> (2): nn.Sequential {
| [input -> (1) -> (2) -> output]
| (1): nn.ParallelTable {
| input
| |`-> (1): nn.Sequential {
| | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> output]
| | (1): nn.Linear(6 -> 3)
| | (2): nn.Tanh
| | (3): nn.Dropout
| | (4): nn.Linear(3 -> 2)
| | (5): nn.Tanh
| | }
| |`-> (2): nn.Sequential {
| | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> output]
| | (1): nn.Linear(6 -> 3)
| | (2): nn.Tanh
| | (3): nn.Dropout
| | (4): nn.Linear(3 -> 2)
| | (5): nn.Tanh
| | }
| ... -> output
| }
| (2): nn.CosineDistance
| }
... -> output
}
}
I have an upper neural network, put together with a lower neural network. They are both inserted into a parallel table. This parallel table is then inserted into a perceptron.
The same is done for a second parallel table.
Then the two parallel-table perceptrons are put together into a general parallel table, which is inserted into a general perceptron.
I think this architecture is right, but I'm missing something with the gradient_update function.
Here's my code:
-- Rounds num to idp digits after the decimal point (idp defaults to 0).
-- The value is scaled up, shifted by one half, floored and scaled back.
function round(num, idp)
  local scale = 10 ^ (idp or 0)
  local shifted = num * scale + 0.5
  return math.floor(shifted) / scale
end
idp = 4
-- Returns a new array whose elements are the negated elements of `array`.
-- The input is not modified. The result lives in a local variable, so the
-- call does not create or overwrite a global named `newArray`.
function changeSignToArray(array)
  local negated = {}
  for i = 1, #array do
    negated[i] = -1 * array[i]
  end
  return negated
end
-- Returns a new array holding table[lower_index] .. table[upper_index]
-- (inclusive), re-indexed from 1. An empty array is returned when
-- lower_index > upper_index. All working variables are local, so the call
-- does not create or overwrite the globals `return_table` and `k`.
function subtable(table, lower_index, upper_index)
  local slice = {}
  local next_slot = 1
  for i = lower_index, upper_index do
    slice[next_slot] = table[i]
    next_slot = next_slot + 1
  end
  return slice
end
-- One training step: forward pass, backward pass with the negated targets as
-- the output gradient, then a parameter update with the given learning rate.
-- Returns the (updated) perceptron.
function gradientUpdate(perceptron, dataset, target, learningRate)
-- Global copies of the arguments (no `local`), presumably kept for
-- inspection from the REPL.
temp_dataset = dataset
temp_target = target
temp_perceptron = perceptron
print("### new gradientUpdate() ###");
print("#dataset "..#dataset);
print("(#dataset[1][1])[1] "..(#dataset[1][1])[1]);
print("#target "..#target);
-- Only the first element of the first output is read for the log line.
predictionValue = (perceptron:forward(dataset)[1])[1]
print('predictionValue '..predictionValue);
-- if predictionValue*target < 1 then
realTarget=changeSignToArray(target)
-- NOTE(review): this passes a single tensor as gradOutput. For a network
-- whose top level is a ParallelTable, backward() presumably expects a table
-- with one tensor per branch - confirm against the model structure.
gradientWrtOutput = torch.Tensor(realTarget)
temp_gradient = gradientWrtOutput
perceptron:zeroGradParameters()
perceptron:backward(dataset, gradientWrtOutput)
perceptron:updateParameters(learningRate)
-- end
return perceptron;
end
require "os"
require "nn"
dropOutFlag=TRUE
input_number=6
hiddenUnits=3
output_number=2
hiddenLayers=5
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- LET'S PREPARE THE DATA -- -- -- -- -- -- --
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dim = 483
trainDataset = {};
targetDataset = {}
for i=1,dim do
trainDataset[i]={torch.rand(input_number), torch.rand(input_number)}
if i%2==0 then targetDataset[i] = 1
else targetDataset[i] = -1
end
end
function targetDataset:size() return #targetDataset end
target = -1 -- the target for cosine similarity is +1 for genuine signatures, and -1 for forgeries
io.write("#trainDataset="..#trainDataset.." \n");
io.write("#trainDataset[1]="..#trainDataset[1].." \n");
io.write("#targetDataset="..#targetDataset.." \n");
-- matrix having 5 rows * 2 columns
max_iterations = 25
learnRate = 0.1
minibatchSize = 10
for m=30,1,-1 do
if (dim % m) == 0 then minibatchSize=m; break; end
end
minibatchSize = 2
print('minibatchSize='..minibatchSize);
span_number = dim/minibatchSize
print('span_number '..span_number);
minibatch_train = {torch.Tensor(span_number)}
target_train = {torch.Tensor(span_number)}
i=1
for m=1, span_number do
minibatch_train[i] = torch.Tensor(minibatchSize)
target_train[i] = torch.Tensor(minibatchSize)
lower_index = 1+minibatchSize*(m-1)
upper_index = (m-1)*minibatchSize+minibatchSize
io.write("i= "..i.." lower_index ".. lower_index)
io.write(" upper_index "..upper_index.."\n")
minibatch_train[i] = subtable(trainDataset, lower_index, upper_index)
target_train[i] = subtable(targetDataset, lower_index, upper_index)
i = i + 1
end
print('\n#minibatch_train '.. #minibatch_train);
print('#minibatch_train[1] '.. #minibatch_train[1]);
print('#target_train '.. #target_train);
print('#target_train[1] '.. #target_train[1]..'\n');
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- LET'S PREPARE THE SIAMESE NEURAL NETWORK --
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- imagine we have one network we are interested in, it is called "perceptronUpper"
perceptronUpper= nn.Sequential()
perceptronUpper:add(nn.Linear(input_number, hiddenUnits))
perceptronUpper:add(nn.Tanh())
if dropOutFlag==TRUE then perceptronUpper:add(nn.Dropout()) end
-- for w=1, hiddenLayers do
-- perceptronUpper:add(nn.Linear(hiddenUnits,hiddenUnits))
-- perceptronUpper:add(nn.Tanh())
-- if dropOutFlag==TRUE then perceptronUpper:add(nn.Dropout()) end
-- end
perceptronUpper:add(nn.Linear(hiddenUnits,output_number))
perceptronUpper:add(nn.Tanh())
-- But we want to push examples towards or away from each other
-- so we make another copy of it called perceptronLower
-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage
-- that's why we create it again (so that the gradients of the pair don't wipe each other)
perceptronLower= perceptronUpper:clone('weight', 'gradWeights', 'gradBias', 'bias')
-- updates the gradient weights and gradient bias
-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) perceptron
-- ParallelTable is a container module that, in its forward() method, applies the i-th member module to the i-th input, and outputs a table of the set of outputs.
parallel_table = nn.ParallelTable()
parallel_table:add(perceptronUpper)
parallel_table:add(perceptronLower)
-- now we define our top level network that takes this parallel table and computes the cosine distance betweem
-- the pair of outputs
perceptron= nn.Sequential()
perceptron:add(parallel_table)
perceptron:add(nn.CosineDistance())
-- For the minibatch
general_parallel= nn.ParallelTable()
for mb=1,minibatchSize do
general_parallel:add(perceptron)
end
perceptron_general = nn.Sequential()
perceptron_general:add(general_parallel)
-- -- # TRAINING:
-- -- training on the first mini-batch only, for at most max_iterations rounds
for i = 1, max_iterations do
perceptron_general = gradientUpdate(perceptron_general, minibatch_train[1], target_train[1], learnRate)
-- NOTE(review): the next line overwrites perceptron_general with a rounded
-- number and reads `dataset`, while the lines after it use `prediction`;
-- neither `dataset` nor `prediction` is assigned in the visible script.
-- It looks like the result was meant to go into `prediction` - confirm.
perceptron_general = round((perceptron_general:forward(dataset)[1]),idp);
io.write("i="..i..") optimization predictionValue= "..prediction.."\n");
if(prediction==target) then io.write("\tprediction==target OUT"); break end
end
The problem comes with the call to backwards() function.
Possibly there's a problem in the dimensions...
Do you have any ideas on how to solve this?
The problem comes with the call to backwards() function. Possibly there's a problem in the dimensions...
Technically speaking, given the structure of perceptron_general, when you perform a backward pass the 2nd argument (= gradOutput) should be a table made of 2 x 1D tensors (i.e. one gradOutput per branch of your top-level parallel table), which gives something like:
gradientWrtOutput = {
torch.Tensor{realTarget[1]},
torch.Tensor{realTarget[2]}
}
Note: right after there is another error within your main training loop.