Z3 timing variation - z3

Since upgrading to the open source release of Z3 (latest git master), I've noticed a significant timing variation between repeated runs of nearly-identical SMT queries using the C API (anywhere from 2-122s). The only difference between the queries is the naming of arrays (in the QF_AUFBV logic).
We're allocating arrays as follows:
Z3_symbol s = Z3_mk_string_symbol(z3_context, arrayName);
Z3_mk_const(z3_context, s,
Z3_mk_array_sort(z3_context, getSort(32), getSort(8)));
Below is an example query (converted to SMT-LIB). Replacing "arr51" with other names (e.g., "a" or "arr51_0x2628008") significantly changes the duration of the query, by up to two orders of magnitude. Repeated runs without varying the array name don't exhibit a significant timing variation.
Interestingly, the old binary release of Z3 3.2 doesn't seem to be affected by array naming (and runs faster for most of our queries).
(benchmark klee
:status unsat
:logic QF_AUFBV
:extrafuns ((arr51 Array[32:8]))
:assumption
(let (?x13 (concat (select arr51 bv58[32]) (concat (select arr51 bv57[32]) (select arr51 bv56[32]))))
(let (?x16 (concat (select arr51 bv59[32]) ?x13))
(let (?x23 (concat bv0[32] ?x16))
(let (?x34 (bvsub (bvadd (concat (extract[33:0] ?x23) bv0[30]) (concat (extract[35:0] ?x23) bv0[28])) (concat (extract[40:0] ?x23) bv0[23])))
(let (?x42 (bvadd (bvadd ?x34 (concat (extract[44:0] ?x23) bv0[19])) (concat (extract[45:0] ?x23) bv0[18])))
(let (?x50 (bvadd (bvsub ?x42 (concat (extract[47:0] ?x23) bv0[16])) (concat (extract[49:0] ?x23) bv0[14])))
(let (?x58 (bvsub (bvadd ?x50 (concat (extract[50:0] ?x23) bv0[13])) (concat (extract[52:0] ?x23) bv0[11])))
(let (?x66 (bvadd (bvadd ?x58 (concat (extract[56:0] ?x23) bv0[7])) (concat (extract[59:0] ?x23) bv0[4])))
(let (?x68 (extract[63:32] (bvsub ?x66 ?x23)))
(flet ($x79 (= bv1[32] bv30[32]))
(let (?x80 (ite $x79 (concat bv0[30] (extract[31:30] (bvsub ?x16 ?x68))) (ite (= bv1[32] bv31[32]) (concat bv0[31] (extract[31:31] (bvsub ?x16 ?x68))) bv0[32])))
(flet ($x85 (= bv1[32] bv29[32]))
(flet ($x90 (= bv1[32] bv28[32]))
(let (?x91 (ite $x90 (concat bv0[28] (extract[31:28] (bvsub ?x16 ?x68))) (ite $x85 (concat bv0[29] (extract[31:29] (bvsub ?x16 ?x68))) ?x80)))
(flet ($x96 (= bv1[32] bv27[32]))
(flet ($x102 (= bv1[32] bv26[32]))
(let (?x103 (ite $x102 (concat bv0[26] (extract[31:26] (bvsub ?x16 ?x68))) (ite $x96 (concat bv0[27] (extract[31:27] (bvsub ?x16 ?x68))) ?x91)))
(flet ($x108 (= bv1[32] bv25[32]))
(flet ($x114 (= bv1[32] bv24[32]))
(let (?x115 (ite $x114 (concat bv0[24] (extract[31:24] (bvsub ?x16 ?x68))) (ite $x108 (concat bv0[25] (extract[31:25] (bvsub ?x16 ?x68))) ?x103)))
(flet ($x119 (= bv1[32] bv23[32]))
(flet ($x125 (= bv1[32] bv22[32]))
(let (?x126 (ite $x125 (concat bv0[22] (extract[31:22] (bvsub ?x16 ?x68))) (ite $x119 (concat bv0[23] (extract[31:23] (bvsub ?x16 ?x68))) ?x115)))
(flet ($x131 (= bv1[32] bv21[32]))
(flet ($x137 (= bv1[32] bv20[32]))
(let (?x138 (ite $x137 (concat bv0[20] (extract[31:20] (bvsub ?x16 ?x68))) (ite $x131 (concat bv0[21] (extract[31:21] (bvsub ?x16 ?x68))) ?x126)))
(flet ($x142 (= bv1[32] bv19[32]))
(flet ($x147 (= bv1[32] bv18[32]))
(let (?x148 (ite $x147 (concat bv0[18] (extract[31:18] (bvsub ?x16 ?x68))) (ite $x142 (concat bv0[19] (extract[31:19] (bvsub ?x16 ?x68))) ?x138)))
(flet ($x153 (= bv1[32] bv17[32]))
(flet ($x157 (= bv1[32] bv16[32]))
(let (?x158 (ite $x157 (concat bv0[16] (extract[31:16] (bvsub ?x16 ?x68))) (ite $x153 (concat bv0[17] (extract[31:17] (bvsub ?x16 ?x68))) ?x148)))
(flet ($x163 (= bv1[32] bv15[32]))
(flet ($x168 (= bv1[32] bv14[32]))
(let (?x169 (ite $x168 (concat bv0[14] (extract[31:14] (bvsub ?x16 ?x68))) (ite $x163 (concat bv0[15] (extract[31:15] (bvsub ?x16 ?x68))) ?x158)))
(flet ($x173 (= bv1[32] bv13[32]))
(flet ($x179 (= bv1[32] bv12[32]))
(let (?x180 (ite $x179 (concat bv0[12] (extract[31:12] (bvsub ?x16 ?x68))) (ite $x173 (concat bv0[13] (extract[31:13] (bvsub ?x16 ?x68))) ?x169)))
(flet ($x184 (= bv1[32] bv11[32]))
(flet ($x190 (= bv1[32] bv10[32]))
(let (?x191 (ite $x190 (concat bv0[10] (extract[31:10] (bvsub ?x16 ?x68))) (ite $x184 (concat bv0[11] (extract[31:11] (bvsub ?x16 ?x68))) ?x180)))
(flet ($x196 (= bv1[32] bv9[32]))
(flet ($x202 (= bv1[32] bv8[32]))
(let (?x203 (ite $x202 (concat bv0[8] (extract[31:8] (bvsub ?x16 ?x68))) (ite $x196 (concat bv0[9] (extract[31:9] (bvsub ?x16 ?x68))) ?x191)))
(flet ($x207 (= bv1[32] bv7[32]))
(flet ($x213 (= bv1[32] bv6[32]))
(let (?x214 (ite $x213 (concat bv0[6] (extract[31:6] (bvsub ?x16 ?x68))) (ite $x207 (concat bv0[7] (extract[31:7] (bvsub ?x16 ?x68))) ?x203)))
(flet ($x219 (= bv1[32] bv5[32]))
(flet ($x224 (= bv1[32] bv4[32]))
(let (?x225 (ite $x224 (concat bv0[4] (extract[31:4] (bvsub ?x16 ?x68))) (ite $x219 (concat bv0[5] (extract[31:5] (bvsub ?x16 ?x68))) ?x214)))
(flet ($x230 (= bv1[32] bv3[32]))
(flet ($x236 (= bv1[32] bv2[32]))
(let (?x237 (ite $x236 (concat bv0[2] (extract[31:2] (bvsub ?x16 ?x68))) (ite $x230 (concat bv0[3] (extract[31:3] (bvsub ?x16 ?x68))) ?x225)))
(flet ($x241 (= bv1[32] bv1[32]))
(let (?x69 (bvsub ?x16 ?x68))
(flet ($x243 (= bv1[32] bv0[32]))
(let (?x245 (bvadd (ite $x243 ?x69 (ite $x241 (concat bv0[1] (extract[31:1] ?x69)) ?x237)) ?x68))
(let (?x253 (ite (= bv16[32] bv30[32]) (concat bv0[30] (extract[31:30] ?x245)) (ite (= bv16[32] bv31[32]) (concat bv0[31] (extract[31:31] ?x245)) bv0[32])))
(let (?x261 (ite (= bv16[32] bv28[32]) (concat bv0[28] (extract[31:28] ?x245)) (ite (= bv16[32] bv29[32]) (concat bv0[29] (extract[31:29] ?x245)) ?x253)))
(let (?x269 (ite (= bv16[32] bv26[32]) (concat bv0[26] (extract[31:26] ?x245)) (ite (= bv16[32] bv27[32]) (concat bv0[27] (extract[31:27] ?x245)) ?x261)))
(let (?x277 (ite (= bv16[32] bv24[32]) (concat bv0[24] (extract[31:24] ?x245)) (ite (= bv16[32] bv25[32]) (concat bv0[25] (extract[31:25] ?x245)) ?x269)))
(let (?x285 (ite (= bv16[32] bv22[32]) (concat bv0[22] (extract[31:22] ?x245)) (ite (= bv16[32] bv23[32]) (concat bv0[23] (extract[31:23] ?x245)) ?x277)))
(let (?x293 (ite (= bv16[32] bv20[32]) (concat bv0[20] (extract[31:20] ?x245)) (ite (= bv16[32] bv21[32]) (concat bv0[21] (extract[31:21] ?x245)) ?x285)))
(let (?x301 (ite (= bv16[32] bv18[32]) (concat bv0[18] (extract[31:18] ?x245)) (ite (= bv16[32] bv19[32]) (concat bv0[19] (extract[31:19] ?x245)) ?x293)))
(let (?x309 (ite (= bv16[32] bv16[32]) (concat bv0[16] (extract[31:16] ?x245)) (ite (= bv16[32] bv17[32]) (concat bv0[17] (extract[31:17] ?x245)) ?x301)))
(let (?x317 (ite (= bv16[32] bv14[32]) (concat bv0[14] (extract[31:14] ?x245)) (ite (= bv16[32] bv15[32]) (concat bv0[15] (extract[31:15] ?x245)) ?x309)))
(let (?x325 (ite (= bv16[32] bv12[32]) (concat bv0[12] (extract[31:12] ?x245)) (ite (= bv16[32] bv13[32]) (concat bv0[13] (extract[31:13] ?x245)) ?x317)))
(let (?x333 (ite (= bv16[32] bv10[32]) (concat bv0[10] (extract[31:10] ?x245)) (ite (= bv16[32] bv11[32]) (concat bv0[11] (extract[31:11] ?x245)) ?x325)))
(let (?x341 (ite (= bv16[32] bv8[32]) (concat bv0[8] (extract[31:8] ?x245)) (ite (= bv16[32] bv9[32]) (concat bv0[9] (extract[31:9] ?x245)) ?x333)))
(let (?x349 (ite (= bv16[32] bv6[32]) (concat bv0[6] (extract[31:6] ?x245)) (ite (= bv16[32] bv7[32]) (concat bv0[7] (extract[31:7] ?x245)) ?x341)))
(let (?x357 (ite (= bv16[32] bv4[32]) (concat bv0[4] (extract[31:4] ?x245)) (ite (= bv16[32] bv5[32]) (concat bv0[5] (extract[31:5] ?x245)) ?x349)))
(let (?x365 (ite (= bv16[32] bv2[32]) (concat bv0[2] (extract[31:2] ?x245)) (ite (= bv16[32] bv3[32]) (concat bv0[3] (extract[31:3] ?x245)) ?x357)))
(let (?x371 (ite (= bv16[32] bv0[32]) ?x245 (ite (= bv16[32] bv1[32]) (concat bv0[1] (extract[31:1] ?x245)) ?x365)))
(let (?x372 (concat bv0[32] ?x371))
(let (?x380 (bvsub (bvadd (concat (extract[32:0] ?x372) bv0[31]) (concat (extract[34:0] ?x372) bv0[29])) (concat (extract[36:0] ?x372) bv0[27])))
(let (?x386 (bvsub (bvadd ?x380 (concat (extract[38:0] ?x372) bv0[25])) (concat (extract[40:0] ?x372) bv0[23])))
(let (?x392 (bvsub (bvadd ?x386 (concat (extract[42:0] ?x372) bv0[21])) (concat (extract[44:0] ?x372) bv0[19])))
(let (?x398 (bvsub (bvadd ?x392 (concat (extract[46:0] ?x372) bv0[17])) (concat (extract[48:0] ?x372) bv0[15])))
(let (?x404 (bvsub (bvadd ?x398 (concat (extract[50:0] ?x372) bv0[13])) (concat (extract[52:0] ?x372) bv0[11])))
(let (?x410 (bvsub (bvadd ?x404 (concat (extract[54:0] ?x372) bv0[9])) (concat (extract[56:0] ?x372) bv0[7])))
(let (?x416 (bvsub (bvadd ?x410 (concat (extract[58:0] ?x372) bv0[5])) (concat (extract[60:0] ?x372) bv0[3])))
(let (?x420 (extract[63:32] (bvadd ?x416 (concat (extract[62:0] ?x372) bv0[1]))))
(let (?x427 (ite $x79 (concat bv0[30] (extract[31:30] (bvsub ?x371 ?x420))) (ite (= bv1[32] bv31[32]) (concat bv0[31] (extract[31:31] (bvsub ?x371 ?x420))) bv0[32])))
(let (?x433 (ite $x90 (concat bv0[28] (extract[31:28] (bvsub ?x371 ?x420))) (ite $x85 (concat bv0[29] (extract[31:29] (bvsub ?x371 ?x420))) ?x427)))
(let (?x439 (ite $x102 (concat bv0[26] (extract[31:26] (bvsub ?x371 ?x420))) (ite $x96 (concat bv0[27] (extract[31:27] (bvsub ?x371 ?x420))) ?x433)))
(let (?x445 (ite $x114 (concat bv0[24] (extract[31:24] (bvsub ?x371 ?x420))) (ite $x108 (concat bv0[25] (extract[31:25] (bvsub ?x371 ?x420))) ?x439)))
(let (?x451 (ite $x125 (concat bv0[22] (extract[31:22] (bvsub ?x371 ?x420))) (ite $x119 (concat bv0[23] (extract[31:23] (bvsub ?x371 ?x420))) ?x445)))
(let (?x457 (ite $x137 (concat bv0[20] (extract[31:20] (bvsub ?x371 ?x420))) (ite $x131 (concat bv0[21] (extract[31:21] (bvsub ?x371 ?x420))) ?x451)))
(let (?x463 (ite $x147 (concat bv0[18] (extract[31:18] (bvsub ?x371 ?x420))) (ite $x142 (concat bv0[19] (extract[31:19] (bvsub ?x371 ?x420))) ?x457)))
(let (?x469 (ite $x157 (concat bv0[16] (extract[31:16] (bvsub ?x371 ?x420))) (ite $x153 (concat bv0[17] (extract[31:17] (bvsub ?x371 ?x420))) ?x463)))
(let (?x475 (ite $x168 (concat bv0[14] (extract[31:14] (bvsub ?x371 ?x420))) (ite $x163 (concat bv0[15] (extract[31:15] (bvsub ?x371 ?x420))) ?x469)))
(let (?x481 (ite $x179 (concat bv0[12] (extract[31:12] (bvsub ?x371 ?x420))) (ite $x173 (concat bv0[13] (extract[31:13] (bvsub ?x371 ?x420))) ?x475)))
(let (?x487 (ite $x190 (concat bv0[10] (extract[31:10] (bvsub ?x371 ?x420))) (ite $x184 (concat bv0[11] (extract[31:11] (bvsub ?x371 ?x420))) ?x481)))
(let (?x493 (ite $x202 (concat bv0[8] (extract[31:8] (bvsub ?x371 ?x420))) (ite $x196 (concat bv0[9] (extract[31:9] (bvsub ?x371 ?x420))) ?x487)))
(let (?x499 (ite $x213 (concat bv0[6] (extract[31:6] (bvsub ?x371 ?x420))) (ite $x207 (concat bv0[7] (extract[31:7] (bvsub ?x371 ?x420))) ?x493)))
(let (?x505 (ite $x224 (concat bv0[4] (extract[31:4] (bvsub ?x371 ?x420))) (ite $x219 (concat bv0[5] (extract[31:5] (bvsub ?x371 ?x420))) ?x499)))
(let (?x511 (ite $x236 (concat bv0[2] (extract[31:2] (bvsub ?x371 ?x420))) (ite $x230 (concat bv0[3] (extract[31:3] (bvsub ?x371 ?x420))) ?x505)))
(let (?x421 (bvsub ?x371 ?x420))
(let (?x516 (bvadd (ite $x243 ?x421 (ite $x241 (concat bv0[1] (extract[31:1] ?x421)) ?x511)) ?x420))
(let (?x524 (ite (= bv3[32] bv30[32]) (concat bv0[30] (extract[31:30] ?x516)) (ite (= bv3[32] bv31[32]) (concat bv0[31] (extract[31:31] ?x516)) bv0[32])))
(let (?x532 (ite (= bv3[32] bv28[32]) (concat bv0[28] (extract[31:28] ?x516)) (ite (= bv3[32] bv29[32]) (concat bv0[29] (extract[31:29] ?x516)) ?x524)))
(let (?x540 (ite (= bv3[32] bv26[32]) (concat bv0[26] (extract[31:26] ?x516)) (ite (= bv3[32] bv27[32]) (concat bv0[27] (extract[31:27] ?x516)) ?x532)))
(let (?x548 (ite (= bv3[32] bv24[32]) (concat bv0[24] (extract[31:24] ?x516)) (ite (= bv3[32] bv25[32]) (concat bv0[25] (extract[31:25] ?x516)) ?x540)))
(let (?x556 (ite (= bv3[32] bv22[32]) (concat bv0[22] (extract[31:22] ?x516)) (ite (= bv3[32] bv23[32]) (concat bv0[23] (extract[31:23] ?x516)) ?x548)))
(let (?x564 (ite (= bv3[32] bv20[32]) (concat bv0[20] (extract[31:20] ?x516)) (ite (= bv3[32] bv21[32]) (concat bv0[21] (extract[31:21] ?x516)) ?x556)))
(let (?x572 (ite (= bv3[32] bv18[32]) (concat bv0[18] (extract[31:18] ?x516)) (ite (= bv3[32] bv19[32]) (concat bv0[19] (extract[31:19] ?x516)) ?x564)))
(let (?x580 (ite (= bv3[32] bv16[32]) (concat bv0[16] (extract[31:16] ?x516)) (ite (= bv3[32] bv17[32]) (concat bv0[17] (extract[31:17] ?x516)) ?x572)))
(let (?x588 (ite (= bv3[32] bv14[32]) (concat bv0[14] (extract[31:14] ?x516)) (ite (= bv3[32] bv15[32]) (concat bv0[15] (extract[31:15] ?x516)) ?x580)))
(let (?x596 (ite (= bv3[32] bv12[32]) (concat bv0[12] (extract[31:12] ?x516)) (ite (= bv3[32] bv13[32]) (concat bv0[13] (extract[31:13] ?x516)) ?x588)))
(let (?x604 (ite (= bv3[32] bv10[32]) (concat bv0[10] (extract[31:10] ?x516)) (ite (= bv3[32] bv11[32]) (concat bv0[11] (extract[31:11] ?x516)) ?x596)))
(let (?x612 (ite (= bv3[32] bv8[32]) (concat bv0[8] (extract[31:8] ?x516)) (ite (= bv3[32] bv9[32]) (concat bv0[9] (extract[31:9] ?x516)) ?x604)))
(let (?x620 (ite (= bv3[32] bv6[32]) (concat bv0[6] (extract[31:6] ?x516)) (ite (= bv3[32] bv7[32]) (concat bv0[7] (extract[31:7] ?x516)) ?x612)))
(let (?x628 (ite (= bv3[32] bv4[32]) (concat bv0[4] (extract[31:4] ?x516)) (ite (= bv3[32] bv5[32]) (concat bv0[5] (extract[31:5] ?x516)) ?x620)))
(let (?x636 (ite (= bv3[32] bv2[32]) (concat bv0[2] (extract[31:2] ?x516)) (ite (= bv3[32] bv3[32]) (concat bv0[3] (extract[31:3] ?x516)) ?x628)))
(let (?x642 (ite (= bv3[32] bv0[32]) ?x516 (ite (= bv3[32] bv1[32]) (concat bv0[1] (extract[31:1] ?x516)) ?x636)))
(let (?x648 (bvsub ?x371 (bvadd (concat (extract[28:0] ?x642) bv0[3]) (concat (extract[30:0] ?x642) bv0[1]))))
(= bv253[8] (extract[7:0] ?x648))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
:assumption
(let (?x13 (concat (select arr51 bv58[32]) (concat (select arr51 bv57[32]) (select arr51 bv56[32]))))
(let (?x16 (concat (select arr51 bv59[32]) ?x13))
(not (= bv4294967295[32] ?x16))))
:assumption
(let (?x13 (concat (select arr51 bv58[32]) (concat (select arr51 bv57[32]) (select arr51 bv56[32]))))
(let (?x16 (concat (select arr51 bv59[32]) ?x13))
(bvule bv100000[32] ?x16)))
:assumption
(let (?x13 (concat (select arr51 bv58[32]) (concat (select arr51 bv57[32]) (select arr51 bv56[32]))))
(let (?x16 (concat (select arr51 bv59[32]) ?x13))
(bvule ?x16 bv999999[32])))
:assumption
(let (?x13 (concat (select arr51 bv58[32]) (concat (select arr51 bv57[32]) (select arr51 bv56[32]))))
(let (?x16 (concat (select arr51 bv59[32]) ?x13))
(let (?x17 (sign_extend[32] ?x16))
(bvsle bv0[64] ?x17))))
:formula
true
)
I've tried explicitly setting the random seeds, but this (unsurprisingly) hasn't helped:
Z3_set_param_value(z3_config, "ARITH_RANDOM_SEED", "0");
Z3_set_param_value(z3_config, "RANDOM_SEED", "0");
Is it normal for Z3 to display such significant timing variation just by changing the names of symbols?
Also, is there any array naming scheme that would reduce solver time across the board?
Thanks!

We observe this kind of discrepancy in benchmarks that contain expressions of the form
(bvadd t_1 ... t_n), or
(bvmul t_1 ... t_n)
The benchmark may not explicitly contain this kind of term. For example, the term (bvadd a (bvsub b (bvadd c d))) is simplified to an n-ary sum.
In many instances, the order of the terms t_i has a dramatic effect on performance. The variable names affect the order of these terms. Z3 has two formula simplifiers.
The old one (located at src/ast/simplifier) uses the internal ids associated with each expression to sort arguments of AC operators. This approach is not affected by name changes, but it has another nasty side-effect: the order in which we create expressions affects the internal id assignment, and consequently the order of the terms t_i. This was an issue in many Z3 applications.
The new simplifier (located at src/ast/rewriter) uses a different approach. It sorts expressions using a total order based on the structure of expressions. In this new approach, the order we create expressions does not matter, but the names do. New code uses this new formula simplifier. However, we still have old code that uses the old simplifier.
For QF_AUFBV benchmarks, both formula simplifiers are used. This will change in the future, after we replace all occurrences of the old simplifier with the new one.
Finally, it would be great if you could send us the set of benchmarks where you are having performance problems. It would help us to improve Z3.
EDIT
I'd like to emphasize that the main issue is the occurrence of expressions of the form (bvadd t_1 ... t_n). Second, for QF_AUFBV benchmarks both simplifiers are used. In the current version, it is hard to avoid these timing fluctuations. For example, we should also observe timing fluctuations if we reorder the assumptions.
Here is a description of what happens in your instance, and why the name affects the behavior. It is a little bit technical, but it should clarify what is going on.
1- The new simplifier is executed. This simplifier caches intermediate results using a hashtable. The hashcode of an AST depends on the names used for constants and function symbols.
2- After the new simplifier is done, the cache is deleted. We traverse the ASTs stored in the cache and decrement their reference counters. If the counter is zero, the AST is deleted. IMPORTANT: the order of the ASTs in the hashtable depends on their hash code. So, the hash code (and consequently the names) may affect the order ASTs are deleted.
3- The AST manager in Z3 assigns an internal ID to each AST node. When an AST node is deleted, its ID is recycled. That is, the ID can be assigned to new AST nodes. We do that because we don't want to run out of IDs.
4- When the old simplifier is executed, it will create new ASTs, and recycled IDs are assigned to these new ASTs.
5- Since the old simplifier uses IDs to sort the arguments of bvadd, we can get a different order when we change the name of a variable.
Summary
Different name ==> Different Hash ==> Different order in the Hashtable ==> Different deletion order ==> Recycled IDs are reused in different order ==> new ASTs with different IDs ==> Affects how the old simplifier orders the arguments of bvadd

Related

Which approach is better in terms of performance to modify a hash, is it `Hash#merge` or double splat operator?

From the below example, which approach is better in terms of performance?
h = {a: 1, b: 2}
{**h, c: 3} => {:a=>1, :b=>2, :c=>3}
# or
h.merge(c: 3) => {:a=>1, :b=>2, :c=>3}
Basic benchmarking
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(:time => 10, :warmup => 2)
h = {a: 1, b: 2}
x.report("splat") {{**h, c: 3}}
x.report("merge") {h.merge(c: 3)}
x.compare!
end
suggests that merge is faster, for example
Warming up --------------------------------------
splat 243.017k i/100ms
merge 315.349k i/100ms
Calculating -------------------------------------
splat 3.388M (±11.8%) i/s - 33.293M in 10.005951s
merge 4.721M (±12.5%) i/s - 46.356M in 10.037133s
Comparison:
merge: 4720869.7 i/s
splat: 3388413.3 i/s - 1.39x (± 0.00) slower

How to debug msgpack serialisation issue in Google Cloud Dataflow job?

I have a Google Cloud Dataflow job with which I would like to extract named entities from a text using a specific spacy model neural coref.
Running the extraction without beam I can extract entities but when I try to run it with the DirectRunner the job fails due to a serialisation error from msgpack. I am not sure how to proceed in debugging this problem.
My requirements are quite barebones with requirements of:
apache-beam[gcp]==2.4
spacy==2.0.12
ujson==1.35
The issue might be something related to how spacy and beam are interplaying as the stacktrace shows spacy spouting out some of its methods which it shouldn't be doing.
Weird spacy log behaviour from stacktrace:
T4: <class 'entity.extract_entities.EntityExtraction'>
# T4
D2: <dict object at 0x1126c0398>
T4: <class 'spacy.lang.en.English'>
# T4
D2: <dict object at 0x1126b54b0>
D2: <dict object at 0x1126d1168>
F2: <function is_alpha at 0x11266d320>
# F2
F2: <function is_ascii at 0x112327c08>
# F2
F2: <function is_digit at 0x11266d398>
# F2
F2: <function is_lower at 0x11266d410>
# F2
F2: <function is_punct at 0x112327b90>
# F2
F2: <function is_space at 0x11266d488>
# F2
F2: <function is_title at 0x11266d500>
# F2
F2: <function is_upper at 0x11266d578>
# F2
F2: <function like_url at 0x11266d050>
# F2
F2: <function like_num at 0x110d55140>
# F2
F2: <function like_email at 0x112327f50>
# F2
Fu: <functools.partial object at 0x11266c628>
F2: <function _create_ftype at 0x1070af500>
# F2
T1: <type 'functools.partial'>
F2: <function _load_type at 0x1070af398>
# F2
# T1
F2: <function is_stop at 0x11266d5f0>
# F2
D2: <dict object at 0x1126b7168>
T4: <type 'set'>
# T4
# D2
# Fu
F2: <function is_oov at 0x11266d668>
# F2
F2: <function is_bracket at 0x112327cf8>
# F2
F2: <function is_quote at 0x112327d70>
# F2
F2: <function is_left_punct at 0x112327de8>
# F2
F2: <function is_right_punct at 0x112327e60>
# F2
F2: <function is_currency at 0x112327ed8>
# F2
Fu: <functools.partial object at 0x110d49ba8>
F2: <function _get_attr_unless_lookup at 0x1106e26e0>
# F2
F2: <function lower at 0x11266d140>
# F2
D2: <dict object at 0x112317c58>
# D2
D2: <dict object at 0x110e38168>
# D2
D2: <dict object at 0x112669c58>
# D2
# Fu
F2: <function word_shape at 0x11266d0c8>
# F2
F2: <function prefix at 0x11266d1b8>
# F2
F2: <function suffix at 0x11266d230>
# F2
F2: <function get_prob at 0x11266d6e0>
# F2
F2: <function cluster at 0x11266d2a8>
# F2
F2: <function _return_en at 0x11266f0c8>
# F2
# D2
B2: <built-in function unpickle_vocab>
# B2
T4: <type 'spacy.strings.StringStore'>
# T4
My current hypothesis is that perhaps there is some problem with my setup.py but I am not sure what is causing the issue currently.
The full stacktrace is:
/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/msgpack_numpy.py:183: DeprecationWarning: encoding is deprecated, Use raw=False instead.
return _unpackb(packed, **kwargs)
/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/msgpack_numpy.py:132: DeprecationWarning: encoding is deprecated.
use_bin_type=use_bin_type)
T4: <class 'entity.extract_entities.EntityExtraction'>
# T4
D2: <dict object at 0x1126c0398>
T4: <class 'spacy.lang.en.English'>
# T4
D2: <dict object at 0x1126b54b0>
D2: <dict object at 0x1126d1168>
F2: <function is_alpha at 0x11266d320>
# F2
F2: <function is_ascii at 0x112327c08>
# F2
F2: <function is_digit at 0x11266d398>
# F2
F2: <function is_lower at 0x11266d410>
# F2
F2: <function is_punct at 0x112327b90>
# F2
F2: <function is_space at 0x11266d488>
# F2
F2: <function is_title at 0x11266d500>
# F2
F2: <function is_upper at 0x11266d578>
# F2
F2: <function like_url at 0x11266d050>
# F2
F2: <function like_num at 0x110d55140>
# F2
F2: <function like_email at 0x112327f50>
# F2
Fu: <functools.partial object at 0x11266c628>
F2: <function _create_ftype at 0x1070af500>
# F2
T1: <type 'functools.partial'>
F2: <function _load_type at 0x1070af398>
# F2
# T1
F2: <function is_stop at 0x11266d5f0>
# F2
D2: <dict object at 0x1126b7168>
T4: <type 'set'>
# T4
# D2
# Fu
F2: <function is_oov at 0x11266d668>
# F2
F2: <function is_bracket at 0x112327cf8>
# F2
F2: <function is_quote at 0x112327d70>
# F2
F2: <function is_left_punct at 0x112327de8>
# F2
F2: <function is_right_punct at 0x112327e60>
# F2
F2: <function is_currency at 0x112327ed8>
# F2
Fu: <functools.partial object at 0x110d49ba8>
F2: <function _get_attr_unless_lookup at 0x1106e26e0>
# F2
F2: <function lower at 0x11266d140>
# F2
D2: <dict object at 0x112317c58>
# D2
D2: <dict object at 0x110e38168>
# D2
D2: <dict object at 0x112669c58>
# D2
# Fu
F2: <function word_shape at 0x11266d0c8>
# F2
F2: <function prefix at 0x11266d1b8>
# F2
F2: <function suffix at 0x11266d230>
# F2
F2: <function get_prob at 0x11266d6e0>
# F2
F2: <function cluster at 0x11266d2a8>
# F2
F2: <function _return_en at 0x11266f0c8>
# F2
# D2
B2: <built-in function unpickle_vocab>
# B2
T4: <type 'spacy.strings.StringStore'>
# T4
Traceback (most recent call last):
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/Users/chris/coref_entity_extraction/main.py", line 29, in <module>
run()
File "/Users/chris/coref_entity_extraction/main.py", line 24, in run
entities = records | 'ExtractEntities' >> beam.ParDo(EntityExtraction())
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/apache_beam/transforms/core.py", line 784, in __init__
super(ParDo, self).__init__(fn, *args, **kwargs)
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/apache_beam/transforms/ptransform.py", line 638, in __init__
self.fn = pickler.loads(pickler.dumps(self.fn))
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/apache_beam/internal/pickler.py", line 204, in dumps
s = dill.dumps(o)
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/dill/dill.py", line 259, in dumps
dump(obj, file, protocol, byref, fmode, recurse)#, strictio)
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/dill/dill.py", line 252, in dump
pik.dump(obj)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 224, in dump
self.save(obj)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 425, in save_reduce
save(state)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/apache_beam/internal/pickler.py", line 172, in new_save_module_dict
return old_save_module_dict(pickler, obj)
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/dill/dill.py", line 841, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 692, in _batch_setitems
save(v)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 425, in save_reduce
save(state)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/apache_beam/internal/pickler.py", line 172, in new_save_module_dict
return old_save_module_dict(pickler, obj)
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/dill/dill.py", line 841, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 687, in _batch_setitems
save(v)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 401, in save_reduce
save(args)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 568, in save_tuple
save(element)
File "/Users/chris/.pyenv/versions/2.7.14/lib/python2.7/pickle.py", line 306, in save
rv = reduce(self.proto)
File "vectors.pyx", line 108, in spacy.vectors.Vectors.__reduce__
File "vectors.pyx", line 409, in spacy.vectors.Vectors.to_bytes
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/spacy/util.py", line 485, in to_bytes
serialized[key] = getter()
File "vectors.pyx", line 404, in spacy.vectors.Vectors.to_bytes.serialize_weights
File "/Users/chris/coref_entity_extraction/venv/lib/python2.7/site-packages/msgpack_numpy.py", line 165, in packb
return Packer(**kwargs).pack(o)
File "msgpack/_packer.pyx", line 282, in msgpack._cmsgpack.Packer.pack
File "msgpack/_packer.pyx", line 288, in msgpack._cmsgpack.Packer.pack
File "msgpack/_packer.pyx", line 285, in msgpack._cmsgpack.Packer.pack
File "msgpack/_packer.pyx", line 232, in msgpack._cmsgpack.Packer._pack
File "msgpack/_packer.pyx", line 279, in msgpack._cmsgpack.Packer._pack
TypeError: can not serialize 'buffer' object
I have no idea how to debug this issue with beam. To reproduce the whole issue I have set up a repo with instructions on how to set everything up: https://github.com/swartchris8/coref_barebones
Are you able to run the same code from a regular Python program (not from a Beam DoFn) ?
If not, check whether you are storing any non-serializable state in a Beam DoFn (or any other function that will be serialized by Beam). This prevents Beam runners from serializing these functions (to be sent to workers) hence should be avoided.
In the end I got rid of the above issue by changing the package versions installed. I do think debugging the beam setup process is quite painful, though; my approach was just to manually try different package permutations.

Write to multiple files simultaneously on Julia

How do I print to multiple files simultaneously in Julia? Is there a cleaner way other than:
for f in [open("file1.txt", "w"), open("file2.txt", "w")]
write(f, "content")
close(f)
end
From your question I assume that you do not mean write in parallel (which probably would not speed up things due to the fact that the operation is probably IO bound).
Your solution has one small problem - it does not guarantee that f is closed if write throws an exception.
Here are three alternative ways to do it making sure the file is closed even on error:
for fname in ["file1.txt", "file2.txt"]
open(fname, "w") do f
write(f, "content")
end
end
for fname in ["file1.txt", "file2.txt"]
open(f -> write(f, "content"), fname, "w")
end
foreach(fn -> open(f -> write(f, "content"), fn, "w"),
["file1.txt", "file2.txt"])
They give the same result so the choice is a matter of taste (you can derive some more variations of similar implementations).
All the methods are based on the following method of open function:
open(f::Function, args...; kwargs....)
Apply the function f to the result of open(args...; kwargs...)
and close the resulting file descriptor upon completion.
Observe that the processing will still be terminated if an exception is actually thrown somewhere (it is only guaranteed that the file descriptor will be closed). In order to ensure that every write operation is actually attempted you can do something like:
for fname in ["file1.txt", "file2.txt"]
try
open(fname, "w") do f
write(f, "content")
end
catch ex
# here decide what should happen on error
# you might want to investigate the value of ex here
end
end
See https://docs.julialang.org/en/latest/manual/control-flow/#The-try/catch-statement-1 for the documentation of try/catch.
If you really want to write in parallel (using multiple processes) you can do it as follows:
using Distributed
addprocs(4) # using, say, 4 examples
function ppwrite()
@sync @distributed for i in 1:10
open("file$(i).txt", "w") do f
write(f, "content")
end
end
end
For comparison, the serial version would be
function swrite()
for i in 1:10
open("file$(i).txt", "w") do f
write(f, "content")
end
end
end
On my machine (ssd + quadcore) this leads to a ~70% speedup:
julia> @btime ppwrite();
3.586 ms (505 allocations: 25.56 KiB)
julia> @btime swrite();
6.066 ms (90 allocations: 6.41 KiB)
However, be aware that these timings might drastically change for real content, which might have to be transferred to different processes. Also they probably won't scale as IO will typically be the bottleneck at some point.
Update: larger (string) content
julia> using Distributed, Random, BenchmarkTools
julia> addprocs(4);
julia> global const content = [string(rand(1000,1000)) for _ in 1:10];
julia> function ppwrite()
@sync @distributed for i in 1:10
open("file$(i).txt", "w") do f
write(f, content[i])
end
end
end
ppwrite (generic function with 1 method)
julia> function swrite()
for i in 1:10
open("file$(i).txt", "w") do f
write(f, content[i])
end
end
end
swrite (generic function with 1 method)
julia> @btime swrite()
63.024 ms (110 allocations: 6.72 KiB)
julia> @btime ppwrite()
23.464 ms (509 allocations: 25.63 KiB) # ~ 2.7x speedup
Doing the same thing with string representations of larger 10000x10000 matrices (3 instead of 10) results in
julia> @time swrite()
7.189072 seconds (23.60 k allocations: 1.208 MiB)
julia> @time swrite()
7.293704 seconds (37 allocations: 2.172 KiB)
julia> @time ppwrite();
16.818494 seconds (2.53 M allocations: 127.230 MiB) # > 2x slowdown of first call
julia> @time ppwrite(); # 30% slowdown of second call
9.729389 seconds (556 allocations: 35.453 KiB)
Just to add a coroutine version that does IO in parallel like the multiple-process one, but also avoids the data duplication and transfer.
julia> using Distributed, Random
julia> global const content = [randstring(10^8) for _ in 1:10];
julia> function swrite()
for i in 1:10
open("file$(i).txt", "w") do f
write(f, content[i])
end
end
end
swrite (generic function with 1 method)
julia> @time swrite()
1.339323 seconds (23.68 k allocations: 1.212 MiB)
julia> @time swrite()
1.876770 seconds (114 allocations: 6.875 KiB)
julia> function awrite()
@sync for i in 1:10
@async open("file$(i).txt", "w") do f
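write(f, "content") # NOTE: writes the 7-byte literal "content", not content[i] as swrite does, so the timings below are not directly comparable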
end
end
end
awrite (generic function with 1 method)
julia> @time awrite()
0.243275 seconds (155.80 k allocations: 7.465 MiB)
julia> @time awrite()
0.001744 seconds (144 allocations: 14.188 KiB)
julia> addprocs(4)
4-element Array{Int64,1}:
2
3
4
5
julia> function ppwrite()
@sync @distributed for i in 1:10
open("file$(i).txt", "w") do f
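write(f, "content") # NOTE: writes the 7-byte literal "content", not content[i] as swrite does, so the timings below are not directly comparable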
end
end
end
ppwrite (generic function with 1 method)
julia> @time ppwrite()
1.806847 seconds (2.46 M allocations: 123.896 MiB, 1.74% gc time)
Task (done) @0x00007f23fa2a8010
julia> @time ppwrite()
0.062830 seconds (5.54 k allocations: 289.161 KiB)
Task (done) @0x00007f23f8734010
If you only needed to read the files line by line, you could probably do something like this:
for (line_a, line_b) in zip(eachline("file_a.txt"), eachline("file_b.txt"))
# do stuff
end
This works because eachline returns an iterable EachLine, which has an I/O stream linked to it.

Rails Logfiles "not opened for writing"

I am in the process of migrating my application from Ruby 1.8.7 to 1.9.3 (and later upgrading Rails). However, I get some problems with logging in 1.9.3.
By using the following code in environment.rb when console or server starts up
logfile = File.open(File.join(RAILS_ROOT,'/log/call_log.log'), 'a')
CALL_LOGGER = AuditLogger.new(logfile)
CALL_LOGGER.info "CALL_LOGGER: Server started."
I get
/usr/local/rvm/scripts/irbrc.rb:32:in `write': not opened for writing (IOError)
AuditLogger is defined as follows:
# Logger subclass that renders every entry as one line of the form
# "YYYY-MM-DD HH:MM:SS progname severity message".
class AuditLogger < Logger
  # Builds the text written for a single log call.
  #
  # severity  - severity label of the entry
  # timestamp - object responding to strftime (the time of the entry)
  # progname  - program name; nil renders as an empty field
  # msg       - the message to log
  #
  # Returns the formatted line, terminated by a newline.
  def format_message(severity, timestamp, progname, msg)
    stamp = timestamp.strftime("%Y-%m-%d %H:%M:%S")
    Kernel.format("%s %s %s %s\n", stamp, progname, severity, msg)
  end
end
If I do this manually with irb, this works
1.9.3-p551 :006 > logfile = File.open(File.join('/var/my_app/log/call_log.log'), 'a')
=> #<File:/var/my_app/log/call_log.log>
=> #<AuditLogger:0x00000001ad95e8 @progname=nil, @level=0, @default_formatter=#<Logger::Formatter:0x00000001ad95c0 @datetime_format=nil>, @formatter=nil, @logdev=#<Logger::LogDevice:0x00000001ad9570 @shift_size=nil, @shift_age=nil, @filename=nil, @dev=#<File:/var/my_app/log/call_log.log>, @mutex=#<Logger::LogDevice::LogDeviceMutex:0x00000001ad9548 @mon_owner=nil, @mon_count=0, @mon_mutex=#<Mutex:0x00000001ad94f8>>>>
1.9.3-p551 :011 > CALL_LOGGER.info "CALL_LOGGER: Server started."
=> true
I suspect that the class may have been overridden by another class. So I added some code to determine the class of the file which was currently used
@file.class.ancestors
[File, IO, File::Constants, Enumerable, Object, ActiveSupport::Dependencies::Loadable, PP::ObjectMixin, JSON::Ext::Generator::GeneratorMethods::Object, Kernel, BasicObject]
@file.methods.sort
results in
[:!, :!=, :!~, :<<, :<=>, :==, :===, :=~, :__id__, :__send__, :`, :acts_like?, :advise, :all?, :any?, :as_json, :atime, :autoclose=, :autoclose?, :binmode, :binmode?, :blank?, :breakpoint, :bytes, :chars, :chmod, :chown, :chunk, :class, :class_eval, :clone, :close, :close_on_exec=, :close_on_exec?, :close_read, :close_write, :closed?, :codepoints, :collect, :collect_concat, :copy_instance_variables_from, :count, :ctime, :cycle, :daemonize, :dclone, :debugger, :define_singleton_method, :detect, :display, :drop, :drop_while, :dup, :duplicable?, :each, :each_byte, :each_char, :each_codepoint, :each_cons, :each_entry, :each_line, :each_slice, :each_with_index, :each_with_object, :enable_warnings, :entries, :enum_for, :eof, :eof?, :eql?, :equal?, :exclude?, :extend, :extend_with_included_modules_from, :extended_by, :external_encoding, :fcntl, :fdatasync, :fileno, :find, :find_all, :find_index, :first, :flat_map, :flock, :flush, :freeze, :frozen?, :fsync, :getbyte, :getc, :gets, :grep, :group_by, :hash, :html_safe?, :include?, :index_by, :initialize_clone, :initialize_dup, :inject, :inspect, :instance_eval, :instance_exec, :instance_of?, :instance_values, :instance_variable_defined?, :instance_variable_get, :instance_variable_names, :instance_variable_set, :instance_variables, :internal_encoding, :ioctl, :is_a?, :isatty, :kind_of?, :lineno, :lineno=, :lines, :load_with_new_constant_marking, :lstat, :many?, :map, :max, :max_by, :member?, :metaclass, :metaclass_with_deprecation, :metaclass_without_deprecation, :method, :methods, :min, :min_by, :minmax, :minmax_by, :mtime, :nil?, :none?, :object_id, :one?, :partition, :path, :pid, :pos, :pos=, :presence, :present?, :pretty_inspect, :pretty_print, :pretty_print_cycle, :pretty_print_inspect, :pretty_print_instance_variables, :print, :printf, :private_methods, :protected_methods, :psych_to_yaml, :psych_y, :public_method, :public_methods, :public_send, :putc, :puts, :read, :read_nonblock, :readbyte, :readchar, :readline, 
:readlines, :readpartial, :reduce, :reject, :remove_subclasses_of, :reopen, :require, :require_association, :require_dependency, :require_library_or_gem, :require_or_load, :respond_to?, :respond_to_missing?, :returning, :reverse_each, :rewind, :seek, :select, :send, :set_encoding, :silence_stderr, :silence_stream, :silence_warnings, :singleton_class, :singleton_methods, :size, :slice_before, :sort, :sort_by, :stat, :subclasses_of, :sum, :suppress, :syck_to_yaml, :sync, :sync=, :sysread, :sysseek, :systemu, :syswrite, :taguri, :taguri=, :taint, :tainted?, :take, :take_while, :tap, :tell, :to_a, :to_enum, :to_hash, :to_i, :to_io, :to_json, :to_param, :to_path, :to_query, :to_s, :to_set, :to_yaml, :to_yaml_properties, :to_yaml_style, :truncate, :trust, :try, :tty?, :ungetbyte, :ungetc, :unloadable, :untaint, :untrust, :untrusted?, :with_options, :write, :write_nonblock, :zip]
According to Ruby 1.9.3 File class definition there should be some methods like 'readable?', 'writable?' in the method's list. But they aren't.
Here is also my list of rubygems which may be helpful
actionmailer (2.3.18)
actionpack (2.3.18)
activerecord (2.3.18)
activeresource (2.3.18)
activesupport (2.3.18)
archive-tar-minitar (0.5.2)
bigdecimal (1.1.0)
bundler (1.7.6)
bundler-unload (1.0.2)
cgi_multipart_eof_fix (2.5.0)
charlock_holmes (0.6.9)
columnize (0.9.0)
daemons (1.0.10)
debugger-ruby_core_source (1.3.8)
eventmachine (1.0.3)
executable-hooks (1.3.2)
fastercsv (1.5.5)
fastthread (1.0.7)
gem-wrappers (1.2.7)
gem_plugin (0.2.3)
geoip-c (0.9.1)
hoe (2.7.0)
hpricot (0.8.6)
httpclient (2.3.4.1)
image_size (1.2.0)
imagesize (0.1.1)
io-console (0.3)
io-tail (0.0.3)
json (1.8.1, 1.5.5)
json_pure (1.8.2, 1.8.1)
juggernaut (2.1.1)
linecache19 (0.5.12)
macaddr (1.6.7)
mechanize (2.0.1)
memcache-client (1.8.5)
mime-types (1.16)
mini_portile (0.6.2, 0.5.2)
minitest (2.5.1)
mongrel (1.2.0.pre2)
mysql (2.9.1)
net-http-digest_auth (1.4)
net-http-persistent (1.9)
nokogiri (1.6.6.2, 1.5.0)
oj (2.5.5)
rack (1.1.6)
rails (2.3.18)
rake (10.1.1, 0.9.2.2)
rbx-require-relative (0.0.9)
rcov (0.9.11)
rdoc (4.1.1, 3.9.5)
redis (3.2.1, 3.0.7)
ruby-debug-base19 (0.11.25)
ruby-debug-base19x (0.11.31)
ruby-debug-ide (0.4.22)
ruby-ole (1.2.11.8)
ruby_core_source (0.1.5)
rubyforge (2.0.4)
rubygems-bundler (1.4.4)
rubyzip (1.1.7)
rvm (1.11.3.9)
soap4r (1.5.8)
spreadsheet (0.9.7)
svg-graph (1.0.5)
systemu (2.6.5, 2.6.4)
thin (1.6.2)
tins (0.13.2)
uuid (2.3.7)
vpim (13.11.11)
webrobots (0.1.1)
xmpp4r (0.5)
Question: Does anybody know how to overcome this issue?
You're using an old version of rails, I'm not sure how things were handled back then, but Logger.new should accept a simple filename as a string.
Forget File.open and let it figure out how to handle the file:
logfile = File.join(Rails.root, '/log/call_log.log')
CALL_LOGGER = AuditLogger.new(logfile)
CALL_LOGGER.info "CALL_LOGGER: Server started."

Clojure building of URL from constituent parts

In Python I would do the following:
>>> q = urllib.urlencode({"q": "clojure url"})
>>> q
'q=clojure+url'
>>> url = "http://stackoverflow.com/search?" + q
>>> url
'http://stackoverflow.com/search?q=clojure+url'
How do I do all the encoding that's done for me above in Clojure? In other words, how do I do something akin to the following:
=> (build-url "http://stackoverflow.com/search" {"q" "clojure url"})
"http://stackoverflow.com/search?q=clojure+url"
There is a url-encode function in Ring's ring.util.codec namespace:
(ring.util.codec/url-encode "clojure url")
; => "clojure+url"
I'm not sure if there's a prepackaged function to turn a map into a query string, but perhaps this could do the job:
(use '[ring.util.codec :only [url-encode]])
(defn make-query-string
  "Turns the map m into a query string: every key and value is passed
  through url-encode, each pair is written as key=value, and the pairs
  are joined with \"&\"."
  [m]
  (let [encode-pair (fn [[param value]]
                      (str (url-encode param) "=" (url-encode value)))]
    (apply str (interpose "&" (map encode-pair m)))))
An example:
user> (make-query-string {"q" "clojure url" "foo" "bar"})
"q=clojure+url&foo=bar"
All that remains is concatenating the result onto the end of a URL:
(defn build-url
  "Returns url-base followed by \"?\" and the query string that
  make-query-string builds from query-map."
  [url-base query-map]
  (let [query (make-query-string query-map)]
    (str url-base "?" query)))
Seems to work:
user> (build-url "http://stackoverflow.com/search" {"q" "clojure url"})
"http://stackoverflow.com/search?q=clojure+url"
Update:
Perhaps a modified version might make for a more Clojure-friendly experience. Also handles encoding via a Ring-style optional argument with utf-8 as default.
(defn make-query-string
  "Turns the map m into a query string of key=value pairs joined with \"&\".
  Keys that are clojure.lang.Named (keywords, symbols) are converted with
  name; values are converted with str. Both are then passed to url-encode
  together with the optional encoding, which defaults to \"UTF-8\"."
  [m & [encoding]]
  (let [charset (or encoding "UTF-8")
        key->str (fn [x]
                   (if (instance? clojure.lang.Named x) (name x) x))
        encode-pair (fn [[k v]]
                      (str (url-encode (key->str k) charset)
                           "="
                           (url-encode (str v) charset)))]
    (apply str (interpose "&" (map encode-pair m)))))
(defn build-url
  "Returns url-base followed by \"?\" and the query string that
  make-query-string builds from query-map, forwarding the optional
  encoding argument unchanged."
  [url-base query-map & [encoding]]
  (let [query (make-query-string query-map encoding)]
    (str url-base "?" query)))
So now we can do
user> (build-url "http://example.com/q" {:foo 1})
"http://example.com/q?foo=1"
Here's one way:
user=> (import [java.net URLEncoder])
java.net.URLEncoder
user=> (str "http://stackoverflow.com/search?q=" (URLEncoder/encode "clojure url" "UTF-8"))
"http://stackoverflow.com/search?q=clojure+url"
I know this is not exactly the same as your Python snippet though. Please see the following post from the Clojure mailing list for a more complete answer:
http://www.mail-archive.com/clojure@googlegroups.com/msg29338.html
The code from there will allow you to do this:
user=> (encode-params {"q" "clojure url"})
"q=clojure+url"
This is the exact REPL equivalent of your python session, using clj-http.
user=> (require ['clj-http.client :as 'client])
nil
user=> (str "http://stackoverflow.com/search?"
user=* (client/generate-query-string {"q" "clojure url"}))
"http://stackoverflow.com/search?q=clojure+url"
but clj-http makes it even easier:
user=> (client/get "http://stackoverflow.com/search?"
user=* {:query-params {"q" "clojure url"}})
... <a lot of output, omitted to protect the innocent>...
assuming that you want to perform a GET request, that is.
clj-apache-http is pretty useful. With it you can do the following:
user=> (require ['com.twinql.clojure.http :as 'http])
nil
user=> (def q (http/encode-query {"q" "clojure url"}))
#'user/q
user=> (def url (str "http://stackoverflow.com/search?" q))
#'user/url
user=> url
"http://stackoverflow.com/search?q=clojure+url"
For anyone still looking for this in 2021 - ring.util.codec/form-encode does this perfectly:
http://ring-clojure.github.io/ring-codec/ring.util.codec.html#var-form-encode
A simple solution using str(ings):
(def q (str "clojure+url"))
(def url (str "http://stackoverflow.com/search?" q))

Resources