Neo4j Cypher query execution plan optimization
I have the following Cypher query:
MATCH (dg:DecisionGroup {id: -2})-[rdgd:CONTAINS]->(childD:Decision:Profile )
MATCH (childD)-[:EMPLOYMENT_AS]->(root2:Employment )
WHERE root2.id IN ([1]) WITH DISTINCT childD, dg, rdgd
MATCH path3=(root3:Location )-[:CONTAINS*0..]->(descendant3:Location)
WHERE (descendant3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]) OR root3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]))
UNWIND nodes(path3) AS pathNode3 WITH childD, dg, rdgd, COLLECT(DISTINCT pathNode3) AS pathNodes3
MATCH (childD)-[:LOCATED_IN]->(pathNode3) WHERE pathNode3 IN pathNodes3 WITH DISTINCT childD, dg, rdgd WHERE (childD.`active` = true) AND (childD.`experienceMonths` >= 129) AND ( (childD.`minSalaryUsd` <= 8883) OR (childD.`minHourlyRateUsd` <= 126) )
MATCH (childD)-[criterionRelationship8:HAS_VOTE_ON]->(c:Criterion {id: 2}) WHERE (criterionRelationship8.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship10:HAS_VOTE_ON]->(c:Criterion {id: 36}) WHERE (criterionRelationship10.`avgVotesWeight` >= 1.0) AND (criterionRelationship10.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship13:HAS_VOTE_ON]->(c:Criterion {id: 4}) WHERE (criterionRelationship13.`properties.experienceMonths` >= 0) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship15:HAS_VOTE_ON]->(c:Criterion {id: 22}) WHERE (criterionRelationship15.`avgVotesWeight` >= 1.0) AND (criterionRelationship15.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
OPTIONAL MATCH (childD)-[ru:CREATED_BY]->(u:User) WITH childD, u, ru, dg, rdgd
OPTIONAL MATCH (childD)-[vg:HAS_VOTE_ON]->(c:Criterion) WHERE c.id IN [2, 36, 4, 22] WITH c, childD, u, ru, dg, rdgd, (vg.avgVotesWeight * (CASE WHEN c IS NOT NULL THEN coalesce({`22`:1.2236918603185925, `2`:2.9245935245152226, `36`:0.2288013749943646, `4`:3.9599506966378435}[toString(c.id)], 1.0) ELSE 1.0 END)) as weight, vg.totalVotes as totalVotes
WITH childD, u, ru , dg, rdgd , toFloat(sum(weight)) as weight, toInteger(sum(totalVotes)) as totalVotes
ORDER BY weight DESC , childD.createdAt DESC
SKIP 0 LIMIT 20
WITH * OPTIONAL MATCH (childD)-[rup:UPDATED_BY]->(up:User)
RETURN rdgd, ru, u, rup, up, childD AS decision, weight, totalVotes, [ (c1)<-[vg1:HAS_VOTE_ON]-(childD) WHERE c1.id IN [2, 36, 4, 22] | {criterion: c1, relationship: vg1} ] AS weightedCriteria
This query is automatically generated by my Cypher query builder. Right now, on 1000 Profile nodes, the query takes ~8 seconds to execute.
It looks like this part of the query causes most of the issues:
MATCH (childD)-[:EMPLOYMENT_AS]->(root2:Employment )
WHERE root2.id IN ([1]) WITH DISTINCT childD, dg, rdgd
MATCH path3=(root3:Location )-[:CONTAINS*0..]->(descendant3:Location)
WHERE (descendant3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]) OR root3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]))
UNWIND nodes(path3) AS pathNode3 WITH childD, dg, rdgd, COLLECT(DISTINCT pathNode3) AS pathNodes3
MATCH (childD)-[:LOCATED_IN]->(pathNode3) WHERE pathNode3 IN pathNodes3 WITH DISTINCT childD, dg, rdgd
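(For completeness: the id filters above can only become index seeks if the id property is indexed for both labels. A hedged sketch of the index definitions this assumes, in Neo4j 4.x syntax with illustrative index names:)
CREATE INDEX employment_id IF NOT EXISTS FOR (e:Employment) ON (e.id);
CREATE INDEX location_id IF NOT EXISTS FOR (l:Location) ON (l.id);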
Is there a way to optimize this?
This is the PROFILE output (screenshot not included here):
UPDATED
I reimplemented the initial part of the query as follows:
WITH [] as ceNodeList MATCH (root2:Employment )
WHERE root2.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
WITH ceNodeList, root2, COLLECT(root2) AS listRoot2
WITH apoc.coll.unionAll(ceNodeList, listRoot2) AS ceNodeList
WITH apoc.coll.toSet(ceNodeList) as ceNodeList
MATCH (root3:Location )
WHERE root3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])
WITH ceNodeList, root3, COLLECT(root3) AS listRoot3
OPTIONAL MATCH (root3)-[:CONTAINS*0..]->(descendant3:Location)
OPTIONAL MATCH (ascendant3:Location)-[:CONTAINS*0..]->(root3)
WITH ceNodeList, listRoot3, COLLECT( DISTINCT ascendant3) AS listAscendant3, COLLECT( DISTINCT descendant3) AS listDescendant3
WITH listRoot3, listAscendant3, apoc.coll.unionAll(ceNodeList, apoc.coll.unionAll(listDescendant3, apoc.coll.unionAll(listRoot3, listAscendant3))) AS ceNodeList
WITH apoc.coll.toSet(ceNodeList) as ceNodeList
UNWIND ceNodeList AS ceNode
WITH DISTINCT ceNode MATCH (dg:DecisionGroup {id: -2})-[rdgd:CONTAINS]->(childD:Decision:Profile ) -[:REQUIRES]->(ceNode)
WITH DISTINCT childD, dg, rdgd, collect(ceNode) as ceNodes
WITH childD, dg, rdgd, ceNodes, reduce(ceNodeLabels = [], n IN ceNodes | ceNodeLabels + labels(n)) as ceNodeLabels
WHERE all(x IN ['Employment', 'Location']
WHERE x IN ceNodeLabels) WITH childD, dg, rdgd return count(childD)
Now it runs several times faster, but it is still not ideal. Is there anything else I can do to improve it?
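(For reference, the reduce/all tail of the query simply flattens the labels of the collected nodes and checks that both required labels occur at least once. A tiny standalone sketch with made-up label lists in place of labels(n):)
WITH [['Employment'], ['Location', 'Region']] AS labelSets
WITH reduce(ceNodeLabels = [], ls IN labelSets | ceNodeLabels + ls) AS ceNodeLabels
RETURN ceNodeLabels, all(x IN ['Employment', 'Location'] WHERE x IN ceNodeLabels) AS coversBoth
// ceNodeLabels = ['Employment', 'Location', 'Region'], coversBoth = true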
UPDATED1
WITH [] as ceNodeList
MATCH (root2:Location )
WHERE root2.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
WITH ceNodeList, root2
OPTIONAL MATCH (root2)-[:CONTAINS*0..]->(descendant2:Location)
OPTIONAL MATCH (ascendant2:Location)-[:CONTAINS*0..]->(root2)
WITH ceNodeList, COLLECT(root2) AS listRoot2, COLLECT( DISTINCT ascendant2) AS listAscendant2, COLLECT( DISTINCT descendant2) AS listDescendant2
WITH apoc.coll.union(ceNodeList, apoc.coll.union(listDescendant2, apoc.coll.union(listRoot2, listAscendant2))) AS ceNodeList
WITH ceNodeList MATCH (root3:Employment )
WHERE root3.id IN ([101, 102, 103, 104, 105])
WITH ceNodeList, COLLECT(root3) AS listRoot3
WITH apoc.coll.union(ceNodeList, listRoot3) AS ceNodeList
WITH ceNodeList
UNWIND ceNodeList as seNode
WITH collect(seNode.id) as seNodeIds
WITH apoc.coll.toSet(seNodeIds) as seNodeIds
MATCH (dg:DecisionGroup {id: -2})-[rdgd:CONTAINS]->(childD:Profile ) -[:REQUIRES]->(ceNode)
WHERE ceNode.id in seNodeIds
WITH DISTINCT childD, dg, rdgd, collect(ceNode) as ceNodes
WITH childD, dg, rdgd, ceNodes, reduce(ceNodeLabels = [], n IN ceNodes | ceNodeLabels + labels(n)) as ceNodeLabels
WHERE all(x IN ['Employment', 'Location']
WHERE x IN ceNodeLabels)
WITH childD, dg, rdgd
Try this:
WITH [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] AS ids
WITH reduce(idsMap = {}, x IN ids | apoc.map.setEntry(idsMap, toString(x), true)) AS idsMap
MATCH (dg:DecisionGroup {id: -2})-[rdgd:CONTAINS]->(childD:Decision:Profile )
MATCH (childD)-[:EMPLOYMENT_AS]->(root2:Employment )
WHERE root2.id = 1
WITH DISTINCT childD, dg, rdgd, idsMap
MATCH (descendant3:Location) WHERE apoc.map.get(idsMap, toString(descendant3.id), false) = true
MATCH path3=(root3:Location )-[:CONTAINS*0..]->(descendant3)
WHERE apoc.map.get(idsMap, toString(root3.id), false) = true
UNWIND nodes(path3) AS pathNode3 WITH childD, dg, rdgd, COLLECT(DISTINCT pathNode3) AS pathNodes3
MATCH (childD)-[:LOCATED_IN]->(pathNode3) WHERE pathNode3 IN pathNodes3 WITH DISTINCT childD, dg, rdgd WHERE (childD.`active` = true) AND (childD.`experienceMonths` >= 129) AND ( (childD.`minSalaryUsd` <= 8883) OR (childD.`minHourlyRateUsd` <= 126) )
MATCH (childD)-[criterionRelationship8:HAS_VOTE_ON]->(c:Criterion {id: 2}) WHERE (criterionRelationship8.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship10:HAS_VOTE_ON]->(c:Criterion {id: 36}) WHERE (criterionRelationship10.`avgVotesWeight` >= 1.0) AND (criterionRelationship10.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship13:HAS_VOTE_ON]->(c:Criterion {id: 4}) WHERE (criterionRelationship13.`properties.experienceMonths` >= 0) WITH DISTINCT childD, dg, rdgd
MATCH (childD)-[criterionRelationship15:HAS_VOTE_ON]->(c:Criterion {id: 22}) WHERE (criterionRelationship15.`avgVotesWeight` >= 1.0) AND (criterionRelationship15.`properties.experienceMonths` >= 1) WITH DISTINCT childD, dg, rdgd
OPTIONAL MATCH (childD)-[ru:CREATED_BY]->(u:User) WITH childD, u, ru, dg, rdgd
OPTIONAL MATCH (childD)-[vg:HAS_VOTE_ON]->(c:Criterion) WHERE c.id IN [2, 36, 4, 22] WITH c, childD, u, ru, dg, rdgd, (vg.avgVotesWeight * (CASE WHEN c IS NOT NULL THEN coalesce({`22`:1.2236918603185925, `2`:2.9245935245152226, `36`:0.2288013749943646, `4`:3.9599506966378435}[toString(c.id)], 1.0) ELSE 1.0 END)) as weight, vg.totalVotes as totalVotes
WITH childD, u, ru , dg, rdgd , toFloat(sum(weight)) as weight, toInteger(sum(totalVotes)) as totalVotes
ORDER BY weight DESC , childD.createdAt DESC
SKIP 0 LIMIT 20
WITH * OPTIONAL MATCH (childD)-[rup:UPDATED_BY]->(up:User)
RETURN rdgd, ru, u, rup, up, childD AS decision, weight, totalVotes, [ (c1)<-[vg1:HAS_VOTE_ON]-(childD) WHERE c1.id IN [2, 36, 4, 22] | {criterion: c1, relationship: vg1} ] AS weightedCriteria
Here, I have created a map from the given ids and then used it instead of the IN operator.
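For illustration, a minimal standalone sketch (with made-up ids) of what that map looks like and how the lookup behaves; apoc.map.setEntry and apoc.map.get are the same APOC calls used in the query above:
WITH [1, 2, 3] AS ids
WITH reduce(idsMap = {}, x IN ids | apoc.map.setEntry(idsMap, toString(x), true)) AS idsMap
// idsMap is now {`1`: true, `2`: true, `3`: true}
RETURN apoc.map.get(idsMap, toString(2), false) AS hasId2  // true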
Update:
I think your new query can be simplified a bit. We can replace the combination of apoc.coll.unionAll and apoc.coll.toSet with a single call to apoc.coll.union. Try this:
MATCH (root2:Employment)
WHERE root2.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
WITH COLLECT(root2) AS ceNodeList
MATCH (root3:Location)
WHERE root3.id IN ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])
WITH ceNodeList, root3, COLLECT(root3) AS listRoot3
OPTIONAL MATCH (root3)-[:CONTAINS*0..]-(descendant3:Location)
WITH ceNodeList, listRoot3, COLLECT(DISTINCT descendant3) AS listDescendant3
WITH apoc.coll.union(ceNodeList, apoc.coll.union(listDescendant3, listRoot3)) AS ceNodeList
UNWIND ceNodeList AS ceNode
WITH DISTINCT ceNode MATCH (dg:DecisionGroup {id: -2})-[rdgd:CONTAINS]->(childD:Decision:Profile)-[:REQUIRES]->(ceNode)
WITH DISTINCT childD, dg, rdgd, collect(ceNode) as ceNodes
WITH childD, dg, rdgd, ceNodes, reduce(ceNodeLabels = [], n IN ceNodes | ceNodeLabels + labels(n)) as ceNodeLabels
WHERE all(x IN ['Employment', 'Location']
WHERE x IN ceNodeLabels) WITH childD, dg, rdgd return count(childD)
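For reference, a single apoc.coll.union call is enough here because it already returns a deduplicated union, whereas apoc.coll.unionAll keeps duplicates (which is why the earlier version needed a separate apoc.coll.toSet). A tiny sketch:
RETURN apoc.coll.unionAll([1, 2], [2, 3]) AS withDuplicates,  // [1, 2, 2, 3]
       apoc.coll.union([1, 2], [2, 3]) AS deduplicated        // [1, 2, 3]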
Related
Neo4j Cypher query does many DB hits
I have the following query:
MATCH (dg:DecisionGroup {id: -3})-[rdgd:CONTAINS]->(childD:Vacancy ) -[:REQUIRES]->(ceNode:Requirable) WHERE ceNode.id in [2, 4, 8, 9]
WITH childD , collect(ceNode) as ceNodes
with childD, apoc.coll.toSet(reduce(ceNodeLabels = [], n IN ceNodes | ceNodeLabels + labels(n))) as ceNodeLabels
WHERE all(x IN ['Employment', 'Location'] WHERE x IN ceNodeLabels)
WITH childD WHERE ( (childD.`hourlyRateUsd` >= 35) OR (childD.`salaryUsd` >= 5000) ) AND (childD.`active` = true)
WITH childD
MATCH (childD)-[:CONTAINS]->(childDStat:JobableStatistic)
MATCH (childD)-[:HAS_VOTE_ON]->(vc:Criterion) WHERE vc.id IN [64, 65, 67, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 63]
WITH childD, childDStat, collect(DISTINCT vc.id) as vacancyCriterionIds
WHERE ALL(id IN childDStat.detailedCriterionIds WHERE id IN [64, 65, 67, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 63])
UNWIND childDStat.detailedCriterionIds AS mCId
WITH childD, childDStat, mCId WHERE (childDStat['criterionAvgVoteWeights.' + mCId] = 0 OR childDStat['criterionAvgVoteWeights.' + mCId] <= {`80`:1.4, `84`:2.8, `72`:3.0, `83`:1.4, `82`:1.4, `71`:5.0, `81`:4.2, `77`:0.0, `76`:5.0, `65`:2.0, `64`:4.0, `75`:3.0, `74`:4.0, `85`:2.8, `63`:4.0, `79`:5.0, `68`:0.0, `78`:2.8, `67`:1.0}[toString(mCId)]) AND (childDStat['criterionExperienceMonths.' + mCId] = 0 OR childDStat['criterionExperienceMonths.' + mCId] <= {`80`:0, `84`:0, `72`:48, `83`:0, `82`:0, `71`:36, `81`:0, `77`:0, `76`:0, `65`:7, `64`:96, `75`:36, `74`:72, `85`:0, `63`:60, `79`:0, `68`:0, `78`:0, `67`:4}[toString(mCId)])
WITH childD, collect(mCId) as mCIds, childDStat WHERE size(mCIds) >= size(childDStat.detailedCriterionIds)
WITH childD, childDStat
UNWIND childDStat.criterionIds AS cId
WITH childD, sum(childDStat['criterionCoefficients.' + cId] * {`80`:1.4, `84`:2.8, `72`:3.0, `83`:1.4, `82`:1.4, `71`:5.0, `81`:4.2, `77`:0.0, `76`:5.0, `65`:2.0, `64`:4.0, `75`:3.0, `74`:4.0, `85`:2.8, `63`:4.0, `79`:5.0, `68`:0.0, `78`:2.8, `67`:1.0}[toString(cId)]) as weight, sum({`80`:1, `84`:1, `72`:1, `83`:1, `82`:1, `71`:1, `81`:1, `77`:1, `76`:1, `65`:1, `64`:1, `75`:1, `74`:1, `85`:1, `63`:1, `79`:1, `68`:1, `78`:1, `67`:1}[toString(cId)]) as totalVotes, sum(childDStat['criterionCoefficients.' + cId]) as criterionCoefficientSum
WITH childD, weight, totalVotes, criterionCoefficientSum
MATCH (dg:DecisionGroup {id: -3})-[rdgd:CONTAINS]->(childD)
OPTIONAL MATCH (childD)-[ru:CREATED_BY]->(u:User)
WITH childD, dg, rdgd, u, ru, weight, totalVotes , criterionCoefficientSum
ORDER BY weight / criterionCoefficientSum DESC, childD.createdAt DESC
SKIP 0 LIMIT 10
OPTIONAL MATCH (jobable:Decision:Profile {id: 35463})
RETURN childD AS decision, dg, rdgd, u, ru, weight, totalVotes, [ (jobable)-[vg1:HAS_VOTE_ON]->(c1:Criterion)<-[:HAS_VOTE_ON]-(childD) | {criterion: c1, relationship: vg1} ] AS jobableWeightedCriteria , [(jobable)-[:HAS_VOTE_ON]->(c1:Criterion)<-[vg1:HAS_VOTE_ON]-(childD) | {criterion: c1, relationship: vg1} ] AS weightedCriteria , [ (childD)-[:REQUIRES]->(ce:CompositeEntity) | {entity: ce} ] AS decisionCompositeEntities, [ (childD)-[:REQUIRES]->(ce:CompositeEntity)-[:CONTAINS]->(trans:Translation) WHERE trans.iso6391 = 'uk' | {entityId: toInteger(id(ce)), translation: trans} ] AS decisionCompositeEntitiesTranslations, [ (childD)-[:CONTAINS]->(trans:Translation) WHERE trans.iso6391 = 'uk' | {entityId: toInteger(childD.id), translation: trans} ] AS decisionTranslations
I pass every single value to the query as parameters. This is just the output for debugging.
Most of the query works fine except the part at the beginning. The PROFILE summary is: Cypher version: CYPHER 4.4, planner: COST, runtime: INTERPRETED; 781495 total db hits in 577 ms. Please advise how to optimize/refactor this part of the query in order to reduce DB hits. Thanks!
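One hedged idea, mirroring the re-anchoring used in the UPDATED query further up: match the handful of :Requirable nodes by id first and only then expand to the vacancies, so the plan does not have to walk every CONTAINS relationship of the DecisionGroup before filtering. A sketch of just the first few lines, assuming :Requirable(id) is indexed:
MATCH (ceNode:Requirable)
WHERE ceNode.id IN [2, 4, 8, 9]
MATCH (dg:DecisionGroup {id: -3})-[rdgd:CONTAINS]->(childD:Vacancy)-[:REQUIRES]->(ceNode)
WITH childD, collect(ceNode) AS ceNodes
WITH childD, apoc.coll.toSet(reduce(ceNodeLabels = [], n IN ceNodes | ceNodeLabels + labels(n))) AS ceNodeLabels
WHERE all(x IN ['Employment', 'Location'] WHERE x IN ceNodeLabels)
// ...the rest of the query stays unchanged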
Implementing WeightedRandomSampler on imbalanced data set: RuntimeError: invalid multinomial distribution
I am trying to implement a weighted sampler for a very imbalanced data set. There are 182 different classes. Here is an array of the bin counts per class:
array([69487, 5770, 5753, 138, 4308, 10, 1161, 29, 5611, 350, 7, 183, 218, 4, 3, 3872, 5, 950, 33, 3, 443, 16, 20, 330, 4353, 186, 19, 122, 546, 6, 44, 6, 3561, 2186, 3, 48, 8440, 338, 9, 610, 74, 236, 160, 449, 72, 6, 37, 1729, 2255, 1392, 12, 1, 3426, 513, 44, 3, 28, 12, 9, 27, 5, 75, 15, 3, 21, 549, 7, 25, 871, 240, 128, 28, 253, 62, 55, 12, 8, 57, 16, 99, 6, 5, 150, 7, 110, 8, 2, 1296, 70, 1927, 470, 1, 1, 511, 2, 620, 946, 36, 19, 21, 39, 6, 101, 15, 7, 1, 90, 29, 40, 14, 1, 4, 330, 1099, 1248, 1146, 7414, 934, 156, 80, 755, 3, 6, 6, 9, 21, 70, 219, 3, 3, 15, 15, 12, 69, 21, 15, 3, 101, 9, 9, 11, 6, 32, 6, 32, 4422, 16282, 12408, 2959, 3352, 146, 1329, 1300, 3795, 90, 1109, 120, 48, 23, 9, 1, 6, 2, 1, 11, 5, 27, 3, 7, 1, 3, 70, 1598, 254, 90, 20, 120, 380, 230, 180, 10, 10])
In some classes, instances are as low as 1. I am trying to implement a WeightedRandomSampler from torch for this dataset. However, as the class imbalance is so large, when I calculate the weights using
count_occr = np.bincount(dataset.y)
lbl_weights = 1. / count_occr
weights = np.array(lbl_weights)
weights = torch.from_numpy(weights)
sampler = WeightedRandomSampler(weights.type('torch.DoubleTensor'), len(weights*2))
I get two error messages:
RuntimeWarning: divide by zero encountered in true_divide
and
RuntimeError: invalid multinomial distribution (encountering probability entry = infinity or NaN)
Does anyone have a workaround for this? I was considering multiplying the lbl_weights by some scalar, however I am not sure if this is a viable option.
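A hedged sketch of one common workaround (not from the original post): guard the zero-count classes so that no weight becomes infinite, and index the per-class weights by each sample's label so the sampler receives one finite weight per sample. The toy labels array stands in for dataset.y:
import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler

labels = np.array([0, 0, 0, 1, 2, 2])                # hypothetical stand-in for dataset.y
count_occr = np.bincount(labels)

class_weights = np.zeros(len(count_occr), dtype=np.float64)
nonzero = count_occr > 0
class_weights[nonzero] = 1.0 / count_occr[nonzero]   # empty classes keep weight 0 instead of inf

sample_weights = class_weights[labels]               # one weight per sample, not per class
sampler = WeightedRandomSampler(
    weights=torch.as_tensor(sample_weights, dtype=torch.double),
    num_samples=len(sample_weights),
    replacement=True,
)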
InfluxDB: Points are Missing
When running the following piece of code:
!pip install influxdb-client
from influxdb_client import InfluxDBClient, BucketRetentionRules, WritePrecision
import influxdb_client
import time
client = InfluxDBClient(url="http://localhost:8086", token="my_password", org="primary")
write_api = client.write_api()
query_api = client.query_api()
write_api.write("myFirstBucket", "primary", ["myMeasurement,location=coyote_creek water_level=1"], write_precision=WritePrecision.MS)
time.sleep(1)
write_api.write("myFirstBucket", "primary", ["myMeasurement,location=coyote_creek water_level=2"], write_precision=WritePrecision.MS)
time.sleep(1)
write_api.write("myFirstBucket", "primary", ["myMeasurement,location=coyote_creek water_level=3"], write_precision=WritePrecision.MS)
time.sleep(1)
write_api.write("myFirstBucket", "primary", ["myMeasurement,location=coyote_creek water_level=4"], write_precision=WritePrecision.MS)
time.sleep(1)
tables = query_api.query('from(bucket:"myFirstBucket") |> range(start: -15s)')  # last 15 seconds
for table in tables:
    print(table)
    for row in table.records:
        print(row.values)
I get:
FluxTable() columns: 9, records: 2
{'result': '_result', 'table': 0, '_start': datetime.datetime(2022, 3, 2, 11, 32, 18, 591779, tzinfo=tzutc()), '_stop': datetime.datetime(2022, 3, 2, 11, 32, 33, 591779, tzinfo=tzutc()), '_time': datetime.datetime(2022, 3, 2, 11, 32, 30, 595000, tzinfo=tzutc()), '_value': 2.0, '_field': 'water_level', '_measurement': 'myMeasurement', 'location': 'coyote_creek'}
{'result': '_result', 'table': 0, '_start': datetime.datetime(2022, 3, 2, 11, 32, 18, 591779, tzinfo=tzutc()), '_stop': datetime.datetime(2022, 3, 2, 11, 32, 33, 591779, tzinfo=tzutc()), '_time': datetime.datetime(2022, 3, 2, 11, 32, 32, 590000, tzinfo=tzutc()), '_value': 3.0, '_field': 'water_level', '_measurement': 'myMeasurement', 'location': 'coyote_creek'}
Why do I get only two records? I would expect four records: one record for each write command!
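A hedged sketch of the two usual remedies (not from the original post): the default write_api() batches points in the background, and points written without an explicit timestamp can end up sharing a server-assigned timestamp and overwriting one another, so either write synchronously or attach your own timestamps. Connection details are copied from the question:
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
import time

client = InfluxDBClient(url="http://localhost:8086", token="my_password", org="primary")
write_api = client.write_api(write_options=SYNCHRONOUS)  # no background batching

for level in range(1, 5):
    point = (
        Point("myMeasurement")
        .tag("location", "coyote_creek")
        .field("water_level", float(level))
        .time(int(time.time() * 1000), WritePrecision.MS)  # explicit, distinct timestamp
    )
    write_api.write(bucket="myFirstBucket", org="primary", record=point)
    time.sleep(1)

client.close()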
How to create array from 1 to n digit with single line of code in ruby [duplicate]
This question already has answers here: Create array of n items based on integer value (6 answers). Closed 4 years ago.
I need to create an array of 1 to n numbers with a single line of code in Ruby. I have tried it using a while loop, but I'm sure there are other, simpler ways of doing this in Ruby.
a = []
b = 1
while b < 100 do
  a << b
  b += 1
end
# => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
Convert a range into an array. (1..n).to_a
Another way: you can just splat a range: [*1..n]
Example: [*1..10] # => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Or: a = Array(1..10) # => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Improve precision algorithm to detect facial expression using LBP
I'm developing a simple algorithm to detect several facial expressions (happiness, sadness, anger...). I'm basing it on this paper. Before applying the uniform LBP operator, I preprocess by dividing the normalized image into 6x6 regions, as shown in the example below (image omitted). Applying uniform LBP extracts 59 features for each region, so I end up with 2124 features per image (6x6x59). I think that is too large a number of features when I only have about 700 images to train a model, and I have read that this is not good for obtaining good precision. My question is: how can I reduce the dimensionality of the features, or what other technique could improve the precision of the algorithm?
A straightforward way to reduce feature dimensionality - and increase robustness at the same time - would be using rotation-invariant uniform patterns. For a circular neighbourhood of radius R formed by P = 8 pixels, the rotation-invariant uniform descriptor LBP^{riu2}_{8,R} has only P + 2 = 10 distinct output values, so each region is represented through 10 features. Thus dimensionality is reduced from 2124 to 6 × 6 × 10 = 360.
PCA can help to reduce the size of descriptor without loosing important information. Just google "opencv pca example". Another helpful thing is to add rotation invariance to your uniform lbp features. This will improve the precision as well as dramatically decrease size of descriptor from 59 to 10. static cv::Mat rotate_table = (cv::Mat_<uchar>(1, 256) << 0, 1, 1, 3, 1, 5, 3, 7, 1, 9, 5, 11, 3, 13, 7, 15, 1, 17, 9, 19, 5, 21, 11, 23, 3, 25, 13, 27, 7, 29, 15, 31, 1, 33, 17, 35, 9, 37, 19, 39, 5, 41, 21, 43, 11, 45, 23, 47, 3, 49, 25, 51, 13, 53, 27, 55, 7, 57, 29, 59, 15, 61, 31, 63, 1, 65, 33, 67, 17, 69, 35, 71, 9, 73, 37, 75, 19, 77, 39, 79, 5, 81, 41, 83, 21, 85, 43, 87, 11, 89, 45, 91, 23, 93, 47, 95, 3, 97, 49, 99, 25, 101, 51, 103, 13, 105, 53, 107, 27, 109, 55, 111, 7, 113, 57, 115, 29, 117, 59, 119, 15, 121, 61, 123, 31, 125, 63, 127, 1, 3, 65, 7, 33, 97, 67, 15, 17, 49, 69, 113, 35, 99, 71, 31, 9, 25, 73, 57, 37, 101, 75, 121, 19, 51, 77, 115, 39, 103, 79, 63, 5, 13, 81, 29, 41, 105, 83, 61, 21, 53, 85, 117, 43, 107, 87, 125, 11, 27, 89, 59, 45, 109, 91, 123, 23, 55, 93, 119, 47, 111, 95, 127, 3, 7, 97, 15, 49, 113, 99, 31, 25, 57, 101, 121, 51, 115, 103, 63, 13, 29, 105, 61, 53, 117, 107, 125, 27, 59, 109, 123, 55, 119, 111, 127, 7, 15, 113, 31, 57, 121, 115, 63, 29, 61, 117, 125, 59, 123, 119, 127, 15, 31, 121, 63, 61, 125, 123, 127, 31, 63, 125, 127, 63, 127, 127, 255 ); // the well known original uniform2 pattern static cv::Mat uniform_table = (cv::Mat_<uchar>(1, 256) << 0,1,2,3,4,58,5,6,7,58,58,58,8,58,9,10,11,58,58,58,58,58,58,58,12,58,58,58,13,58, 14,15,16,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,17,58,58,58,58,58,58,58,18, 58,58,58,19,58,20,21,22,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58,58,58,58,58,23,58,58,58,58,58,58,58,58,58,58,58,58,58, 58,58,24,58,58,58,58,58,58,58,25,58,58,58,26,58,27,28,29,30,58,31,58,58,58,32,58, 58,58,58,58,58,58,33,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,34,58,58,58,58, 58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,58, 58,35,36,37,58,38,58,58,58,39,58,58,58,58,58,58,58,40,58,58,58,58,58,58,58,58,58, 58,58,58,58,58,58,41,42,43,58,44,58,58,58,45,58,58,58,58,58,58,58,46,47,48,58,49, 58,58,58,50,51,52,58,53,54,55,56,57 ); static cv::Mat rotuni_table = (cv::Mat_<uchar>(1, 256) << 0, 1, 1, 2, 1, 9, 2, 3, 1, 9, 9, 9, 2, 9, 3, 4, 1, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 3, 9, 4, 5, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 3, 9, 9, 9, 4, 9, 5, 6, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 5, 9, 6, 7, 1, 2, 9, 3, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 2, 3, 9, 4, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 3, 4, 9, 5, 9, 9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 7, 4, 5, 9, 6, 9, 9, 9, 7, 5, 6, 9, 7, 6, 7, 7, 8 ); static void hist_patch_uniform(const Mat_<uchar> &fI, Mat &histo, int histSize, bool norm, bool rotinv) { cv::Mat ufI, h, n; if (rotinv) { cv::Mat r8; // rotation invariant transform cv::LUT(fI, rotate_table, r8); // uniformity for rotation invariant cv::LUT(r8, rotuni_table, ufI); // histSize is max 10 bins } else { cv::LUT(fI, uniform_table, ufI); } // the upper boundary is exclusive float range[] = {0, (float)histSize}; const float 
*histRange = {range};
    cv::calcHist(&ufI, 1, 0, Mat(), h, 1, &histSize, &histRange, true, false);
    if (norm)
        normalize(h, n);
    else
        n = h;
    histo.push_back(n.reshape(1, 1));
}
The input is your CV_8U grey-scaled patch (one of those rects). The output is the rotation-invariant, uniform, normalized, reshaped histogram (one row). Then you concatenate your patch histograms into the face descriptor, giving 6*6*10 = 360 values. This is good by itself, but with PCA you can reduce it to 300 or fewer without losing important information, and even improve the quality of detection, because the removed dimensions (say, those with variance below 5%) not only occupy space but also contain mostly noise (coming, for example, from Gaussian sensor noise). Then you can compare this concatenated histogram against a bank of faces, or use an SVM (an RBF kernel fits better). If you do it correctly, prediction for one face should not take more than 1-15 ms (5 ms on my iPhone 7). Hope this helps.
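A minimal sketch of the PCA step mentioned above (not part of the original answer), assuming the per-face descriptors are stacked as rows of a CV_32F matrix; the matrix names and the 300-component target are illustrative:
#include <opencv2/core.hpp>

// trainDescriptors: one 360-dimensional row per training face, type CV_32F
cv::PCA pca(trainDescriptors, cv::Mat(), cv::PCA::DATA_AS_ROW, 300);

// project any descriptor (training or query) into the reduced 300-dimensional space
cv::Mat reducedTrain = pca.project(trainDescriptors);
cv::Mat reducedQuery = pca.project(queryDescriptor);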