Searchkick Aggregations behavior - ruby-on-rails

I am working with the Searchkick Gem and Elastic search and am trying to understand the aggregations behavior.
I have three facets (Aggregations): City, State and Company.
If I filter by any one of them, the counts of other two are reduced to reflect the total in the result set. But the selected facet comes back with all values. So say I had 100 items in the index, and I filtered by a Company that had 2 total items in the index, the City and State counts are updated to reflect no more than 2. But the Company count remains at 100.
Example (filtered to City=Atlanta)
{
"query": {
"function_score": {
"functions": [
{
"filter": {
"and": [
{
"term": {
"featured": true
}
}
]
},
"boost_factor": 1000
}
],
"query": {
"match_all": {}
},
"score_mode": "sum"
}
},
"size": 20,
"from": 0,
"post_filter": {
"bool": {
"filter": [
{
"range": {
"expiration_date": {
"from": "2016-08-18T23:07:15.670-04:00",
"include_lower": true
}
}
},
{
"range": {
"created_at": {
"to": "2016-08-18T23:07:15.670-04:00",
"include_upper": true
}
}
},
{
"term": {
"published": true
}
},
{
"term": {
"tenant_id": 4
}
},
{
"term": {
"city": "Atlanta"
}
}
]
}
},
"aggs": {
"company": {
"filter": {
"bool": {
"must": [
{
"range": {
"expiration_date": {
"from": "2016-08-18T23:07:15.670-04:00",
"include_lower": true
}
}
},
{
"range": {
"created_at": {
"to": "2016-08-18T23:07:15.670-04:00",
"include_upper": true
}
}
},
{
"term": {
"published": true
}
},
{
"term": {
"tenant_id": 4
}
},
{
"term": {
"city": "Atlanta"
}
}
]
}
},
"aggs": {
"company": {
"terms": {
"field": "company",
"size": 10
}
}
}
},
"city": {
"filter": {
"bool": {
"must": [
{
"range": {
"expiration_date": {
"from": "2016-08-18T23:07:15.670-04:00",
"include_lower": true
}
}
},
{
"range": {
"created_at": {
"to": "2016-08-18T23:07:15.670-04:00",
"include_upper": true
}
}
},
{
"term": {
"published": true
}
},
{
"term": {
"tenant_id": 4
}
}
]
}
},
"aggs": {
"city": {
"terms": {
"field": "city",
"size": 10
}
}
}
},
"state": {
"filter": {
"bool": {
"must": [
{
"range": {
"expiration_date": {
"from": "2016-08-18T23:07:15.670-04:00",
"include_lower": true
}
}
},
{
"range": {
"created_at": {
"to": "2016-08-18T23:07:15.670-04:00",
"include_upper": true
}
}
},
{
"term": {
"published": true
}
},
{
"term": {
"tenant_id": 4
}
},
{
"term": {
"city": "Atlanta"
}
}
]
}
},
"aggs": {
"state": {
"terms": {
"field": "state",
"size": 10
}
}
}
}
},
"fields": []
}
Result (2 result returned, but 58 City Aggregations come back). Note Company and City return correct # of Aggregations:
{
"took": 114,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "jobs_development_20160818140128648",
"_type": "job",
"_id": "457134",
"_score": 1
},
{
"_index": "jobs_development_20160818140128648",
"_type": "job",
"_id": "457137",
"_score": 1
}
]
},
"aggregations": {
"city": {
"doc_count": 58,
"city": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 19,
"buckets": [
{
"key": "Los Angeles",
"doc_count": 8
},
{
"key": "London",
"doc_count": 7
},
{
"key": "New York",
"doc_count": 7
},
{
"key": "Burbank",
"doc_count": 5
},
{
"key": "Pasig",
"doc_count": 3
},
{
"key": "Atlanta",
"doc_count": 2
},
{
"key": "Chicago",
"doc_count": 2
},
{
"key": "Culver City",
"doc_count": 2
},
{
"key": "London Borough of Hackney",
"doc_count": 2
},
{
"key": "Birmingham",
"doc_count": 1
}
]
}
},
"company": {
"doc_count": 2,
"company": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Second Story",
"doc_count": 2
}
]
}
},
"state": {
"doc_count": 2,
"state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Georgia",
"doc_count": 2
}
]
}
}
}
}
What am I missing? Is this correct behavior?

Related

New Node should be added without changing the position of existing one using highchart

Highcharts.chart('container', {
chart: {
type: 'networkgraph',
...
I am using network-graph while adding or modifying node the existing node position is changing very much. If i want the changes without making the position change the what should I do.
code:
Highcharts.chart('container', {
chart: {
type: 'networkgraph',
backgroundColor: '#000000',
animation: 'false',
height: '500px',
//marginRight: '100px',
events: {
render() {
let chart = this;
//console.log(chart)
//render custom back button
chart.label = chart.renderer.label('Back', chart.plotWidth - 100, 50)
.css({
color: '#FFFFFF'
})
.attr({
fill: 'rgba(0, 0, 0, 0.75)',
padding: 8,
r: 5,
zIndex: 6,
})
.add()
//show it after initial load
chart.label.hide()
if (chart.forRender) {
chart.series[0].points.forEach((p, i) => {
p.graphic.element.onclick = function () {
chart.series[0].update({
//data: drilldownData[i][0].data,
//nodes: drilldownData[i][0].nodes
})
chart.forRender = false
chart.label.show();
}
})
}
chart.label.element.onclick = function () {
chart.forRender = true
chart.label.hide();
chart.series[0].update({
data: this.initialSeries.data,
nodes: this.initialSeries.nodes
})
}
}
}
},
title: {
text: '',
color: 'white'
},
plotOptions: {
networkgraph: {
layoutAlgorithm: {
linkLength: 30, // in pixels ,
enableSimulation: false ,
initialPositions: 'square',
// integration:"euler",
//attractiveForce: function (d, k) { return ( d -k); },
// repulsiveForce: function(){ return 1.5}
},
draggable: false,
dataLabels:{
enabled: true,
padding:12,
style:{
color:'#cbb6dd',
fontWeight: 'bold',
textOutline: '0px contrast'
}
}
}
},
series: [this.initialSeries]
}, function (chart) {
chart.forRender = true
});
json:
{
"data": [
{ "from": "OLO", "to": "CS003", "color": "#ffdd03" },
{ "from": "OLO", "to": "RGA", "color": "#ffffff" },
{ "from": "OLO", "to": "RGM", "color": "#ff0703" },
{ "from": "OLO", "to": "BDT", "color": "#ffffff" },
{ "from": "OLO", "to": "CW", "color": "#ffdd03" },
{ "from": "CS902", "to": "QTH", "color": "#ffffff" },
{ "from": "RGA", "to": "QTH", "color": "#ffdd03" },
{ "from": "RGA", "to": "CLC", "color": "#ff0703" },
{ "from": "RGM", "to": "SYE", "color": "#ffffff" },
{ "from": "SYE", "to": "CLC", "color": "#ff0703" },
{ "from": "BDT", "to": "CLC", "color": "#ffdd03" },
{ "from": "EHR", "to": "LVC", "color": "#ffffff" },
{ "from": "EHR", "to": "CLC", "color": "#ff0703" },
{ "from": "EHR", "to": "GWW", "color": "#ffffff" },
{ "from": "EHR", "to": "GWT1", "color": "#ff0703" },
{ "from": "EHR", "to": "TPZ", "color": "#ffdd03" },
{ "from": "EHR", "to": "NGC", "color": "#ffdd03" },
{ "from": "EHR", "to": "CS004", "color": "#ffffff" },
{ "from": "GWW", "to": "L/T1", "color": "#ff0703" },
{ "from": "CS004", "to": "MRT1", "color": "#ffdd03" }
],
"nodes": [
{
"id": "999 TXT",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "ASU",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "VRU",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Internet",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Dail IT",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "VMP",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CS05",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "MSP",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "VO",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "OR",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "O2",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Mobile",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CW",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "TW",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Resilient Network",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Jet",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "ONLY X",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "TOAD",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "VUL",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CRAWLEY",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "BT",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Others",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "VOIP ITP",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "Old Exchanges",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "SDIN",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "PH/1",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "OLO",
"marker": {
"symbol": "url(assets/images/server.png)"
}
},
{
"id": "CS003",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "RGA",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "RGM",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "BDT",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CS902",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "QTH",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CLC",
"marker": {
"symbol": "url(assets/images/customer-service.png)"
}
},
{
"id": "SYE",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "EHR",
"marker": {
"symbol": "url(assets/images/teamwork.png)"
}
},
{
"id": "LVC",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "GWW",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "GWT1",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "TPZ",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "NGC",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "L/T1",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "CS004",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
},
{
"id": "MRT1",
"marker": {
"symbol": "url(assets/images/computer.png)"
}
}
],
"name": "Main Series",
"dataLabels": {
"draggable": "false",
"enabled": "true"
},
"link": {
"width": 3
}
}
So can you please let me know how we can achieve this.
You can zero maxIterations property and define custom postions for the new points in redraw event:
chart: {
...,
events: {
load: function() {
var chart = this;
setTimeout(function() {
chart.series[0].addPoint(['G', 'Z'], true);
}, 2000);
},
redraw: function() {
var newNode = this.series[0].nodes[7];
newNode.plotX = 100;
newNode.plotY = 100;
}
}
}
Live demo: https://jsfiddle.net/BlackLabel/t742a9kh/
API Refefence:
https://api.highcharts.com/highcharts/series.networkgraph.layoutAlgorithm.maxIterations
https://api.highcharts.com/highcharts/chart.events.redraw

Serializing an F# function

Using F# I've some configuration that I want to serialize to disk using Newtonsoft.Json. The config data is a record type. One of the fields in the record type is a function of type string->bool and is used to compare a string to a given value. e.g.
(fun (x:string) -> x = "1")
Serializing the type succeeds but the field isn't successfully stored (it is recorded as '{}') and therefore deserializing fails. This seems to be by design.
How do I store a function so that it can be used to populate the record type from disk when deserialized and still be executable?
I've looked at quotations as a means of storing expressions as data but I'm not sure if this is the way to go. If it is then I'm struggling to get this working.
How can achieve what I need?
Update;
type Logic = string -> bool
The record type storing config;
type Exclusion =
| FromDataValue of QItem * Logic * ExcludedItems
| FromDataValuesAnd of (QItem * Logic) seq * ExcludedItems
| FromDataValuesOr of (QItem * Logic) seq seq * ExcludedItems
The Exclusion record is populated by a user and runs against some data sets excluding items from the return collection. The QItem represents a string in the dataset that the Logic function is applied to, and if returns true the items in ExcludedItems are excluded from the results.
Initially the config is created by a user within the solution so all works fine. However once a config is created I want to be able to save the config to disc so that it can be loaded and run again when required.
So I need to be able to store this, but I want to be able to store it as a string then run it again when loaded.
You can serialize a function to JSON and then deserialize it and execute the function later using FSPickler:
open MBrace.FsPickler
open MBrace.FsPickler.Json
open System.IO
open System.Text
let serializer = JsonSerializer(indent = true)
let utf8 = UTF8Encoding(false)
let toJson (x: 'a) =
use stream = new MemoryStream()
serializer.Serialize(stream, x)
stream.ToArray() |> utf8.GetString
let parseJson<'a> json =
use reader = new StringReader(json)
serializer.Deserialize<'a>(reader)
let f = fun x -> x + 1
let serialized = toJson f
let deserialized = parseJson<int -> int> serialized
deserialized 1 // returns 2
The serialized JSON for the function looks like this:
{
"FsPickler": "4.0.0",
"type": "Microsoft.FSharp.Core.FSharpFunc`2[System.Int32,System.Int32]",
"value": {
"_flags": "subtype",
"subtype": {
"Case": "NamedType",
"Name": "FSI_0002+serialized#23",
"Assembly": {
"Name": "FSI-ASSEMBLY",
"Version": "0.0.0.0",
"Culture": "neutral",
"PublicKeyToken": ""
}
},
"instance": {}
}
}
Although instance is blank, it records the metadata about the anonymous type created for the function. That way, it can invoke the correct code when you call the deserialized version, as long as the function type is available in the AppDomain where you do the deserialization.
EDIT
If you want to literally serialize the logic for the function, you can use FSPickler to serialize a code quotation instead:
open MBrace.FsPickler
open MBrace.FsPickler.Json
open FSharp.Quotations
open FSharp.Quotations.Evaluator
open System.IO
open System.Text
let serializer = JsonSerializer(indent = true)
let utf8 = UTF8Encoding(false)
let toJson (x: 'a) =
use stream = new MemoryStream()
serializer.Serialize(stream, x)
stream.ToArray() |> utf8.GetString
let parseJson<'a> json =
use reader = new StringReader(json)
serializer.Deserialize<'a>(reader)
let f = <# fun x -> x + 1 #>
let serialized = toJson f
let deserialized = parseJson<Expr<int -> int>> serialized
let increment = deserialized |> QuotationEvaluator.Evaluate
increment 1
This way, the quotation gets serialized to JSON with all the logic described as an expression tree, and when you deserialize it you can use the FSharp.Quotations.Evaluator library to turn it into a runnable function that you can invoke.
The JSON is now considerably larger, but this can be deserialized and evaluated anywhere:
{
"FsPickler": "4.0.0",
"type": "Microsoft.FSharp.Quotations.FSharpExpr`1[Microsoft.FSharp.Core.FSharpFunc`2[System.Int32,System.Int32]]",
"value": {
"attribs": [
{
"attribs": [],
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "NewTupleOp",
"Item": {
"Case": "GenericTypeInstance",
"GenericDefinition": {
"Case": "NamedType",
"Name": "System.Tuple`2",
"Assembly": {
"Name": "mscorlib",
"Version": "4.0.0.0",
"Culture": "neutral",
"PublicKeyToken": "b77a5c561934e089"
}
},
"TypeArgs": [
{
"Case": "NamedType",
"Name": "System.String",
"Assembly": {
"_flags": "cached",
"id": 9
}
},
{
"Case": "GenericTypeInstance",
"GenericDefinition": {
"Case": "NamedType",
"Name": "System.Tuple`5",
"Assembly": {
"_flags": "cached",
"id": 9
}
},
"TypeArgs": [
{
"_flags": "cached",
"id": 11
},
{
"Case": "NamedType",
"Name": "System.Int32",
"Assembly": {
"_flags": "cached",
"id": 9
}
},
{
"_flags": "cached",
"id": 15
},
{
"_flags": "cached",
"id": 15
},
{
"_flags": "cached",
"id": 15
}
]
}
]
}
},
"Item2": [
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 11
},
"instance": "DebugRange"
},
"Item2": {
"_flags": "cached",
"id": 11
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "NewTupleOp",
"Item": {
"_flags": "cached",
"id": 12
}
},
"Item2": [
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 11
},
"instance": "C:\\Users\\aeshbach\\AppData\\Local\\Temp\\~vs220A.fsx"
},
"Item2": {
"_flags": "cached",
"id": 11
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 15
},
"instance": 32
},
"Item2": {
"_flags": "cached",
"id": 15
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 15
},
"instance": 11
},
"Item2": {
"_flags": "cached",
"id": 15
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 15
},
"instance": 32
},
"Item2": {
"_flags": "cached",
"id": 15
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 15
},
"instance": 25
},
"Item2": {
"_flags": "cached",
"id": 15
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
}
]
}
}
]
}
}
],
"term": {
"Case": "LambdaTerm",
"Item1": {
"isMutable104": false,
"name": "x",
"stamp": 0,
"typ": {
"_flags": "cached",
"id": 15
}
},
"Item2": {
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "StaticMethodCallOp",
"Item": {
"Case": "GenericMethodInstance",
"GenericDefinition": {
"Case": "Method",
"Signature": "T3 op_Addition[T1,T2,T3](T1,T2)",
"IsStatic": true,
"DeclaringType": {
"Case": "NamedType",
"Name": "Microsoft.FSharp.Core.Operators",
"Assembly": {
"Name": "FSharp.Core",
"Version": "4.4.1.0",
"Culture": "neutral",
"PublicKeyToken": "b03f5f7f11d50a3a"
}
},
"ReflectedType": null
},
"TypeArgs": [
{
"_flags": "cached",
"id": 15
},
{
"_flags": "cached",
"id": 15
},
{
"_flags": "cached",
"id": 15
}
]
}
},
"Item2": [
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "VarTerm",
"Item": {
"_flags": "cached",
"id": 40
}
}
},
{
"attribs": {
"_flags": "cached",
"id": 4
},
"term": {
"Case": "CombTerm",
"Item1": {
"Case": "ValueOp",
"Item1": {
"_flags": "subtype",
"subtype": {
"_flags": "cached",
"id": 15
},
"instance": 1
},
"Item2": {
"_flags": "cached",
"id": 15
},
"Item3": null
},
"Item2": {
"_flags": "cached",
"id": 4
}
}
}
]
}
}
}
}
}

Combine multiple has_child to single has_child Elasticsearch

I have a type in elasticsearch type_1 in which it has only two fields name and value.
I have following elasticsearch query, Is there any alternative of below query that produce same result using only one has_child ?
GET /my_index/my_type/_search
{
"query": {
"bool": {
"must": [
{
"has_child": {
"type": "type_1",
"query": {
"bool": {
"must": [
{
"term": {
"name": "field_1"
}
},
{
"term": {
"value": "val1"
}
}
]
}
}
}
},
{
"has_child": {
"type": "type_1",
"query": {
"bool": {
"must": [
{
"term": {
"name": "field_2"
}
},
{
"term": {
"value": "val2"
}
}
]
}
}
}
}
]
}
}
}

Count ocurrence of two fields

I have this kind of documents in a MongoDb collection:
[
{
"_id": {
"id": 892,
"answer": "C",
"level": "regular"
},
"total": 4
},
{
"_id": {
"id": 891,
"answer": "Regular",
"level": "neutral"
},
"total": 3
},
{
"_id": {
"id": 892,
"answer": "B",
"level": "regular"
},
"total": 3
},
{
"_id": {
"id": 891,
"answer": "Ótimo",
"level": "positive"
},
"total": 5
},
{
"_id": {
"id": 892,
"answer": "E",
"level": "regular"
},
"total": 3
},
{
"_id": {
"id": 891,
"answer": "Bom",
"level": "positive"
},
"total": 1
},
{
"_id": {
"id": 891,
"answer": "Ruim",
"level": "negative"
},
"total": 2
},
{
"_id": {
"id": 892,
"answer": "D",
"level": "regular"
},
"total": 3
},
{
"_id": {
"id": 891,
"answer": "Péssimo",
"level": "negative"
},
"total": 3
},
{
"_id": {
"id": 892,
"answer": "F",
"level": "regular"
},
"total": 1
}
]
I'm trying to count answer and level ocurrences using MongoDb aggregation pipiline. I'm expecting some output like this:
[
{
"id": 891,
"answers": [
{
"answer": "Ótimo",
"count": 5
},
{
"answer": "Bom",
"count": 1
},
{
"answer": "Regular",
"count": 3
},
{
"answer": "Ruim",
"count": 2
},
{
"answer": "Péssimo",
"count": 3
}
],
"levels": [
{
"level": "positive",
"count": 6
},
{
"level": "neutral",
"count": 3
},
{
"level": "negative",
"count": 5
}
],
"total": 14
},
{
"id": 892,
"answers": [
{
"answer": "B",
"count": 3
},
{
"answer": "C",
"count": 4
},
{
"answer": "D",
"count": 3
},
{
"answer": "E",
"count": 3
},
{
"answer": "F",
"count": 1
},
],
"levels": [
{
"level": "regular",
"count": 14
}
],
"total": 14
}
]
How could I achieve the desired output using MongoDb aggregation pipeline?
EDIT: Actually I'm already using $group to achieve something like that, but I guess only one $group step on the pipeline will not achieve the desired output. Here is my current $group step:
{
$group: {
_id: {
id: "$_id.id"
},
answers: {
$push: {
answer: "$_id.answer",
count: "$count"
}
},
levels: {
$push: {
level: "$_id.level",
count: "$count"
}
},
total: { $sum: "$count" }
}
}
Here is the output I have so far:
[
{
"_id": {
"id": 892
},
"answers": [
{
"answer": "F",
"count": 1
},
{
"answer": "D",
"count": 3
},
{
"answer": "E",
"count": 3
},
{
"answer": "C",
"count": 4
},
{
"answer": "B",
"count": 3
}
],
"levels": [
{
"level": "regular"
},
{
"level": "regular"
},
{
"level": "regular"
},
{
"level": "regular"
},
{
"level": "regular"
}
]
},
{
"_id": {
"id": 891
},
"answers": [
{
"answer": "Ruim",
"count": 2
},
{
"answer": "Péssimo",
"count": 3
},
{
"answer": "Bom",
"count": 1
},
{
"answer": "Regular",
"count": 3
},
{
"answer": "Ótimo",
"count": 5
}
],
"levels": [
{
"level": "negative"
},
{
"level": "negative"
},
{
"level": "positive"
},
{
"level": "neutral"
},
{
"level": "positive"
}
]
}
]
The following aggregation pipeline:
{ $group: {
_id: { id: "$_id.id" },
answers: {
$push: {
answer: "$_id.answer",
level: "$_id.level",
count: "$total"
}
},
levels: {
$push: {
level: "$_id.level",
count: "$total"
}
}
}},
{ $unwind: $levels },
{ $group: {
_id: {
id: "$_id.id",
level: "$levels.level"
},
answers: { $addToSet: "$answers" },
total: { $sum: "$levels.count" }
}},
{ $group: {
_id: { id: "$_id.id" },
answers: { $addToSet: "$answers" },
levels: {
$push: {
level: "$_id.level",
count: "$total"
}
},
total: { $sum: "$total" }
}},
{ $project: {
_id: 0,
id: "$_id.id",
answers: 1,
levels: 1,
total: 1
}}
Will produce the desired output.

Elasticsearch - using the path hierarchy tokenizer to access different level of categories

I'm very new in Elasticsearch and have a question about the hierarchical tokenizer of a path. Here is my code example:
My mapping code:
PUT /my_index
{
"settings": {
"analysis": {
"analyzer": {
"path-analyzer": {
"type": "custom",
"tokenizer": "path-tokenizer"
}
},
"tokenizer": {
"path-tokenizer": {
"type": "path_hierarchy",
"delimiter": "."
}
}
}
},
"mappings": {
"my_type": {
"dynamic": "strict",
"properties": {
"group_path": {
"type": "string",
"index_analyzer": "path-analyzer",
"search_analyzer": "keyword"
}
}
}
}
}
This is my PUT:
PUT /my_index/my_type/1
{
"group_path": ["Book.Thriller.Adult","DVD.Comedy.Kids"]
}
This is my Query:
GET /my_index/my_type/_search?search_type=count
{
"aggs": {
"category": {
"terms": {
"field": "group_path",
"size": 0
}
}
}
}
And the result:
{
...
"aggregations": {
"category": {
"buckets": [
{
"key": "Book",
"doc_count": 1
},
{
"key": "Book.Thriller",
"doc_count": 1
},
{
"key": "Book.Thriller.Adult",
"doc_count": 1
},
{
"key": "DVD",
"doc_count": 1
},
{
"key": "DVD.Comedy",
"doc_count": 1
},
{
"key": "DVD.Comedy.Kids",
"doc_count": 1
}
]
}
}
}
So far is everything good. What I'm looking for is that how can I create buckets for example only for the first category. How can I get result like that:
{
...
"aggregations": {
"category": {
"buckets": [
{
"key": "Book",
"doc_count": 1
},
{
"key": "DVD",
"doc_count": 1
}
]
}
}
}
Thank you for any help.
The only way I found to do this is to use the exclude syntax to exclude the levels you don't want.
{
"aggs": {
"category": {
"terms": {
"field": "group_path",
"size": 0,
"exclude" : ".*\\..*"
}
}
}
}
Will then return
aggregations: {
category: {
buckets: [
{
key: Book
doc_count: 1
}
{
key: DVD
doc_count: 1
}
]
}
}
If you select book, you can then search like this
{
"query" : {
"filtered": {
"filter": {
"prefix": {
"group_path": "Book"
}
}
}
},
"aggs" : {
"category": {
"terms": {
"field": "group_path",
"size": 0,
"include" : "Book\\..*",
"exclude": ".*\\..*\\..*"
}
}
}
}
Will then return
aggregations: {
category: {
buckets: [
{
key: Book.Thriller
doc_count: 1
}
]
}
}

Resources