I am facing an issue with grouping inside an Elasticsearch top_hits aggregation; alternatively, I need a count of unique students in the top_hits.
Elastic search mapping:
{
"board" : {
"properties" : {
"notApplied" : {
"type" : "date"
}
}
}
}
Query :
{
"size": 0,
"query": {},
"aggs": {
"notApplied": {
"filter": {
"exists": {
"field": "board.notApplied"
}
},
"aggs": {
"top_student_hits": {
"top_hits": {
"sort": [
{
"board.notApplied": {
"order": "desc"
}
}
],
"script_fields": {
"dues": {
"script": {
"source": "if (doc.containsKey('board.notApplied') && doc['board.notApplied'].size() != 0) { (doc['board.notApplied'].value.toInstant().toEpochMilli()-params.date)/86400000 } else { 0; }",
"params": {
"date": 1669939199059 // --> < 1 day
}
}
}
},
"_source": {
"includes": [
"id",
"studentName",
"usercode",
"board.notApplied",
"userId"
]
},
"size": 5
}
}
}
}
}
}
Output for the above query :
{
"took" : 11,
...
"aggregations" : {
"notApplied" : {
"doc_count" : 42,
"top_student_hits" : {
"hits" : {
"total" : {
"value" : 42,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "4b85533822f91e9b99392f16dedaae1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "4b85533822f91e9b99392f16dedaae1f",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Sam",
"id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "e7f84fa978a553e77716ab479d3d6ce5",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-13T00:00:00.000Z"
},
"id" : "e7f84fa978a553e77716ab479d3d6ce5",
"studentName" : "Sam",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-49
]
},
"sort" : [
1665619200000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "9cba9f6b0d7a28ef739b321291d00170",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-09-20T00:00:00.000Z"
},
"studentName" : "Ctest17 ",
"id" : "9cba9f6b0d7a28ef739b321291d00170",
"userId" : "ddaf6d6162c8317fd90fec0b870132ce"
},
"fields" : {
"dues" : [
-72
]
},
"sort" : [
1663632000000
]
}
]
}
}
}
}
}
I am getting the right results, but they are duplicated per userId.
I need the top_hits result without duplicates, or the buckets should be grouped by userId. The result should also be sorted in descending order by the dues (or notApplied) field.
Can anyone help me resolve this?
Related
Swagger/OpenAPI definition:
{
"openapi" : "3.0.1",
"info" : {
"title" : "OpenAPI definition",
"version" : "v0"
},
"servers" : [ {
"url" : "http://sandbox.test.com:8063/api/recs",
"description" : "Generated server url"
} ],
"paths" : {
"/data" : {
"get" : {
"tags" : [ "Data" ],
"operationId" : "getData",
"parameters" : [ {
"name" : "goal",
"in" : "query",
"required" : false,
"schema" : {
"$ref" : "#/components/schemas/GoalsEnum_User"
}
} ],
"responses" : {
"404" : {
"description" : "Not Found",
"content" : {
"*/*" : {
"schema" : {
"type" : "object"
}
}
}
},
"200" : {
"description" : "Result generated successfully",
"content" : {
"application/json" : {
"schema" : {
"type" : "array",
"items" : {
"oneOf" : [ {
"$ref" : "#/components/schemas/EventDataDto"
}, {
"$ref" : "#/components/schemas/FreeRideDataDto"
}]
}
}
}
}
}
}
}
}
},
"components" : {
"schemas" : {
"GoalsEnum_User" : {
"type" : "string",
"enum" : [ "User1", "User2" ]
},
"EventDataDto" : {
"type" : "object",
"allOf" : [ {
"$ref" : "#/components/schemas/ParentDataSchema_UserData"
}, {
"type" : "object",
"properties" : {
"rules" : {
"type" : "array",
"items" : {
"$ref" : "#/components/schemas/RuleDto"
}
}
}
}, {
"$ref" : "#/components/schemas/ParentDataSchema"
} ]
},
"FreeRideDataDto" : {
"type" : "object",
"allOf" : [ {
"$ref" : "#/components/schemas/ParentDataSchema"
}, {
"type" : "object",
"properties" : {
"completedRoutes" : {
"type" : "array",
"items" : {
"type" : "integer",
"format" : "int64"
}
},
"averageDistance" : {
"type" : "number",
"format" : "double"
},
"averageDuration" : {
"type" : "number",
"format" : "double"
}
}
}, {
"$ref" : "#/components/schemas/ParentDataSchema_UserData"
} ]
},
"ParentDataSchema" : {
"required" : [ "type" ],
"type" : "object",
"properties" : {
"type" : {
"type" : "string",
"enum" : [ "FREE_RIDE", "EVENT" ]
}
},
"discriminator" : {
"propertyName" : "type",
"mapping" : {
"EVENT" : "#/components/schemas/EventRecommendationDto",
"FREE_RIDE" : "#/components/schemas/FreeRideRecommendationDto"
}
}
},
"ParentDataSchema_UserData" : {
"required" : [ "type" ],
"type" : "object",
"properties" : {
"type" : {
"type" : "string",
"enum" : [ "FREE_RIDE", "EVENT" ]
}
},
"discriminator" : {
"propertyName" : "type",
"mapping" : {
"EVENT" : "#/components/schemas/EventRecommendationDto",
"FREE_RIDE" : "#/components/schemas/FreeRideRecommendationDto"
}
}
}
}
}
}
Generated Example:
[
{
"type": "FREE_RIDE",
"rules": [
{
"ruleId": 0,
"categoryId": 0,
"name": "string",
"type": "string",
"value": "string"
}
]
},
{
"type": "FREE_RIDE",
"completedRoutes": [
0
],
"averageDistance": 0,
"averageDuration": 0
}
]
Since there are specific values for the discriminating field "type", I expect the examples to have the correct value for detected types. Although the types were listed correctly, the type field is not set to the discriminating value.
Is there anything I can do to the Swagger/OpenAPI definitions or Swagger UI to fix this? I'm even open to adding a bug-fix if you can point me to where the values of the field examples are set and how I can choose the discriminating value instead of the first one in the enum.
I have documents as
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1399",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2023-01-14T04:16:41.318Z",
"recent_edit_at" : "2022-09-23T14:13:41.246Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1394",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-07-02T16:19:41.347Z",
"recent_edit_at" : "2022-12-26T10:12:41.333Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1392",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-05-20T11:33:41.372Z",
"recent_edit_at" : "2021-12-21T03:36:41.359Z"
}
}
]
}
}
What I know is if I do
{
"size": 12,
"from": 0,
"query": {
......,
......
},
"sort": [
{
"recent_edit_at": {
"order": "desc"
}
}
]
}
This will order by recent_edit_at in desc order.
Similarly replacing recent_edit_at with draft_recent_edit_at will order by draft_recent_edit_at in desc order.
What I am struggling with is finding a way to sort by the maximum of draft_recent_edit_at and recent_edit_at, i.e. take the later of the two dates for each document and then order the documents by that value.
===========================Update===========================
After adding the sort proposed by HPringles, the output is
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "journeys-development-latest",
"node": "GGAHq1ufQQmSqeLRyzka5A",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toInstance/0] not found"
}
}
}
]
},
"status": 400
}
If I'm understanding correctly, you can do this with a painless script at runtime.
See below:
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),
doc['recent_edit_at'].value.toInstance().toEpochMilli())
""",
"params": {
"factor": 1.1
}
},
"order": "asc"
}
}
This will work out the maximum of the two, and then sort based on that value.
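As far as I know, you might also want to convert the epoch values to long. Note also that the method is toInstant(), not toInstance(); the latter is what the "toInstance/0 not found" error above is complaining about.
Something like: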
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
long draft_recent_edit_at = doc['draft_recent_edit_at'].value.toInstant().toEpochMilli();
long recent_edit_at = doc['recent_edit_at'].value.toInstant().toEpochMilli();
Math.max(draft_recent_edit_at, recent_edit_at);
"""
},
"order": "asc"
}
}
I have 2 indexes: products and shop_inventory_6 (shop-wise inventory).
products mapping
{
"products_staging" : {
"aliases" : { },
"mappings" : {
"product" : {
"properties" : {
"alternate_names" : {
"type" : "text"
},
"brand" : {
"properties" : {
"id" : {
"type" : "integer"
},
"image_url" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"brand_suggest" : {
"type" : "completion",
"analyzer" : "autocomplete",
"search_analyzer" : "whitespace_analyzer",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"category" : {
"properties" : {
"id" : {
"type" : "integer"
},
"image_url" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"id" : {
"type" : "text"
},
"image_url" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "standard"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "autocomplete",
"search_analyzer" : "whitespace_analyzer",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"product_alternate_name_suggest" : {
"type" : "completion",
"analyzer" : "autocomplete",
"search_analyzer" : "whitespace_analyzer",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"product_sizes" : {
"type" : "nested",
"properties" : {
"ean_code" : {
"type" : "keyword"
},
"id" : {
"type" : "integer"
},
"is_deleted" : {
"type" : "boolean"
},
"price" : {
"type" : "float"
},
"shop_category_type_ids" : {
"type" : "text"
},
"uom" : {
"type" : "keyword"
},
"weight" : {
"type" : "float"
}
}
},
"sub_category" : {
"properties" : {
"alternate_names" : {
"type" : "text"
},
"id" : {
"type" : "integer"
},
"image_url" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"sub_category_alternate_suggest" : {
"type" : "completion",
"analyzer" : "autocomplete",
"search_analyzer" : "whitespace_analyzer",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"sub_category_suggest" : {
"type" : "completion",
"analyzer" : "autocomplete",
"search_analyzer" : "whitespace_analyzer",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
}
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "3",
"provided_name" : "products_staging",
"creation_date" : "1566968865962",
"analysis" : {
"filter" : {
"autocomplete_filter" : {
"type" : "edge_ngram",
"min_gram" : "2",
"max_gram" : "20"
}
},
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase",
"autocomplete_filter"
],
"type" : "custom",
"tokenizer" : "standard"
},
"whitespace_analyzer" : {
"filter" : [
"lowercase",
"asciifolding"
],
"type" : "custom",
"tokenizer" : "whitespace"
}
}
},
"number_of_replicas" : "1",
"uuid" : "M5GE3TK9QOKVaBMcOkCJPQ",
"version" : {
"created" : "6000199"
}
}
}
}
}
shop_inventory mapping
{
"staging_shop_inventory_17" : {
"aliases" : { },
"mappings" : {
"shop_inventory" : {
"properties" : {
"brand" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"brand_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"category" : {
"properties" : {
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"name" : {
"type" : "text",
"analyzer" : "standard"
}
}
},
"deleted_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_deleted" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
},
"name_suggest" : {
"type" : "text",
"analyzer" : "ngram_analyzer"
},
"product_deleted" : {
"type" : "keyword"
},
"product_id" : {
"type" : "integer"
},
"product_sizes" : {
"type" : "nested",
"properties" : {
"deleted_at" : {
"type" : "date"
},
"ean_code" : {
"type" : "keyword"
},
"id" : {
"type" : "integer"
},
"in_stock" : {
"type" : "boolean"
},
"is_deleted" : {
"type" : "boolean"
},
"price" : {
"type" : "float"
},
"product_update_on" : {
"type" : "date"
},
"product_update_status" : {
"type" : "integer"
},
"uom" : {
"type" : "keyword"
},
"weight" : {
"type" : "float"
}
}
},
"sub_category" : {
"properties" : {
"created_at" : {
"type" : "date"
},
"id" : {
"type" : "integer"
},
"image" : {
"type" : "text",
"index" : false
},
"is_selected" : {
"type" : "boolean"
},
"name" : {
"type" : "text",
"analyzer" : "standard"
},
"updated_at" : {
"type" : "date"
}
}
},
"sub_category_suggest" : {
"type" : "text",
"analyzer" : "gramAnalyzer",
"search_analyzer" : "whitespace_analyzer"
}
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "3",
"provided_name" : "staging_shop_inventory_17",
"creation_date" : "1569230448054",
"analysis" : {
"filter" : {
"gramFilter" : {
"token_chars" : [
"letter",
"digit"
],
"min_gram" : "1",
"type" : "edge_ngram",
"max_gram" : "20"
}
},
"analyzer" : {
"whitespace_analyzer" : {
"filter" : [
"lowercase",
"asciifolding"
],
"type" : "custom",
"tokenizer" : "whitespace"
},
"ngram_analyzer" : {
"token_chars" : [
"letter",
"digit"
],
"min_gram" : "4",
"type" : "custom",
"max_gram" : "20",
"tokenizer" : "ngram"
},
"gramAnalyzer" : {
"filter" : [
"lowercase",
"asciifolding",
"gramFilter"
],
"type" : "custom",
"tokenizer" : "whitespace"
}
}
},
"number_of_replicas" : "1",
"uuid" : "q9BkwXMVQnGoga8tznNFgg",
"version" : {
"created" : "6000199"
}
}
}
}
}
I want to select products from the products index that are not in the shop_inventory index, without using two queries.
I also want to select products by sub_category_id and brand ids, where I have multiple sub_category_ids and brand ids (because my brand belongs to multiple categories), without using an OR condition.
When I tried to post the code below in the Cerebro plugin:
POST /_bulk
{
"index" : { "_index" : "test", "_type" : "type1", "_id" : "1" },
"field1" : "value1" ,
"delete" : { "_index" : "test", "_type" : "type1", "_id" : "2" },
"create" : { "_index" : "test", "_type" : "type1", "_id" : "3" },
"field2" : "value3" ,
"update" : {"_id" : "1", "_type" : "type1", "_index" : "test"},
"doc" : {"field3" : "value2"}
}
It is showing error like this in cerebro plugin:
{
"error": {
"root_cause": [
{
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: no requests added;"
}
],
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: no requests added;"
},
"status": 400
}
What if you make your JSON body end with a newline (\n) character? It could look something like this:
{ "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" }}
{"field1" : "value1"}
{"delete" : { "_index" : "test", "_type" : "type1", "_id" : "2" }}
{"create" : { "_index" : "test", "_type" : "type1", "_id" : "3" }}
{"field2" : "value3"}
{"update" : {"_id" : "1", "_type" : "type1", "_index" : "test"}}
{"doc" : {"field3" : "value2"}}\n
Also make sure that your body is formatted properly (one JSON object per line, as above). As per the docs, you need a newline character at the end of your JSON body. You may have a look at this SO question as well. Hope it helps!
I use a Web API from Spotify in my app to find a track by a particular artist within Spotify. The results look like this:
"tracks" : {
"href" : "https://api.spotify.com/v1/search?query=track%3A%22Dude+Looks+Like+A+Lady+%22+artist%3A%22+Aerosmith%22&offset=0&limit=1&type=track",
"items" : [ {
"album" : {
"album_type" : "album",
"available_markets" : [ "CA", "MX", "US" ],
"external_urls" : {
"spotify" : "https://open.spotify.com/album/3XYqOJI1YlX40kJTdzFEzp"
},
"href" : "https://api.spotify.com/v1/albums/3XYqOJI1YlX40kJTdzFEzp",
"id" : "3XYqOJI1YlX40kJTdzFEzp",
"images" : [ {
"height" : 640,
"url" : "https://i.scdn.co/image/948208cdb26864468ee4320070cd10e6b580d852",
"width" : 640
}, {
"height" : 300,
"url" : "https://i.scdn.co/image/087aeee7ed7b7397f5cf5a4c90bc0532d7a3319c",
"width" : 300
}, {
"height" : 64,
"url" : "https://i.scdn.co/image/562e9fa179952065137a17b175b5bca0647d5f47",
"width" : 64
} ],
"name" : "Permanent Vacation (Remastered)",
"type" : "album",
"uri" : "spotify:album:3XYqOJI1YlX40kJTdzFEzp"
},
"artists" : [ {
"external_urls" : {
"spotify" : "https://open.spotify.com/artist/7Ey4PD4MYsKc5I2dolUwbH"
},
"href" : "https://api.spotify.com/v1/artists/7Ey4PD4MYsKc5I2dolUwbH",
"id" : "7Ey4PD4MYsKc5I2dolUwbH",
"name" : "Aerosmith",
"type" : "artist",
"uri" : "spotify:artist:7Ey4PD4MYsKc5I2dolUwbH"
} ],
"available_markets" : [ "CA", "MX", "US" ],
"disc_number" : 1,
"duration_ms" : 265773,
"explicit" : false,
"external_ids" : {
"isrc" : "USIR10000454"
},
"external_urls" : {
"spotify" : "https://open.spotify.com/track/6gQUbFwwdYXlKdmqRoWKJe"
},
"href" : "https://api.spotify.com/v1/tracks/6gQUbFwwdYXlKdmqRoWKJe",
"id" : "6gQUbFwwdYXlKdmqRoWKJe",
"name" : "Dude (Looks Like A Lady)",
"popularity" : 55,
"preview_url" : "https://p.scdn.co/mp3-preview/7d85766664041815e16b54eb014d3d120f883db8",
"track_number" : 5,
"type" : "track",
"uri" : "spotify:track:6gQUbFwwdYXlKdmqRoWKJe"
} ],
"limit" : 1,
"next" : "https://api.spotify.com/v1/search?query=track%3A%22Dude+Looks+Like+A+Lady+%22+artist%3A%22+Aerosmith%22&offset=1&limit=1&type=track",
"offset" : 0,
"previous" : null,
"total" : 7
}
The only part that I actually need is the FINAL external_urls entry, which looks like this:
"external_urls" : {
"spotify" : "https://open.spotify.com/track/6gQUbFwwdYXlKdmqRoWKJe"
},
As this is the one that contains the specific app. I have done about 100 tries with the Web API, and this is the way it always appears. So my question is HOW can I pull out JUST this one area?