Return distance in elasticsearch results? - geolocation

My question is similar to this one.
Simply, is there a way to return the geo distance when NOT sorting with _geo_distance?
Update:
To clarify, I want the results in random order AND include distance.

Yes you can, by using a script field.
For instance, assuming your docs have a geo_point field called location, you could use the following:
(note the \u0027 is just an escaped single quote, so \u0027location\u0027 is really 'location')
curl -XGET 'http://127.0.0.1:9200/geonames/_search?pretty=1' -d '
{
  "script_fields" : {
    "distance" : {
      "params" : {
        "lat" : 2.27,
        "lon" : 50.3
      },
      "script" : "doc[\u0027location\u0027].distanceInKm(lat,lon)"
    }
  }
}
'
# [Thu Feb 16 11:20:29 2012] Response:
# {
#   "hits" : {
#     "hits" : [
#       {
#         "_score" : 1,
#         "fields" : {
#           "distance" : 466.844095463887
#         },
#         "_index" : "geonames_1318324623",
#         "_id" : "6436641_en",
#         "_type" : "place"
#       },
... etc
If you want the _source field to be returned as well, then you can specify that as follows:
curl -XGET 'http://127.0.0.1:9200/geonames/_search?pretty=1' -d '
{
  "fields" : [ "_source" ],
  "script_fields" : {
    "distance" : {
      "params" : {
        "lat" : 2.27,
        "lon" : 50.3
      },
      "script" : "doc[\u0027location\u0027].distanceInKm(lat,lon)"
    }
  }
}
'

Great answer by DrTech ... here is an updated version for Elasticsearch 5.x with Painless as the script language. I also added "stored_fields" to include _source in the result:
curl -XGET 'http://127.0.0.1:9200/geonames/_search?pretty=1' -d '
{
  "stored_fields" : [ "_source" ],
  "script_fields" : {
    "distance" : {
      "script" : {
        "inline": "doc[\u0027location\u0027].arcDistance(params.lat,params.lon) * 0.001",
        "lang": "painless",
        "params": {
          "lat": 2.27,
          "lon": 50.3
        }
      }
    }
  }
}'
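Note that arcDistance returns the distance in metres, which is why the result is multiplied by 0.001 to get kilometres. If you run the same request from the Kibana console rather than curl (a sketch of the equivalent, assuming the same geonames index and location field), it looks like this:
GET geonames/_search
{
  "stored_fields": [ "_source" ],
  "script_fields": {
    "distance": {
      "script": {
        "lang": "painless",
        "inline": "doc['location'].arcDistance(params.lat, params.lon) * 0.001",
        "params": {
          "lat": 2.27,
          "lon": 50.3
        }
      }
    }
  }
}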

To return the distance as well as all the default fields/source, you could also do this:
To avoid sorting by distance primarily, just sort by _score (or whatever you want the results sorted by) first.
{
  "sort": [
    "_score",
    {
      "_geo_distance": {
        "location": {
          "lat": 40.715,
          "lon": -73.998
        },
        "order": "asc",
        "unit": "km",
        "distance_type": "plane"
      }
    }
  ]
}
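As a side note, once a _geo_distance sort is present, the computed distance is also returned in each hit's sort values, so you can read it from there without a script field. A sketch of what a hit would then contain (field values here are illustrative):
{
  "_index": "my_index",
  "_id": "1",
  "_score": 1.0,
  "_source": { ... },
  "sort": [
    1.0,
    3.2786
  ]
}
The first sort value is the _score, the second is the distance in the requested unit (km in the example above).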

Since ES 1.3, MVEL is disabled by default, so use a query like:
GET some-index/_search
{
  "sort": [
    {
      "_geo_distance": {
        "geo_location": "47.1, 8.1",
        "order": "asc",
        "unit": "m"
      }
    }
  ],
  "query": {
    "match_all": {}
  },
  "script_fields": {
    "distance": {
      "lang": "groovy",
      "params": {
        "lat": 47.1,
        "lon": 8.1
      },
      "script": "doc['geo_location'].distanceInKm(lat,lon)"
    }
  }
}
see: "lang": "groovy", part

Related

how to merge two aggregation results in elasticsearch

I am facing issues with Elasticsearch aggregation grouping inside top_hits; I need a unique student count in the top_hits.
Elasticsearch mapping:
{
  "board": {
    "properties": {
      "notApplied": {
        "type": "date"
      }
    }
  }
}
Query:
{
  "size": 0,
  "query": {},
  "aggs": {
    "notApplied": {
      "filter": {
        "exists": {
          "field": "board.notApplied"
        }
      },
      "aggs": {
        "top_student_hits": {
          "top_hits": {
            "sort": [
              {
                "board.notApplied": {
                  "order": "desc"
                }
              }
            ],
            "script_fields": {
              "dues": {
                "script": {
                  "source": "if (doc.containsKey('board.notApplied') && doc['board.notApplied'].size() != 0) { (doc['board.notApplied'].value.toInstant().toEpochMilli()-params.date)/86400000 } else { 0; }",
                  "params": {
                    "date": 1669939199059 // --> < 1 day
                  }
                }
              }
            },
            "_source": {
              "includes": [
                "id",
                "studentName",
                "usercode",
                "board.notApplied",
                "userId"
              ]
            },
            "size": 5
          }
        }
      }
    }
  }
}
Output for the above query :
{
"took" : 11,
...
"aggregations" : {
"notApplied" : {
"doc_count" : 42,
"top_student_hits" : {
"hits" : {
"total" : {
"value" : 42,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "4b85533822f91e9b99392f16dedaae1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "4b85533822f91e9b99392f16dedaae1f",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Sam",
"id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "e7f84fa978a553e77716ab479d3d6ce5",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-13T00:00:00.000Z"
},
"id" : "e7f84fa978a553e77716ab479d3d6ce5",
"studentName" : "Sam",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-49
]
},
"sort" : [
1665619200000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "9cba9f6b0d7a28ef739b321291d00170",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-09-20T00:00:00.000Z"
},
"studentName" : "Ctest17 ",
"id" : "9cba9f6b0d7a28ef739b321291d00170",
"userId" : "ddaf6d6162c8317fd90fec0b870132ce"
},
"fields" : {
"dues" : [
-72
]
},
"sort" : [
1663632000000
]
}
]
}
}
}
}
}
I am getting the expected results, but they are duplicated by userId.
I need the top_hits result without duplicates, or the buckets grouped by userId; the results should also be sorted descending by the dues (or notApplied) field.
Can anyone help me resolve this?
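One possible approach (a sketch, not from the original thread): group on userId with a terms sub-aggregation (this assumes userId is mapped as a keyword / not_analyzed field) and keep a single hit per user via top_hits with size 1, ordering the buckets by each user's most recent notApplied date. The dues script_field from the query above can be added back inside the top_hits unchanged.
{
  "size": 0,
  "aggs": {
    "notApplied": {
      "filter": {
        "exists": { "field": "board.notApplied" }
      },
      "aggs": {
        "by_user": {
          "terms": {
            "field": "userId",
            "size": 5,
            "order": { "latest_notApplied": "desc" }
          },
          "aggs": {
            "latest_notApplied": {
              "max": { "field": "board.notApplied" }
            },
            "top_student_hit": {
              "top_hits": {
                "size": 1,
                "sort": [
                  { "board.notApplied": { "order": "desc" } }
                ],
                "_source": {
                  "includes": [ "id", "studentName", "usercode", "board.notApplied", "userId" ]
                }
              }
            }
          }
        }
      }
    }
  }
}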

Restricting Values Assignable to Discriminating Property

Swagger/OpenAPI definition:
{
"openapi" : "3.0.1",
"info" : {
"title" : "OpenAPI definition",
"version" : "v0"
},
"servers" : [ {
"url" : "http://sandbox.test.com:8063/api/recs",
"description" : "Generated server url"
} ],
"paths" : {
"/data" : {
"get" : {
"tags" : [ "Data" ],
"operationId" : "getData",
"parameters" : [ {
"name" : "goal",
"in" : "query",
"required" : false,
"schema" : {
"$ref" : "#/components/schemas/GoalsEnum_User"
}
} ],
"responses" : {
"404" : {
"description" : "Not Found",
"content" : {
"*/*" : {
"schema" : {
"type" : "object"
}
}
}
},
"200" : {
"description" : "Result generated successfully",
"content" : {
"application/json" : {
"schema" : {
"type" : "array",
"items" : {
"oneOf" : [ {
"$ref" : "#/components/schemas/EventDataDto"
}, {
"$ref" : "#/components/schemas/FreeRideDataDto"
}]
}
}
}
}
}
}
}
}
},
"components" : {
"schemas" : {
"GoalsEnum_User" : {
"type" : "string",
"enum" : [ "User1", "User2" ]
},
"EventDataDto" : {
"type" : "object",
"allOf" : [ {
"$ref" : "#/components/schemas/ParentDataSchema_UserData"
}, {
"type" : "object",
"properties" : {
"rules" : {
"type" : "array",
"items" : {
"$ref" : "#/components/schemas/RuleDto"
}
}
}
}, {
"$ref" : "#/components/schemas/ParentDataSchema"
} ]
},
"FreeRideDataDto" : {
"type" : "object",
"allOf" : [ {
"$ref" : "#/components/schemas/ParentDataSchema"
}, {
"type" : "object",
"properties" : {
"completedRoutes" : {
"type" : "array",
"items" : {
"type" : "integer",
"format" : "int64"
}
},
"averageDistance" : {
"type" : "number",
"format" : "double"
},
"averageDuration" : {
"type" : "number",
"format" : "double"
}
}
}, {
"$ref" : "#/components/schemas/ParentDataSchema_UserData"
} ]
},
"ParentDataSchema" : {
"required" : [ "type" ],
"type" : "object",
"properties" : {
"type" : {
"type" : "string",
"enum" : [ "FREE_RIDE", "EVENT" ]
}
},
"discriminator" : {
"propertyName" : "type",
"mapping" : {
"EVENT" : "#/components/schemas/EventRecommendationDto",
"FREE_RIDE" : "#/components/schemas/FreeRideRecommendationDto"
}
}
},
"ParentDataSchema_UserData" : {
"required" : [ "type" ],
"type" : "object",
"properties" : {
"type" : {
"type" : "string",
"enum" : [ "FREE_RIDE", "EVENT" ]
}
},
"discriminator" : {
"propertyName" : "type",
"mapping" : {
"EVENT" : "#/components/schemas/EventRecommendationDto",
"FREE_RIDE" : "#/components/schemas/FreeRideRecommendationDto"
}
}
}
}
}
}
Generated Example:
[
{
"type": "FREE_RIDE",
"rules": [
{
"ruleId": 0,
"categoryId": 0,
"name": "string",
"type": "string",
"value": "string"
}
]
},
{
"type": "FREE_RIDE",
"completedRoutes": [
0
],
"averageDistance": 0,
"averageDuration": 0
}
]
Since there are specific values for the discriminating field "type", I expect the generated examples to carry the correct value for each detected type. Although the types were listed correctly, the type field is not set to the discriminating value.
Is there anything I can do to the Swagger/OpenAPI definitions or Swagger UI to fix this? I'm even open to contributing a bug fix if you can point me to where the example values for fields are set and how I can choose the discriminating value instead of the first value in the enum.
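One thing worth checking (an observation on the definition above, not a confirmed fix): the discriminator mapping in ParentDataSchema and ParentDataSchema_UserData points at #/components/schemas/EventRecommendationDto and #/components/schemas/FreeRideRecommendationDto, neither of which is defined in this document. For tooling to associate a discriminator value with a concrete child schema, the mapping should reference the schemas that actually extend the parent, for example:
"discriminator" : {
  "propertyName" : "type",
  "mapping" : {
    "EVENT" : "#/components/schemas/EventDataDto",
    "FREE_RIDE" : "#/components/schemas/FreeRideDataDto"
  }
}
Whether Swagger UI then uses the mapped value in the generated example depends on the UI version; if it still picks the first enum entry, re-declaring the type property with an explicit example value in each child's inline allOf part is a possible (tool-dependent) workaround.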

How to order by two different attributes on same document in elasticsearch?

I have documents as
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1399",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2023-01-14T04:16:41.318Z",
"recent_edit_at" : "2022-09-23T14:13:41.246Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1394",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-07-02T16:19:41.347Z",
"recent_edit_at" : "2022-12-26T10:12:41.333Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1392",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-05-20T11:33:41.372Z",
"recent_edit_at" : "2021-12-21T03:36:41.359Z"
}
}
]
}
}
What I know is if I do
{
  "size": 12,
  "from": 0,
  "query": {
    ......,
    ......
  },
  "sort": [
    {
      "recent_edit_at": {
        "order": "desc"
      }
    }
  ]
}
This will order by recent_edit_at in desc order.
Similarly replacing recent_edit_at with draft_recent_edit_at will order by draft_recent_edit_at in desc order.
What I am struggling with is finding a way to sort by the maximum of draft_recent_edit_at and recent_edit_at, and then order the documents by that value.
===========================Update===========================
After adding the sort proposed by HPringles, the output is:
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "journeys-development-latest",
"node": "GGAHq1ufQQmSqeLRyzka5A",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toInstance/0] not found"
}
}
}
]
},
"status": 400
}
If I'm understanding correctly, you can do this with a painless script at runtime.
See below:
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),
doc['recent_edit_at'].value.toInstance().toEpochMilli())
""",
"params": {
"factor": 1.1
}
},
"order": "asc"
}
}
This will work out the maximum of the two and then sort based on that value.
Note that the runtime error in the update above comes from a typo: toInstance() should be toInstant(). With that fixed, and the epoch values held as longs, something like this should work:
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
long draft_recent_edit_at = doc['draft_recent_edit_at'].value.toInstant().toEpochMilli();
long recent_edit_at = doc['recent_edit_at'].value.toInstant().toEpochMilli();
Math.max(draft_recent_edit_at, recent_edit_at);
"""
},
"order": "asc"
}
}
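Putting it together with descending order (the question asks for the newest documents first), a sketch of the full request body might look like the following; match_all stands in for the actual query, and documents missing either date field would need a doc['...'].size() != 0 guard or the script will fail for them:
{
  "size": 12,
  "from": 0,
  "query": {
    "match_all": {}
  },
  "sort": {
    "_script": {
      "type": "number",
      "script": {
        "lang": "painless",
        "source": """
          long draft_recent_edit_at = doc['draft_recent_edit_at'].value.toInstant().toEpochMilli();
          long recent_edit_at = doc['recent_edit_at'].value.toInstant().toEpochMilli();
          return Math.max(draft_recent_edit_at, recent_edit_at);
        """
      },
      "order": "desc"
    }
  }
}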

Elasticsearch possessive_english stemmer query returns no hits

I was able to find this other question: Using of possessive_english stemmer in Elasticsearch
but it's been 3 years since there was any activity on it.
I am trying to get Elasticsearch to ignore ' when indexing and searching. For example:
POST my_index/_doc/
{
"message" : "Mike's bike"
}
I want to be able to search for this document using "mikes", "mike's", "mike". I looked and thought that possessive_english should accomplish this task but I have been unable to get the expected results.
I created the index with
PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "rebuilt_standard": {
          "tokenizer": "standard",
          "filter": [
            "lowercase", "my_stemmer"
          ]
        }
      },
      "filter": {
        "my_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      }
    }
  }
}
I tested the analyzer with
POST /my_index/_analyze
{
"analyzer": "rebuilt_standard",
"text": "Mike's bike"
}
And this is the result
{
"tokens" : [
{
"token" : "mike",
"start_offset" : 0,
"end_offset" : 6,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "bike",
"start_offset" : 7,
"end_offset" : 11,
"type" : "<ALPHANUM>",
"position" : 1
}
]
}
Looks like the analyzer is working. Then I inserted the document with:
POST my_index/_doc/
{
"message" : "Mike's bike"
}
When searching for it, it returned 0 results
GET /my_index/_search
{
  "query": {
    "match": { "message": "mike" }
  }
}
GET /my_index/_search
{
  "query": {
    "match": { "message": "mikes" }
  }
}
but
GET /my_index/_search
{
  "query": {
    "match": { "message": "mike's" }
  }
}
returned results
It seems like I am missing the configuration on the mapping side of things from the linked question but I am not sure how to set it.
I tested the above with Kibana, but I am actually using Rails with the gems 'elasticsearch-model', 'elasticsearch-rails', and 'elasticsearch-persistence' and the repository pattern. I am also new to Rails, so I don't know if it's my configs with Rails, or Elasticsearch, or both that need work.
I'll post them just in case
include Elasticsearch::Persistence::Repository
include Elasticsearch::Persistence::Repository::DSL

client = Elasticsearch::Client.new(url: 'http://localhost:9200', log: true)

settings index: {
  number_of_shards: 1,
  analysis: {
    analyzer: {
      custom: {
        type: "custom",
        tokenizer: "standard",
        filter: [
          "lowercase",
          "english_possessive_stemmer",
        ]
      }
    },
    filter: {
      english_possessive_stemmer: {
        type: "stemmer",
        language: "possessive_english",
      }
    }
  }
}

mappings {
  indexes :icon, index: false
  indexes :properties, type: 'nested' do
    indexes :values
  end
  indexes :name
}

In the controller:
repository = Repository.new
repository.create_index!(force: true)
repository.save(json)
results = repository.search(query: { match: { name: 'Mikes' } })
Your analyzer is working fine. I think you have not applied it to your mapping:
PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "rebuilt_standard": {
          "tokenizer": "standard",
          "filter": [
            "lowercase", "my_stemmer", "english_stemmer"
          ]
        }
      },
      "filter": {
        "my_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "message": {
        "type": "text",
        "analyzer": "rebuilt_standard" ---> pass the analyzer
      }
    }
  }
}
The possessive_english filter only removes the trailing possessive ('s), so on its own it will not let you search for "mikes" (it will work for "mike", though). You also need a stemmer, such as english, which reduces words to their base form.
I have an excellent article here for further reference.
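As a quick check (a sketch, assuming the settings above with both filters in place), the combined chain can be verified with _analyze; lowercase plus possessive_english plus english should reduce "Mike's", "mikes", and "mike" all to the token mike:
POST /my_index/_analyze
{
  "analyzer": "rebuilt_standard",
  "text": "Mike's mikes mike"
}
The same point applies to the Rails setup in the question: the custom analyzer is defined in settings but never attached to a field, so something like indexes :name, type: 'text', analyzer: 'custom' (options are passed through to the generated mapping) is likely needed in the mappings block as well.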

How to write script(inline) in script_score including some custom logic with if conditions on doc[] element?

I am a novice with Elasticsearch, and while writing a script_score I am facing a parse exception saying 'expected field name but got [START_ARRAY]'.
Here is the mapping:
PUT toadb
{
  "mappings": {
    "keywords": {
      "properties": {
        "Name": { "type": "string", "analyzer": "simple" },
        "Type": { "type": "string", "index": "not_analyzed" },
        "Id": { "type": "string", "index": "not_analyzed" },
        "Boosting Field": { "type": "integer", "store": "yes" }
      }
    },
    "businesses": {
      "properties": {
        "Name": { "type": "string", "analyzer": "simple" },
        "Type": { "type": "string", "index": "not_analyzed" },
        "Id": { "type": "string", "index": "not_analyzed" },
        "Business_seq": { "type": "string", "index": "not_analyzed" },
        "Status": { "type": "string", "index": "not_analyzed" },
        "System_rating": { "type": "integer", "store": "yes" },
        "System_rating_weight": { "type": "integer", "store": "yes" },
        "Position": { "type": "geo_point", "lat_lon": true },
        "Display Pic": { "type": "string", "index": "not_analyzed" },
        "Boosting Field": { "type": "integer", "store": "yes" }
      }
    }
  }
}
Here is the query I am trying to execute:
GET /toadb/_search
{
  "query": {
    "function_score": {
      "query": {
        "multi_match": {
          "query": "Restaurant",
          "fields": [ "Name" ],
          "fuzziness": 1
        }
      },
      "script_score": {
        "script": "if(doc['Status'] && doc['Status']=='A'){ _score+ (doc['Boosting Field'].value);}"
      }
    },
    "size": 10
  }
}
Please provide sample examples if any (I have already referred to the Elasticsearch documentation).
It looks like you have mistakenly placed the size option in your query. In your example, you have added it as a field inside the query object, next to the function_score. Instead, it belongs at the top level of the request body, as a sibling of the query object.
Try this:
GET /toadb/_search
{
  "query": {
    "function_score": {
      "query": {
        "multi_match": {
          "query": "Restaurant",
          "fields": [
            "Name"
          ],
          "fuzziness": 1
        }
      },
      "script_score": {
        "script": "if(doc['Status'] && doc['Status']=='A'){ _score+ (doc['Boosting Field'].value);}"
      }
    }
  },
  "size": 10
}
Have a look at the documentation for the request body search.
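One further note (not part of the original answer): the script itself will probably also need adjusting, because doc['Status'] returns a doc-values accessor rather than the raw string, and the if block returns nothing when the condition is false. Assuming groovy as the script language on this 1.x-era mapping, something along these lines is safer:
"script_score": {
  "script": "doc['Status'].value == 'A' ? _score + doc['Boosting Field'].value : _score"
}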
