Exclude nil values from ElasticSearch Aggregation - ruby-on-rails

I was using this query to retrieve the most significant values:
keywords = Answer.search(
  :size => 5,
  :query => {
    :match => {
      :question_id => 32481
    }
  },
  :aggregations => {
    :keywords => {
      :significant_terms => {
        :field => 'text'
      }
    }
  }
)
The field is :text, but it contains nil values, so the result is always:
2.1.2 :135 > keywords.map(&:text)
=> [nil, nil, nil, nil, nil]
I tried to add a filter, as the documentation suggests, but it gives me a parse error:
keywords = Answer.search(
  :size => 5,
  :query => {
    :match => {
      :question_id => 32481
    },
    :filtered => {
      :filter => {
        :exists => { :field => 'text' }
      }
    }
  },
  :aggregations => {
    :keywords => {
      :significant_terms => {
        :field => 'text'
      }
    }
  }
)
I've tried many combinations, with no success. How can I get only the valid text answers?

I believe your ES query should translate to something like this:
"size": 5,
"query": {
"filtered": {
"query": { "match": { "question_id" : 32481 } },
"filter": {
"exists": {
"field": "text"
}
}
}
},
"aggs": {
"keywords": {
"significant_terms": {
"field": "text"
}
}
}
meaning your "match" on "question_id" should be enclosed in the "filtered" element.
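Translated back into the Ruby hash syntax used in the question, the corrected call would be roughly this (a sketch, assuming the same Answer.search helper as above):

keywords = Answer.search(
  :size => 5,
  :query => {
    :filtered => {
      # the match query moves inside filtered, next to the filter
      :query => { :match => { :question_id => 32481 } },
      :filter => {
        :exists => { :field => 'text' }
      }
    }
  },
  :aggregations => {
    :keywords => {
      :significant_terms => {
        :field => 'text'
      }
    }
  }
)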

Related

How to filter documents by computed embedded field

I have the following schema
class User
  include Mongoid::Timestamps::CamelCaseCreated
  include Mongoid::Timestamps::CamelCaseUpdated

  embeds_many :intervals

  field :name, type: String
  # ...
end

class Interval
  embedded_in :user

  field :startTime, type: DateTime
  field :endTime, type: DateTime
  # ...
end
I need to get users where at least one interval has a duration greater than or equal to 4 hours.
So for the following data I should receive only Walter White:
users = [
  {
    name: 'Walter White',
    intervals: [
      { startTime: '2021-01-27T08:00:00Z', endTime: '2021-01-27T16:00:00Z' }
    ]
  },
  {
    name: 'Jesse Pinkman',
    intervals: [
      { startTime: '2021-01-27T08:00:00Z', endTime: '2021-01-27T10:00:00Z' },
      { startTime: '2021-01-27T10:00:00Z', endTime: '2021-01-27T13:00:00Z' }
    ]
  },
  {
    name: 'Saul Goodman',
    intervals: []
  }
]
I know that on the document itself I can filter by a computed field like this:
User.collection.aggregate([
  { '$project' => { 'durationInMilliseconds' => { '$subtract' => ['$updatedAt', '$createdAt'] } } },
  { '$match' => { 'durationInMilliseconds' => { '$gte' => four_hours } } }
])
# or
User.where('$expr' => { '$gt' => [{ '$subtract' => ['$updatedAt', '$createdAt'] }, four_hours] })
I was hoping those would work with $elemMatch, but I receive the following errors:
User.where(
  intervals: {
    '$elemMatch' => [
      { '$project' => { 'durationInMilliseconds' => { '$subtract' => ['$endTime', '$startTime'] } } },
      { '$match' => { 'durationInMilliseconds' => { '$gt' => four_hours } } }
    ]
  }
)
=> $elemMatch needs an Object
User.where(
  intervals: {
    '$elemMatch' => {
      '$expr' => { '$max' => some_logic_here }
    }
  }
)
=> $expr can only be applied to the top-level document
Is there a way to make it work with the embedded collection?
Versions I'm using:
gem mongo (2.10.2)
gem mongoid (6.1.1)
mongo --version - 4.2.9
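For what it's worth, here is a minimal sketch that avoids $elemMatch entirely, assuming only the MongoDB 4.2 server listed above: compute each interval's duration inside a top-level $expr (the form you already confirmed works) with $map, then test whether any element passes with $anyElementTrue:

four_hours = 4 * 60 * 60 * 1000 # $subtract on two dates yields milliseconds

User.where(
  '$expr' => {
    '$anyElementTrue' => [
      {
        '$map' => {
          # guard against documents with no intervals field at all
          'input' => { '$ifNull' => ['$intervals', []] },
          'as' => 'i',
          'in' => {
            '$gte' => [
              { '$subtract' => ['$$i.endTime', '$$i.startTime'] },
              four_hours
            ]
          }
        }
      }
    ]
  }
)

On the sample data this returns only Walter White: his single interval is 8 hours, Jesse Pinkman's intervals are 2 and 3 hours, and Saul Goodman's empty array maps to an empty list, for which $anyElementTrue returns false.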

How to post nested JSON to HTTParty in Ruby on Rails

I am trying to work out the correct way to post a nested JSON object to an API using HTTParty.
I am getting a successful response using Postman to test the call:
POST: http://service.net/api
Headers: x-api-key : apikey123
Body :
{
  "VehicleRequests": [{
    "Id": "Vehicle1",
    "Parameters": {
      "Term": 60,
      "CashDeposit": 10,
      "DepositType": "Percentage",
      "AnnualMileage": 10000
    },
    "PhysicalVehicle": {
      "ExternalVehicleId": "12345",
      "Type": "Car",
      "Status": "PreOwned",
      "OnTheRoadPrice": "30000",
      "Mileage": "12345",
      "Registration": {
        "RegistrationNumber": "REGN0",
        "DateRegisteredWithDvla": "01/01/2018"
      }
    }
  }]
}
This returns:
{
  "Vehicles": [
    {
      "Id": "Vehicle1",
      "HasError": false,
      "Error": null,
      "FinanceQuotations": [
        {
          "HasError": false,
          "Error": null,
          "Finance": {
            "Key": "HP",
            "Notifications": [],
            "Quote": {
              .....
            }
          }
        }
      ]
    }
  ]
}
But I'm struggling to replicate the call from my Rails app. I have a class set up which I'm calling on create:
class Monthlyprice
  def initialize()
    @response = HTTParty.post('http://service.net/api',
      :body => {
        :VehicleRequests => [{
          :Id => "Vehicle1",
          :Parameters => {
            :Term => 60,
            :CashDeposit => 10,
            :DepositType => "Percentage",
            :AnnualMileage => 10000
          },
          :PhysicalVehicle => {
            :ExternalVehicleId => "12345",
            :Type => "Car",
            :Status => "PreOwned",
            :OnTheRoadPrice => "30000",
            :Mileage => "12345",
            :Registration => {
              :RegistrationNumber => "REGN0",
              :DateRegisteredWithDvla => "01/01/2018"
            }
          }
        }].to_json
      },
      :headers => {"x-api-key" => "apikey123"})
    puts(@response)
  end
end
But this is returning the following error message from the API:
{"Error"=>{"UserMessage"=>"Request is invalid.", "TechnicalMessage"=>"Request Validation failed. Request had 2 error(s). 1: request.VehicleRequests[0].Id - The Id field is required.\r\n2: request.VehicleRequests[0].Parameters - The Parameters field is required.", "Code"=>"80000"}}
This is the same error that I get from the API in Postman if I remove the Id and Parameters objects, which suggests the contents of my VehicleRequests object are formatted incorrectly. Any advice would be great!
Can you please change the syntax like below:
:body => {
}.to_json
That means you have to call .to_json where the body hash closes; I think it's only a syntax error.
Syntax:
response = HTTParty.post("your request URL",
  headers: {
    .....
    # your header content
    .....
  },
  body: {
    ......
    # your body content
    .....
  }.to_json
)
I have just edited your code; please try the code below:
@response = HTTParty.post('http://service.net/api',
  :headers => {"x-api-key" => "apikey123"},
  :body => {
    :VehicleRequests => [{
      :Id => "Vehicle1",
      :Parameters => {
        :Term => 60,
        :CashDeposit => 10,
        :DepositType => "Percentage",
        :AnnualMileage => 10000
      },
      :PhysicalVehicle => {
        :ExternalVehicleId => "12345",
        :Type => "Car",
        :Status => "PreOwned",
        :OnTheRoadPrice => "30000",
        :Mileage => "12345",
        :Registration => {
          :RegistrationNumber => "REGN0",
          :DateRegisteredWithDvla => "01/01/2018"
        }
      }
    }]
  }.to_json
)
Hope this will help you :)
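One more thing worth checking (an assumption about this particular API, but typical of JSON endpoints): HTTParty form-encodes a Hash body unless you serialize it yourself, so alongside the .to_json call many services also need an explicit Content-Type header, e.g.:

@response = HTTParty.post('http://service.net/api',
  :headers => {
    "x-api-key" => "apikey123",
    # assumption: add this if the API still rejects the request as invalid
    "Content-Type" => "application/json"
  },
  :body => { :VehicleRequests => [ ... ] }.to_json
)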

Failed to parse date field [0] with format [MMM, YY] with elastic search 5.0

I am trying to get the date parsed into a string format with month and numerical year, like "JAN, 92". My query is as below:
size" => 0,
"query" => {
"bool" => {
"must" => [
{
"term" => {
"checkin_progress_for" => {
"value" => "Goal"
}
}
},
{
"term" => {
"goal_owner_id" => {
"value" => "#{current_user.access_key}"
}
}
}
]
}
},
"aggregations" => {
"chekins_over_time" => {
"range" => {
"field" => "checkin_at",
"format" => "MMM, YY",
"ranges" => [
{
"from" => "now-6M",
"to" => "now"
}
]
},
"aggs" => {
"checkins_monthly" => {
"date_histogram" => {
"field" => "checkin_at",
"format" => "MMM, YY",
"interval" => "month",
"min_doc_count" => 0,
"missing" => 0,
"extended_bounds" => {
"min" => "now-6M",
"max" => "now"
}
}
}
}
}
}
}
It throws the following error:
elasticsearch.transport.RemoteTransportException: [captia-america][127.0.0.1:9300][indices:data/read/search[phase/query]]
Caused by: elasticsearch.ElasticsearchParseException: failed to parse date field [0] with format [MMM, YY]
If I remove the "MMM, YY" format and use the normal date format, it works.
What could be the solution to rectify this? Help appreciated.
Your checkins_monthly aggregation is a bit wrong. The missing parameter should be a date in the same format, to be used when the field is missing; a 0 is not actually a date.
For example:
"aggs": {
"checkins_monthly": {
"date_histogram": {
"field": "checkin_at",
"format": "MMM, YY",
"interval": "month",
"min_doc_count": 0,
"missing": "Jan, 17",
"extended_bounds": {
"min": "now-6M",
"max": "now"
}
}
}
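Dropped back into the Ruby hash style of the question's query, that fix would look roughly like this ("Jan, 17" is just a placeholder date in the target format):

"aggs" => {
  "checkins_monthly" => {
    "date_histogram" => {
      "field" => "checkin_at",
      "format" => "MMM, YY",
      "interval" => "month",
      "min_doc_count" => 0,
      # a real date in the same "MMM, YY" format, not 0
      "missing" => "Jan, 17",
      "extended_bounds" => {
        "min" => "now-6M",
        "max" => "now"
      }
    }
  }
}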

ElasticSearch sorting

ElasticSearch Version: 1.3.2
I am trying to sort a simple collection, but no matter what I try it just ignores the sorting...
{
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "bool": {
          "must": [
            {
              "terms": {
                "status": [
                  "active",
                  "featured"
                ]
              }
            }
          ]
        }
      }
    }
  },
  "sort": [
    {
      "price_cents": {
        "order": "asc"
      }
    }
  ]
}
I've noticed in my mapping I have auto_boost = true
{
  "items" : {
    "mappings" : {
      "item" : {
        "dynamic" : "false",
        "_all" : {
          "auto_boost" : true
        },
        "properties" : {
          "price_cents" : {
            "type" : "integer"
          },
          "status" : {
            "type" : "string"
          },
          "title" : {
            "type" : "string",
            "boost" : 10.0,
            "analyzer" : "snowball"
          }
        }
      }
    }
  }
}
This attribute was added automatically by the https://github.com/elasticsearch/elasticsearch-rails gem, which I use:
mappings :dynamic => false do
  indexes :title, :analyzer => 'snowball', :boost => 10.0
  indexes :status
  indexes :price_cents, :type => :integer, :index => 'not_analyzed'
end
I wonder, is "auto_boost": true the reason the sort is ignored? I can't find the correct way to set it to false and check...
I found the issue. It is related to the current bug with .records (https://github.com/elasticsearch/elasticsearch-rails/issues/206); because it does not paginate correctly, I temporarily used this construction:
@paginates_items = Item.search(params).page(params[:page]).per(60).results
item_ids = @paginates_items.collect(&:id)
@items = Item.where(:id => item_ids)
but I forgot that where(:id => item_ids) completely breaks the order...
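For anyone hitting the same thing, a minimal sketch for preserving the order (assuming Rails, since index_by comes from ActiveSupport) is to reorder the loaded records by their position in item_ids:

# load once, then re-sort to match the Elasticsearch ranking
items_by_id = Item.where(:id => item_ids).index_by(&:id)
@items = item_ids.map { |id| items_by_id[id] }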

Elasticsearch and Rails: Using ngram to search for part of a word

I am trying to use the Elasticsearch gem in my project. As I understand it, there is no need for the Tire gem anymore, or am I wrong?
In my project I have a search (obviously), which currently applies to one model. Now I am trying to avoid wildcards, since they don't scale well, but I can't seem to get the ngram analyzers to work properly. If I search for whole words, the search still works, but not for parts of them.
class Pictures < ActiveRecord::Base
  include Elasticsearch::Model
  include Elasticsearch::Model::Callbacks

  settings :analysis => {
    :analyzer => {
      :my_index_analyzer => {
        :tokenizer => "keyword",
        :filter => ["lowercase", "substring"]
      },
      :my_search_analyzer => {
        :tokenizer => "keyword",
        :filter => ["lowercase", "substring"]
      }
    },
    :filter => {
      :substring => {
        :type => "nGram",
        :min_gram => 2,
        :max_gram => 50
      }
    }
  } do
    mapping do
      indexes :title,
        :properties => {
          :type => "string",
          :index_analyzer => 'my_index_analyzer',
          :search_analyzer => "my_search_analyzer"
        }
    end
  end
end
Maybe somebody can give me a hint in the right direction.
I have given up on defining the schema in the model class; in fact, it does not make much sense there.
So here is what I have done: a schema/mapping definition in the db/ folder and a rake task to build it.
https://gist.github.com/geordee/9313f4867d61ce340a08
In the model
def as_indexed_json(options={})
  self.as_json(only: [:id, :name, :description, :price])
end
I'm using an index for suggestions based on edgeNGram (like nGram, but always starting at the left side of the word) with these settings:
{
  "en_suggestions": {
    "settings": {
      "index": {
        "analysis": {
          "filter": {
            "tpNGramFilter": {
              "min_gram": "4",
              "type": "edgeNGram",
              "max_gram": "50"
            }
          },
          "analyzer": {
            "tpNGramAnalyzer": {
              "type": "custom",
              "filter": [
                "tpNGramFilter"
              ],
              "tokenizer": "lowercase"
            }
          }
        }
      }
    }
  }
}
and this mapping:
{
  "en_suggestions": {
    "mappings": {
      "suggest": {
        "properties": {
          "proposal": {
            "type": "string",
            "analyzer": "tpNGramAnalyzer"
          }
        }
      }
    }
  }
}
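To illustrate how this gets queried (a hypothetical example: Suggestion stands in for whatever model or client is wired to the en_suggestions index; it is not defined in the answer above), a plain match on a partial word can now hit, because 4-to-50-character prefixes of each term were indexed:

results = Suggestion.search(
  :query => {
    :match => {
      # "sugg" is the 4-character edge n-gram of "suggestion", so this matches
      :proposal => "sugg"
    }
  }
)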
