How to Join a Collection to a Document using Mongocxx - join

I have a single document (or collection, whichever would work best to get to the solution) named "main":
{
"field1":"value1",
"objects":[ {"key":1}, {"key":2} ]
}
I have another collection named "foreign" with the following three documents:
//document 1
{
"foreignKey":1,
"name": "Mike"
}
//document 2 {
"foreignKey":2,
"name": "Michael"
}
//document 3
{
"foreignKey":3,
"name": "Mick"
}
I want the joined results to be:
//results
{
"field1":"value1",
"objects":[
{
"foreignKey":1,
"name": "Mike"
},
{
"foreignKey":2,
"name": "Michael"
}
]
}
I've only found examples in MongoDB that almost accomplishes this but it's example only has an array of values; but I have an array of objects.
I don't know how to translate this to Mongo-cxx.
I copied the following example from the MongoDB site for convenience
------------------------------------------------
//Consider a collection orders with the following //document:
({ "_id" : 1, "item" : "MON1003", "price" : 350, "quantity" : 2, "specs" :
[ "27 inch", "Retina display", "1920x1080" ], "type" : "Monitor" }
//Another collection inventory contains the following //documents:
{ "_id" : 1, "sku" : "MON1003", "type" : "Monitor", "instock" : 120,
"size" : "27 inch", "resolution" : "1920x1080" }
{ "_id" : 2, "sku" : "MON1012", "type" : "Monitor", "instock" : 85,
"size" : "23 inch", "resolution" : "1280x800" }
{ "_id" : 3, "sku" : "MON1031", "type" : "Monitor", "instock" : 60,
"size" : "23 inch", "display_type" : "LED" }
(
//The following aggregation operation performs a join //on documents in the orders collection which match a //particular element of the specs array to the size //field in the inventory collection.
db.orders.aggregate([
//stage
{
$unwind: "$specs"
},
//stage
{
$lookup:
{
from: "inventory",
localField: "specs",
foreignField: "size",
as: "inventory_docs"
}
},
//stage
{
$match: { "inventory_docs": { $ne: [] } }
}
])
//The operation returns the following document:
{
"_id" : 1,
"item" : "MON1003",
"price" : 350,
"quantity" : 2,
"specs" : "27 inch",
"type" : "Monitor",
"inventory_docs" : [
{
"_id" : 1,
"sku" : "MON1003",
"type" : "Monitor",
"instock" : 120,
"size" : "27 inch",
"resolution" : "1920x1080"
}
]
}

Using the example you linked from MongoDB's $lookup with an array, and based on aggregation_examples.cpp and test/collection.cpp, the following code implements an aggregation operation with $unwind, $lookup and $match that returns the same document as in the example:
#include <iostream>
#include <bsoncxx/json.hpp>
#include <mongocxx/client.hpp>
#include <mongocxx/instance.hpp>
#include <mongocxx/uri.hpp>
using bsoncxx::builder::basic::kvp;
using bsoncxx::builder::basic::make_document;
using bsoncxx::builder::basic::array;
int main(int, char**)
{
std::cout << "Start program" << std::endl;
mongocxx::instance instance{};
mongocxx::client client{ mongocxx::uri{} };
mongocxx::database db = client["stack"];
mongocxx::collection colInventory = db["inventory"];
// Pipeline stages: $unwind, $lookup and $match
mongocxx::pipeline pipe{};
pipe.unwind("$specs");
pipe.lookup(
make_document(
kvp("from", colInventory.name()),
kvp("localField", "specs"),
kvp("foreignField", "size"),
kvp("as", "inventory_docs")
)
);
pipe.match(
make_document(
kvp("inventory_docs", make_document(kvp("$ne", array{})))
)
);
auto cursor = db["orders"].aggregate(pipe, mongocxx::options::aggregate{});
for (auto doc : cursor) {
std::cout << bsoncxx::to_json(doc) << std::endl;
}
std::cout << "End program" << std::endl;
}
Output:
Start program
{ "_id" : 1.0, "item" : "MON1003", "price" : 350.0, "quantity" : 2.0, "specs" : "27 inch", "type" : "Monitor", "inventory_docs" : [ { "_id" : 1.0, "sku" : "MON1003", "type" : "Monitor", "instock" : 120.0, "size" : "27 inch", "resolution" : "1920x1080" } ] }
End program
Pretty print:
{
"_id": 1.0,
"item": "MON1003",
"price": 350.0,
"quantity": 2.0,
"specs": "27 inch",
"type": "Monitor",
"inventory_docs": [
{
"_id": 1.0,
"sku": "MON1003",
"type": "Monitor",
"instock": 120.0,
"size": "27 inch",
"resolution": "1920x1080"
}
]
}

Related

how to merge two aggregation results in elasticsearch

I am facing issues with elasticsearch aggregation grouping inside top_hits. or i need unique students count in the tophits
Elastic search mapping:
{
"board" : {
"properties" : {
"notApplied" : {
"type" : "date"
}
}
}
}
Query :
{
"size": 0,
"query": {},
"aggs": {
"notApplied": {
"filter": {
"exists": {
"field": "board.notApplied"
}
},
"aggs": {
"top_student_hits": {
"top_hits": {
"sort": [
{
"board.notApplied": {
"order": "desc"
}
}
],
"script_fields": {
"dues": {
"script": {
"source": "if (doc.containsKey('board.notApplied') && doc['board.notApplied'].size() != 0) { (doc['board.notApplied'].value.toInstant().toEpochMilli()-params.date)/86400000 } else { 0; }",
"params": {
"date": 1669939199059 // --> < 1 day
}
}
}
},
"_source": {
"includes": [
"id",
"studentName",
"usercode",
"board.notApplied",
"userId"
]
},
"size": 5
}
}
}
}
}
}
Output for the above query :
{
"took" : 11,
...
"aggregations" : {
"notApplied" : {
"doc_count" : 42,
"top_student_hits" : {
"hits" : {
"total" : {
"value" : 42,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "4b85533822f91e9b99392f16dedaae1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "4b85533822f91e9b99392f16dedaae1f",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Joe",
"id" : "1897f32d2d7f691e42c3fe6ebe631c7d",
"userId" : "45a47d1314041ab287a277679ff19922"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-25T00:00:00.000Z"
},
"studentName" : "Sam",
"id" : "f0b25dc9a911782ace5af36db7bfbc1f",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-37
]
},
"sort" : [
1666656000000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "e7f84fa978a553e77716ab479d3d6ce5",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-10-13T00:00:00.000Z"
},
"id" : "e7f84fa978a553e77716ab479d3d6ce5",
"studentName" : "Sam",
"userId" : "d84f9e5231daa902c37921de9126cad7"
},
"fields" : {
"dues" : [
-49
]
},
"sort" : [
1665619200000
]
},
{
"_index" : "applications",
"_type" : "_doc",
"_id" : "9cba9f6b0d7a28ef739b321291d00170",
"_score" : null,
"_source" : {
"board" : {
"notApplied" : "2022-09-20T00:00:00.000Z"
},
"studentName" : "Ctest17 ",
"id" : "9cba9f6b0d7a28ef739b321291d00170",
"userId" : "ddaf6d6162c8317fd90fec0b870132ce"
},
"fields" : {
"dues" : [
-72
]
},
"sort" : [
1663632000000
]
}
]
}
}
}
}
}
I am getting the exact results but it has been duplicated by userId.
i need a result in top_hits without duplicates or the buckets should be grouped by userId. also the result should be sort desc by (dues or notApplied) field.
can any one help me to resolve this?

How to order by two different attributes on same document in elasticsearch?

I have documents as
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1399",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2023-01-14T04:16:41.318Z",
"recent_edit_at" : "2022-09-23T14:13:41.246Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1394",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-07-02T16:19:41.347Z",
"recent_edit_at" : "2022-12-26T10:12:41.333Z"
}
},
{
"_index" : "journeys-development-latest",
"_type" : "_doc",
"_id" : "1392",
"_score" : 1.0,
"_source" : {
"draft_recent_edit_at" : "2022-05-20T11:33:41.372Z",
"recent_edit_at" : "2021-12-21T03:36:41.359Z"
}
}
]
}
}
What I know is if I do
{
"size": 12,
"from": 0,
"query": {
......,
......
},
"sort": [
{
"recent_edit_at": {
"order": "desc"
}
}
]
}
This will order by recent_edit_at in desc order.
Similarly replacing recent_edit_at with draft_recent_edit_at will order by draft_recent_edit_at in desc order.
What I am struggling is to find a way where I can say I want to order by max in draft_recent_edit_at, recent_edit_at and then order the documents according to those.
===========================Update===========================
After adding sort proposed by HPringles the output is
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "journeys-development-latest",
"node": "GGAHq1ufQQmSqeLRyzka5A",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
" ^---- HERE"
],
"script": "\n Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),\n doc['recent_edit_at'].value.toInstance().toEpochMilli())\n ",
"lang": "painless",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toInstance/0] not found"
}
}
}
]
},
"status": 400
}
If I'm understanding correctly, you can do this with a painless script at runtime.
See below:
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
Math.max(doc['draft_recent_edit_at'].value.toInstant().toEpochMilli(),
doc['recent_edit_at'].value.toInstance().toEpochMilli())
""",
"params": {
"factor": 1.1
}
},
"order": "asc"
}
}
This will work out the maximum of the two, and then sort based on that value.
As far as I know you might also want to convert the Epoch values to long.
Something like -
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": """
long draft_recent_edit_at = doc['draft_recent_edit_at'].value.toInstant().toEpochMilli();
long recent_edit_at = doc['recent_edit_at'].value.toInstant().toEpochMilli();
Math.max(draft_recent_edit_at, recent_edit_at);
"""
},
"order": "asc"
}
}

Google Maps API - Autocomplete Suggestions are missing some locations/places

We're using Google Maps' API in our Rails application, with a VueJS frontend.
I don't think any of the above is important because I've checked the URL JSON response in the browser and it's missing locations. It seems to work ok for city names, postcodes - but it's not showing the same suggestions as when I type into a location field in Google Calendar for example.
Request URL (with obvious tokens/auth keys removed):-
https://maps.googleapis.com/maps/api/place/autocomplete/json?types=geocode&input=Ritz%20London
Response:-
{
"predictions" : [
{
"description" : "Ritz Parade, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 0
},
{
"length" : 6,
"offset" : 13
}
],
"place_id" : "EhdSaXR6IFBhcmFkZSwgTG9uZG9uLCBVSyIuKiwKFAoSCbu6Pcob",
"reference" : "EhdSaXR6IFBhcmFkZSwgTG9uZG9uLCBVSyIuKiwKFAoSCbu6PcobEnZIEbPVf1MZfE",
"structured_formatting" : {
"main_text" : "Ritz Parade",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 0
}
],
"secondary_text" : "London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 0
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz Parade"
},
{
"offset" : 13,
"value" : "London"
},
{
"offset" : 21,
"value" : "UK"
}
],
"types" : [ "route", "geocode" ]
}
],
"status" : "OK"
}
The actual Ritz Hotel in London isn't shown here, yet it's clearly showing in my Google Calendar location search.
I don't currently have access to our API Console but imagine this is a setting that either needs to be turned on in the API Console, or passed as an additional param in our request? Or am I looking in the wrong place?
Doing the same search and using establishment as the types parameter returns the result you are probably after.
According to the types documentation:
establishment instructs the Place Autocomplete service to return only business results.
Request with types=establishment: https://maps.googleapis.com/maps/api/place/autocomplete/json?types=establishment&input=Ritz%20London&language=en&key=your_api_key_here
{
"predictions" : [
{
"description" : "The Ritz London, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 4
},
{
"length" : 6,
"offset" : 29
}
],
"place_id" : "ChIJV8gP0ykFdkgRFEAEHoE1YVk",
"reference" : "ChIJV8gP0ykFdkgRFEAEHoE1YVk",
"structured_formatting" : {
"main_text" : "The Ritz London",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 4
}
],
"secondary_text" : "Piccadilly, London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 12
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "The Ritz London"
},
{
"offset" : 17,
"value" : "Piccadilly"
},
{
"offset" : 29,
"value" : "London"
},
{
"offset" : 37,
"value" : "UK"
}
],
"types" : [ "lodging", "restaurant", "food", "point_of_interest", "establishment" ]
},
{
"description" : "Ritz, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 0
},
{
"length" : 6,
"offset" : 6
}
],
"place_id" : "ChIJ1wd5z6gFdkgRUdbMn3pMiKM",
"reference" : "ChIJ1wd5z6gFdkgRUdbMn3pMiKM",
"structured_formatting" : {
"main_text" : "Ritz",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 0
}
],
"secondary_text" : "London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 0
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz"
},
{
"offset" : 6,
"value" : "London"
},
{
"offset" : 14,
"value" : "UK"
}
],
"types" : [ "laundry", "point_of_interest", "establishment" ]
},
{
"description" : "Ritz London Cigars, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 11,
"offset" : 0
}
],
"place_id" : "ChIJa_Ih9V8FdkgRfNt8eaSIXuw",
"reference" : "ChIJa_Ih9V8FdkgRfNt8eaSIXuw",
"structured_formatting" : {
"main_text" : "Ritz London Cigars",
"main_text_matched_substrings" : [
{
"length" : 11,
"offset" : 0
}
],
"secondary_text" : "Piccadilly, London, UK"
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz London Cigars"
},
{
"offset" : 20,
"value" : "Piccadilly"
},
{
"offset" : 32,
"value" : "London"
},
{
"offset" : 40,
"value" : "UK"
}
],
"types" : [ "point_of_interest", "store", "establishment" ]
},
{
"description" : "Hiro Miyoshi at The Ritz London, The Ritz, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 11,
"offset" : 20
}
],
"place_id" : "ChIJcXuw6tkFdkgR89PgYYTudzU",
"reference" : "ChIJcXuw6tkFdkgR89PgYYTudzU",
"structured_formatting" : {
"main_text" : "Hiro Miyoshi at The Ritz London",
"main_text_matched_substrings" : [
{
"length" : 11,
"offset" : 20
}
],
"secondary_text" : "The Ritz, Piccadilly, London, UK"
},
"terms" : [
{
"offset" : 0,
"value" : "Hiro Miyoshi at The Ritz London"
},
{
"offset" : 33,
"value" : "The Ritz"
},
{
"offset" : 43,
"value" : "Piccadilly"
},
{
"offset" : 55,
"value" : "London"
},
{
"offset" : 63,
"value" : "UK"
}
],
"types" : [
"beauty_salon",
"hair_care",
"spa",
"point_of_interest",
"store",
"establishment"
]
},
{
"description" : "Ritz Restaurant, King Street, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 0
},
{
"length" : 6,
"offset" : 30
}
],
"place_id" : "ChIJOVU94EkOdkgRqhr357HTODs",
"reference" : "ChIJOVU94EkOdkgRqhr357HTODs",
"structured_formatting" : {
"main_text" : "Ritz Restaurant",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 0
}
],
"secondary_text" : "King Street, London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 13
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz Restaurant"
},
{
"offset" : 17,
"value" : "King Street"
},
{
"offset" : 30,
"value" : "London"
},
{
"offset" : 38,
"value" : "UK"
}
],
"types" : [
"cafe",
"restaurant",
"food",
"point_of_interest",
"store",
"establishment"
]
}
],
"status" : "OK"
}
On the same documentation page:
If nothing is specified, all types are returned. In general only a single type is allowed. The exception is that you can safely mix the geocode and establishment types, but note that this will have the same effect as specifying no types.
Request with no specified types: https://maps.googleapis.com/maps/api/place/autocomplete/json?input=Ritz%20London&language=en&key=your_api_key_here
{
"predictions" : [
{
"description" : "The Ritz London, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 4
},
{
"length" : 6,
"offset" : 29
}
],
"place_id" : "ChIJV8gP0ykFdkgRFEAEHoE1YVk",
"reference" : "ChIJV8gP0ykFdkgRFEAEHoE1YVk",
"structured_formatting" : {
"main_text" : "The Ritz London",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 4
}
],
"secondary_text" : "Piccadilly, London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 12
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "The Ritz London"
},
{
"offset" : 17,
"value" : "Piccadilly"
},
{
"offset" : 29,
"value" : "London"
},
{
"offset" : 37,
"value" : "UK"
}
],
"types" : [ "lodging", "restaurant", "food", "point_of_interest", "establishment" ]
},
{
"description" : "Ritz, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 0
},
{
"length" : 6,
"offset" : 6
}
],
"place_id" : "ChIJ1wd5z6gFdkgRUdbMn3pMiKM",
"reference" : "ChIJ1wd5z6gFdkgRUdbMn3pMiKM",
"structured_formatting" : {
"main_text" : "Ritz",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 0
}
],
"secondary_text" : "London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 0
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz"
},
{
"offset" : 6,
"value" : "London"
},
{
"offset" : 14,
"value" : "UK"
}
],
"types" : [ "laundry", "point_of_interest", "establishment" ]
},
{
"description" : "Ritz London Cigars, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 11,
"offset" : 0
}
],
"place_id" : "ChIJa_Ih9V8FdkgRfNt8eaSIXuw",
"reference" : "ChIJa_Ih9V8FdkgRfNt8eaSIXuw",
"structured_formatting" : {
"main_text" : "Ritz London Cigars",
"main_text_matched_substrings" : [
{
"length" : 11,
"offset" : 0
}
],
"secondary_text" : "Piccadilly, London, UK"
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz London Cigars"
},
{
"offset" : 20,
"value" : "Piccadilly"
},
{
"offset" : 32,
"value" : "London"
},
{
"offset" : 40,
"value" : "UK"
}
],
"types" : [ "point_of_interest", "store", "establishment" ]
},
{
"description" : "Hiro Miyoshi at The Ritz London, The Ritz, Piccadilly, London, UK",
"matched_substrings" : [
{
"length" : 11,
"offset" : 20
}
],
"place_id" : "ChIJcXuw6tkFdkgR89PgYYTudzU",
"reference" : "ChIJcXuw6tkFdkgR89PgYYTudzU",
"structured_formatting" : {
"main_text" : "Hiro Miyoshi at The Ritz London",
"main_text_matched_substrings" : [
{
"length" : 11,
"offset" : 20
}
],
"secondary_text" : "The Ritz, Piccadilly, London, UK"
},
"terms" : [
{
"offset" : 0,
"value" : "Hiro Miyoshi at The Ritz London"
},
{
"offset" : 33,
"value" : "The Ritz"
},
{
"offset" : 43,
"value" : "Piccadilly"
},
{
"offset" : 55,
"value" : "London"
},
{
"offset" : 63,
"value" : "UK"
}
],
"types" : [
"beauty_salon",
"hair_care",
"spa",
"point_of_interest",
"store",
"establishment"
]
},
{
"description" : "Ritz Parade, London, UK",
"matched_substrings" : [
{
"length" : 4,
"offset" : 0
},
{
"length" : 6,
"offset" : 13
}
],
"place_id" : "EhdSaXR6IFBhcmFkZSwgTG9uZG9uLCBVSyIuKiwKFAoSCbu6PcobEnZIEbPVf1MZfEocEhQKEgnz8xe3Wxt2SBEKsgA5eS6RSQ",
"reference" : "EhdSaXR6IFBhcmFkZSwgTG9uZG9uLCBVSyIuKiwKFAoSCbu6PcobEnZIEbPVf1MZfEocEhQKEgnz8xe3Wxt2SBEKsgA5eS6RSQ",
"structured_formatting" : {
"main_text" : "Ritz Parade",
"main_text_matched_substrings" : [
{
"length" : 4,
"offset" : 0
}
],
"secondary_text" : "London, UK",
"secondary_text_matched_substrings" : [
{
"length" : 6,
"offset" : 0
}
]
},
"terms" : [
{
"offset" : 0,
"value" : "Ritz Parade"
},
{
"offset" : 13,
"value" : "London"
},
{
"offset" : 21,
"value" : "UK"
}
],
"types" : [ "route", "geocode" ]
}
],
"status" : "OK"
}

Apache NiFi not converting recognizing decimal type in convertJsontoAvro Processor

I have a ConvertJsontoAvro processor in NiFi 1.4 and am having difficulty getting the proper datatype of decimal within the avro. The data is being transformed into bytes using logical Avro data types within ExecuteSQL processor, converting avro to Json using ConvertAvrotoJSON processor, and then using ConvertJsonToAvro processor to put into HDFS using PutParquet.
My schema is :
{
"type" : "record",
"name" : "schema",
"fields" : [ {
"name" : "entryDate",
"type" : [ "null", {
"type" : "long",
"logicalType" : "timestamp-micros"
} ],
"default" : null
}, {
"name" : "points",
"type" : [ "null", {
"type" : "bytes",
"logicalType" : "decimal",
"precision" : 18,
"scale" : 6
} ],
"default" : null
}]
}
My JSON:
{
"entryDate" : 2018-01-26T13:48:22.087,
"points" : 6.000000
}
I get an error for the avro saying
Cannont convert field points: Cannot resolve union : {"bytes": "+|Ð" not in ["null", {"type":"bytes","logicalType":"decimal","precision":18,"scale":6}]"
Is there some type of work around for this?...
Currently you cannot mix null type and logical types due to bug in Avro. Check this still unresolved issue:
https://issues.apache.org/jira/browse/AVRO-1891
Also the defaults value cannot be null. This should work for you:
{
"type" : "record",
"name" : "schema",
"fields" : [ {
"name" : "entryDate",
"type" : {
"type" : "long",
"logicalType" : "timestamp-micros"
},
"default" : 0
}, {
"name" : "points",
"type" : {
"type" : "bytes",
"logicalType" : "decimal",
"precision" : 18,
"scale" : 6
},
"default" : ""
}]
}
For anyone interested, I was able to set the decimal and a default value as null (in cases when the field is null or missing), currently using Nifi 1.14.0
{
"name": "value",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 8,
"scale": 4
}
],
"default": null
}

Sorting Through NSData Results

I use a Web API from Spotify in my app to find a track by a particular artist within Spotify. The results look like this:
"tracks" : {
"href" : "https://api.spotify.com/v1/search?query=track%3A%22Dude+Looks+Like+A+Lady+%22+artist%3A%22+Aerosmith%22&offset=0&limit=1&type=track",
"items" : [ {
"album" : {
"album_type" : "album",
"available_markets" : [ "CA", "MX", "US" ],
"external_urls" : {
"spotify" : "https://open.spotify.com/album/3XYqOJI1YlX40kJTdzFEzp"
},
"href" : "https://api.spotify.com/v1/albums/3XYqOJI1YlX40kJTdzFEzp",
"id" : "3XYqOJI1YlX40kJTdzFEzp",
"images" : [ {
"height" : 640,
"url" : "https://i.scdn.co/image/948208cdb26864468ee4320070cd10e6b580d852",
"width" : 640
}, {
"height" : 300,
"url" : "https://i.scdn.co/image/087aeee7ed7b7397f5cf5a4c90bc0532d7a3319c",
"width" : 300
}, {
"height" : 64,
"url" : "https://i.scdn.co/image/562e9fa179952065137a17b175b5bca0647d5f47",
"width" : 64
} ],
"name" : "Permanent Vacation (Remastered)",
"type" : "album",
"uri" : "spotify:album:3XYqOJI1YlX40kJTdzFEzp"
},
"artists" : [ {
"external_urls" : {
"spotify" : "https://open.spotify.com/artist/7Ey4PD4MYsKc5I2dolUwbH"
},
"href" : "https://api.spotify.com/v1/artists/7Ey4PD4MYsKc5I2dolUwbH",
"id" : "7Ey4PD4MYsKc5I2dolUwbH",
"name" : "Aerosmith",
"type" : "artist",
"uri" : "spotify:artist:7Ey4PD4MYsKc5I2dolUwbH"
} ],
"available_markets" : [ "CA", "MX", "US" ],
"disc_number" : 1,
"duration_ms" : 265773,
"explicit" : false,
"external_ids" : {
"isrc" : "USIR10000454"
},
"external_urls" : {
"spotify" : "https://open.spotify.com/track/6gQUbFwwdYXlKdmqRoWKJe"
},
"href" : "https://api.spotify.com/v1/tracks/6gQUbFwwdYXlKdmqRoWKJe",
"id" : "6gQUbFwwdYXlKdmqRoWKJe",
"name" : "Dude (Looks Like A Lady)",
"popularity" : 55,
"preview_url" : "https://p.scdn.co/mp3-preview/7d85766664041815e16b54eb014d3d120f883db8",
"track_number" : 5,
"type" : "track",
"uri" : "spotify:track:6gQUbFwwdYXlKdmqRoWKJe"
} ],
"limit" : 1,
"next" : "https://api.spotify.com/v1/search?query=track%3A%22Dude+Looks+Like+A+Lady+%22+artist%3A%22+Aerosmith%22&offset=1&limit=1&type=track",
"offset" : 0,
"previous" : null,
"total" : 7
}
The only part that I actually need is the FINAL external_urls line that looks like this:
"external_urls" : {
"spotify" : "https://open.spotify.com/track/6gQUbFwwdYXlKdmqRoWKJe"
},
As this is the one that contains the specific app. I have done about 100 tries with the Web API, and this is the way it always appears. So my question is HOW can I pull out JUST this one area?

Resources