Nifi RecordReader & RecordWriter serialization error. IllegalTypeConversionException; Cannot convert value of class; because the type is not supported - avro

I'm trying to convert JSON to JSON using JoltTransformRecord in Apache NiFi. When I try the transformation in https://jolt-demo.appspot.com/, I get the correct result, so the specification itself is okay.
But when I try to transform the JSON using JoltTransformRecord, it throws an exception. The error is: "Cannot convert value of class [Ljava.lang.Object; because the type is not supported". I don't understand why I'm getting this error. I checked my input and output schemas but didn't find anything wrong — they look correct.
Below are my example input and output JSON, my Jolt specification, and my input and output schemas. For this I'm using JsonTreeReader and JsonRecordSetWriter.
--- How can I solve this problem? ---
Example input JSON for JoltTransformRecord (in this example there is only one JSON object inside the array, but in reality there are many JSON objects inside the array):
[ {
"uuid" : "MFMS1-MC5",
"componentId" : "path1",
"Samples" : {
"PathFeedrate" : [ {
"dataItemId" : "pf",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68104",
"value" : "425.5333",
"name" : "Fact",
"subType" : "ACTUAL"
}, {
"dataItemId" : "pf",
"timestamp" : "2019-03-01T21:48:30.244219Z",
"sequence" : "68117",
"value" : "0",
"name" : "Fact",
"subType" : "ACTUAL"
} ]
},
"Events" : {
"SequenceNumber" : [ {
"dataItemId" : "seq",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68105",
"value" : "0",
"name" : "sequenceNum"
} ],
"Unit" : [ {
"dataItemId" : "unit",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68106",
"value" : "13",
"name" : "unitNum"
} ]
}
}]
Sample output JSON I want:
{
"DataItems" : [ {
"uuid" : "MFMS1-MC5",
"componentId" : "path1",
"eventType" : "Samples",
"type" : "PathFeedrate",
"dataItemId" : "pf",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68104",
"value" : "425.5333",
"name" : "Fact",
"subType" : "ACTUAL"
}, {
"uuid" : "MFMS1-MC5",
"componentId" : "path1",
"eventType" : "Samples",
"type" : "PathFeedrate",
"dataItemId" : "pf",
"timestamp" : "2019-03-01T21:48:30.244219Z",
"sequence" : "68117",
"value" : "0",
"name" : "Fact",
"subType" : "ACTUAL"
}, {
"uuid" : "MFMS1-MC5",
"componentId" : "path1",
"eventType" : "Events",
"type" : "SequenceNumber",
"dataItemId" : "seq",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68105",
"value" : "0",
"name" : "sequenceNum"
}, {
"uuid" : "MFMS1-MC5",
"componentId" : "path1",
"eventType" : "Events",
"type" : "Unit",
"dataItemId" : "unit",
"timestamp" : "2019-03-01T21:48:27.940558Z",
"sequence" : "68106",
"value" : "13",
"name" : "unitNum"
} ]
}
My Jolt Specification;
[
{
"operation": "shift",
"spec": {
"Samples": {
"*": {
"*": {
"#(3,uuid)": "Items.&2[#2].uuid",
"#(3,componentId)": "Items.&2[#2].componentId",
"$2": "Items.&2[#2].eventType",
"$1": "Items.&2[#2].type",
"*": "Items.&2[#2].&"
}
}
},
"Events": {
"*": {
"*": {
"#(3,uuid)": "Items.&2[#2].uuid",
"#(3,componentId)": "Items.&2[#2].componentId",
"$2": "Items.&2[#2].eventType",
"$1": "Items.&2[#2].type",
"*": "Items.&2[#2].&"
}
}
},
"Condition": {
"*": {
"*": {
"#(3,uuid)": "Items.&2[#2].uuid",
"#(3,componentId)": "Items.&2[#2].componentId",
"$2": "Items.&2[#2].eventType",
"$1": "Items.&2[#2].value",
"*": "Items.&2[#2].&"
}
}
}
}
},
{
"operation": "shift",
"spec": {
"Items": {
"*": {
"*": "DataItems[]"
}
}
}
}
]
This specification works correctly — I have verified it in the Jolt transform demo.
I'm using JsonTreeReader to read the JSON in JoltTransformRecord. This is my input schema:
{
"name": "Items",
"namespace": "Items",
"type": "record",
"fields": [
{
"name": "uuid",
"type": "string"
},
{
"name": "componentId",
"type": "string"
},
{
"name": "Samples",
"type": ["null", {
"type": "map",
"values": {
"type": "array",
"items": {
"name": "SamplesDataItem",
"type": "record",
"fields": [
{
"name": "dataItemId",
"type": "string"
},
{
"name": "timestamp",
"type": "string"
},
{
"name": "sequence",
"type": "string"
},
{
"name": "value",
"type": "string"
},
{
"name": "name",
"type": ["null", "string"]
},
{
"name": "subType",
"type": ["null", "string"]
},
{
"name": "sampleRate",
"type": ["null", "string"]
},
{
"name": "statistic",
"type": ["null", "string"]
},
{
"name": "duration",
"type": ["null", "string"]
},
{
"name": "sampleCount",
"type": ["null", "string"]
},
{
"name": "compositionId",
"type": ["null", "string"]
},
{
"name": "resetTriggered",
"type": ["null", "string"]
}
]
}
}
}]
},
{
"name": "Events",
"type": ["null", {
"type": "map",
"values": {
"type": "array",
"items": {
"name": "EventsDataItem",
"type": "record",
"fields": [
{
"name": "dataItemId",
"type": "string"
},
{
"name": "timestamp",
"type": "string"
},
{
"name": "sequence",
"type": "string"
},
{
"name": "value",
"type": "string"
},
{
"name": "name",
"type": ["null", "string"]
},
{
"name": "subType",
"type": ["null", "string"]
},
{
"name": "compositionId",
"type": ["null", "string"]
},
{
"name": "resetTriggered",
"type": ["null", "string"]
}
]
}
}
}]
},
{
"name": "Condition",
"type": ["null", {
"type": "map",
"values": {
"type": "array",
"items": {
"name": "ConditionDataItem",
"type": "record",
"fields": [
{
"name": "dataItemId",
"type": "string"
},
{
"name": "timestamp",
"type": "string"
},
{
"name": "type",
"type": "string"
},
{
"name": "sequence",
"type": "string"
},
{
"name": "name",
"type": ["null", "string"]
},
{
"name": "subType",
"type": ["null", "string"]
},
{
"name": "nativeCode",
"type": ["null", "string"]
},
{
"name": "nativeSeverity",
"type": ["null", "string"]
},
{
"name": "qualifier",
"type": ["null", "string"]
},
{
"name": "statistic",
"type": ["null", "string"]
},
{
"name": "compositionId",
"type": ["null", "string"]
}
]
}
}
}]
}
]
}
I'm using JsonRecordSetWriter to write the converted result in JoltTransformRecord. This is my output schema:
{
"name": "Items",
"type": "record",
"namespace": "Items",
"fields": [
{
"name": "DataItems",
"type": {
"type": "array",
"items": {
"name": "DataItems",
"type": "record",
"fields": [
{
"name": "uuid",
"type": "string"
},
{
"name": "componentId",
"type": "string"
},
{
"name": "eventType",
"type": "string"
},
{
"name": "type",
"type": "string"
},
{
"name": "dataItemId",
"type": "string"
},
{
"name": "timestamp",
"type": "string"
},
{
"name": "value",
"type": "string"
},
{
"name": "name",
"type": ["null", "string"],
"default": null
},
{
"name": "subType",
"type": ["null", "string"],
"default": null
}
]
}
}
}
]
}

This is indeed a bug in the record handling utilities, I have written NIFI-6105 to cover the fix. Good catch!
As a workaround, since you have JSON as input and output, you can use JoltTransformJson instead of JoltTransformRecord. Alternatively, if you know the keys in the map (PathFeedrate, e.g.), you can change the schema to treat it as a record rather than a map, that might get you around the bug.

Related

Complex Nested JSON Conversion to Data Table without creating class in C#

{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Title",
"type": "object",
"properties": {
"location": {
"type": "string"
},
"dagno": {
"type": "string"
},
"pattano": {
"type": "string"
},
"pattatype": {
"type": "string"
},
"landclass": {
"type": "string"
},
"bigha": {
"type": "string"
},
"katha": {
"type": "string"
},
"lessa": {
"type": "string"
},
"pid": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"pdarid": {
"type": "string"
},
"pdarname": {
"type": "string"
},
"pdarfather": {
"type": "string"
}
},
"required": [
"pdarid",
"pdarname",
"pdarfather"
]
}
]
}
},
"required": [
"location",
"dagno",
"pattano",
"pattatype",
"landclass",
"bigha",
"katha",
"lessa",
"pid"
]
}

How can I write an avro schema for an array of arrays?

For example, I've tried the schema below, but it isn't working. I need to create a schema that has, in one field, an array of arrays, and I couldn't get it to work.
{
"name": "SelfHealingStarter",
"namespace": "SCP.Kafka.AvroSchemas",
"doc": "Message with all the necessary information to run Self Healing process.",
"type": "record",
"fields": [
{
"name": "FiveMinutesAgoMeasurement",
"type": "record",
"doc": "Field with all five minutes ago measurement.",
"fields": [
{
"name": "equipments",
"doc": "List with all equipments measurement.",
"type": {
"type": "array",
"items": {
"type": {
"type": "array",
"items": "string"
},
"default": []
}
},
"default": []
}
]
}
]
}
IDL
protocol Example {
record Foo {
array<array<string>> data = [];
}
}
AVSC from java -jar ~/workspace/avro-tools-1.8.2.jar idl2schemata example.idl
{
"type" : "record",
"name" : "Foo",
"fields" : [ {
"name" : "data",
"type" : {
"type" : "array",
"items" : {
"type" : "array",
"items" : "string"
}
},
"default" : [ ]
} ]
}

Avro multiple enums in the same type (avro.SchemaParseException: Can't redefine)

I have the following Avro schema, but when I try to parse it I get the following error:
Exception in thread "main" org.apache.avro.SchemaParseException: Can't redefine: ...
{
"name": "card_1_nr",
"type": "string"
}, {
"name": "card_1_type",
"type": {
"name": "card_type",
"type": "enum",
"symbols": ["diamonds", "clubs", "hearts", "spades"],
"default": "diamonds"
}
}, {
"name": "card_2_nr",
"type": "string"
}, {
"name": "card_2_type",
"type": {
"name": "card_type",
"type": "enum",
"symbols": ["diamonds", "clubs", "hearts", "spades"],
"default": "diamonds"
}
}
You simply need to reference the already-defined enum by its type name:
{
"name": "card_1_nr",
"type": "string"
}, {
"name": "card_1_type",
"type": {
"name": "card_type",
"type": "enum",
"symbols": ["diamonds", "clubs", "hearts", "spades"],
"default": "diamonds"
}
}, {
"name": "card_2_nr",
"type": "string"
}, {
"name": "card_2_type",
"type": "card_type"
}

Avro Nested array exception

I am trying to generate an Avro schema for a nested array.
The top-most array "Stores" is the issue; the inner array "Business" is correct.
{"name": "Stores",
"type": {
"type": "array",
"items": {
"name": "Hours",
"type": "record",
"fields": [
{
"name": "Week",
"type": "string"
},
{"name": "Business",
"type":"array",
"items": {"name":"Business_record","type":"record","fields":[
{"name": "Day", "type":"string"},
{"name": "StartTime", "type": "string"},
{"name": "EndTime", "type": "string"}
]}
}
]
}
}
And the exception I'm getting is:
[ {
"level" : "fatal",
"message" : "illegal Avro schema",
"exceptionClass" : "org.apache.avro.SchemaParseException",
"exceptionMessage" : "No type: {\"name\":\"Stores\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"Hours\",\"type\":\"record\",\"fields\":[{\"name\":\"Week\",\"type\":\"string\"},{\"name\":\"Business\",\"type\":\"array\",\"items\":{\"name\":\"Business_record\",\"type\":\"record\",\"fields\":[{\"name\":\"Day\",\"type\":\"string\"},{\"name\":\"StartTime\",\"type\":\"string\"},{\"name\":\"EndTime\",\"type\":\"string\"}]}}]}}}",
"info" : "other messages follow (if any)"
} ]
I think it has something to do with the [] or {} around the outer array's fields, but I'm not able to figure it out.
Any help is appreciated.
I found the mistake I was making: when I added a nested "type" wrapper for the outer array, it worked for me.
{
"name": "Stores",
"type": "array",
"items": {
"name": "Hours",
"type": "record",
"fields": [
{
"name": "Week",
"type": "string"
},
{
"name": "Business",
"type": {
"type": "array",
"items": {
"name": "Business_record",
"type": "record",
"fields": [
{
"name": "Day",
"type": "string"
},
{
"name": "StartTime",
"type": "string"
},
{
"name": "EndTime",
"type": "string"
}
]
}
}
}
]
}
}

How to define a schema which have a union in an array in avro?

I want to define my array element as a union. Is it possible? If so please share a sample schema.
I think this is what you are looking for:
Avro Schema:
{
"type": "record",
"namespace": "example.avro",
"name": "array_union",
"fields": [
{
"name": "body",
"type": {
"name": "body",
"type": "record",
"fields": [
{
"name": "abc",
"type": [
"null",
{
"type": "array",
"name": "abc_name_0",
"items": {
"name": "_name_0",
"type": "record",
"fields": [
{
"name": "app_id",
"type": [
"null",
"string"
]
},
{
"name": "app_name",
"type": [
"null",
"string"
]
},
{
"name": "app_key",
"type": [
"null",
"string"
]
}
]
}
}
]
}
]
}
}
]
}
Valid Json that schema can accept:
{
"body": {
"abc": {
"array": [
{
"app_id": {
"string": "abc"
},
"app_name": {
"string": "bcd"
},
"app_key": {
"string": "cde"
}
}
]
}
}
}
Or,
{
"body": {
"abc": null
}
}
You could add this below piece of code as a record field type
{
"name": "some_type",
"type": {
"type": "array",
"items": {
"name": "SomeType",
"type": "record",
"fields": [
{
"name": "name",
"type": ["null", "string"]
},
{
"name": "text",
"type": "string"
},
{
"name": "type",
"type": {
"name": "NamedTextType",
"type": "enum",
"symbols": [ "named_text", "common_named_text" ]
}
}
]
}
}
}
Hope this helps!

Resources