So I have been banging my head against this for the last couple of days. I am having trouble de-serializing an Avro file that we are generating and sending into Azure Event Hub. We are attempting to do this with Databricks Runtime 7.2 Structured Streaming, using the newer from_avro method described here to de-serialize the body of the event message.
import org.apache.spark.eventhubs._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.avro._
import org.apache.avro._
import org.apache.spark.sql.types._
import org.apache.spark.sql.avro.functions._
// Event Hub connection string (placeholder value; substitute the real one).
val connStr = "<EventHubConnectionstring>"

// Event Hubs reader settings, with the max-events-per-trigger option set to 5.
val customEventhubParameters = EventHubsConf(connStr).setMaxEventsPerTrigger(5)
//.setStartingPosition(EventPosition.fromStartOfStream)

// Streaming source restricted to events whose "TableName" property equals "Branches".
val incomingStream = {
  val allEvents = spark.readStream
    .format("eventhubs")
    .options(customEventhubParameters.toMap)
    .load()
  allEvents.filter(col("properties").getItem("TableName") === "Branches")
}
val avroSchema = s"""{"type":"record","name":"Branches","fields":[{"name":"_src_ChangeOperation","type":["null","string"]},{"name":"_src_CurrentTrackingId","type":["null","long"]},{"name":"_src_RecordExtractUTCTimestamp","type":"string"},{"name":"ID","type":["null","int"]},{"name":"BranchCode","type":["null","string"]},{"name":"BranchName","type":["null","string"]},{"name":"Address1","type":["null","string"]},{"name":"Address2","type":["null","string"]},{"name":"City","type":["null","string"]},{"name":"StateID","type":["null","int"]},{"name":"ZipCode","type":["null","string"]},{"name":"Telephone","type":["null","string"]},{"name":"Contact","type":["null","string"]},{"name":"Title","type":["null","string"]},{"name":"DOB","type":["null","string"]},{"name":"TimeZoneID","type":["null","int"]},{"name":"ObserveDaylightSaving","type":["null","boolean"]},{"name":"PaySummerTimeHour","type":["null","boolean"]},{"name":"PayWinterTimeHour","type":["null","boolean"]},{"name":"BillSummerTimeHour","type":["null","boolean"]},{"name":"BillWinterTimeHour","type":["null","boolean"]},{"name":"Deleted","type":["null","boolean"]},{"name":"LastUpdated","type":["null","string"]},{"name":"txJobID","type":["null","string"]},{"name":"SourceID","type":["null","string"]},{"name":"HP_UseHolPayHourMethod","type":["null","boolean"]},{"name":"HP_HourlyRatePercent","type":["null","float"]},{"name":"HP_RequiredWeeksOfEmployment","type":["null","float"]},{"name":"rgUseSystemSettings","type":["null","boolean"]},{"name":"rgDutySplitBy","type":["null","int"]},{"name":"rgBasePeriodDate","type":["null","string"]},{"name":"rgFirstDayOfWeek","type":["null","int"]},{"name":"rgDutyStartOfDayTime","type":["null","string"]},{"name":"rgHolidayStartOfDayTime","type":["null","string"]},{"name":"rgMinimumTimePeriod","type":["null","int"]},{"name":"rgLoadPublicTable","type":["null","boolean"]},{"name":"rgPOTPayPeriodID","type":["null","int"]},{"name":"rgPOT1","type":["null","string"]},{"name":"rgPOT2","type":["null",
"string"]},{"name":"Facsimile","type":["null","string"]},{"name":"CountryID","type":["null","int"]},{"name":"EmailAddress","type":["null","string"]},{"name":"ContractSecurityHistoricalWeeks","type":["null","int"]},{"name":"ContractSecurityFutureWeeks","type":["null","int"]},{"name":"TimeLinkTelephone1","type":["null","string"]},{"name":"TimeLinkTelephone2","type":["null","string"]},{"name":"TimeLinkTelephone3","type":["null","string"]},{"name":"TimeLinkTelephone4","type":["null","string"]},{"name":"TimeLinkTelephone5","type":["null","string"]},{"name":"AutoTakeMissedCalls","type":["null","boolean"]},{"name":"AutoTakeMissedCallsDuration","type":["null","string"]},{"name":"AutoTakeApplyDurationToCheckCalls","type":["null","boolean"]},{"name":"AutoTakeMissedCheckCalls","type":["null","boolean"]},{"name":"AutoTakeMissedCheckCallsDuration","type":["null","string"]},{"name":"DocumentLocation","type":["null","string"]},{"name":"DefaultPortalAccess","type":["null","boolean"]},{"name":"DefaultPortalSecurityRoleID","type":["null","int"]},{"name":"EmployeeTemplateID","type":["null","int"]},{"name":"SiteCardTemplateID","type":["null","int"]},{"name":"TSAllowancesHeaderID","type":["null","int"]},{"name":"TSMinimumWageHeaderID","type":["null","int"]},{"name":"TimeLinkClaimMade","type":["null","boolean"]},{"name":"TSAllowancePeriodBaseDate","type":["null","string"]},{"name":"TSAllowancePeriodID","type":["null","int"]},{"name":"TSMinimumWageCalcMethodID","type":["null","int"]},{"name":"FlexibleShiftsHeaderID","type":["null","int"]},{"name":"SchedulingUseSystemSettings","type":["null","boolean"]},{"name":"MinimumRestPeriod","type":["null","int"]},{"name":"TSMealBreakHeaderID","type":["null","int"]},{"name":"ServiceTracImportType","type":["null","int"]},{"name":"StandDownDiaryEventID","type":["null","int"]},{"name":"ScheduledDutyChangeMessageTemplateId","type":["null","int"]},{"name":"ScheduledDutyAddedMessageTemplateId","type":["null","int"]},{"name":"ScheduledDutyRemovedMessageTemp
lateId","type":["null","int"]},{"name":"NegativeMessageResponsesPermitted","type":["null","boolean"]},{"name":"PortalEventsStandardLocFirst","type":["null","boolean"]},{"name":"ReminderMessage","type":["null","boolean"]},{"name":"ReminderMessageDaysBefore","type":["null","int"]},{"name":"ReminderMessageTemplateId","type":["null","int"]},{"name":"ScheduledDutyChangeMessageAllowReply","type":["null","boolean"]},{"name":"ScheduledDutyAddedMessageAllowReply","type":["null","boolean"]},{"name":"PayAlertEscalationGroup","type":["null","int"]},{"name":"BudgetedPay","type":["null","int"]},{"name":"PayAlertVariance","type":["null","string"]},{"name":"BusinessUnitID","type":["null","int"]},{"name":"APH_Hours","type":["null","float"]},{"name":"APH_Period","type":["null","int"]},{"name":"APH_PeriodCount","type":["null","int"]},{"name":"AveragePeriodHoursRuleId","type":["null","int"]},{"name":"HolidayScheduleID","type":["null","int"]},{"name":"AutomationRuleProfileId","type":["null","int"]}]}"""
// Decode the binary event body with the Avro schema above into a column named "payload".
val decoded_df = incomingStream.select(from_avro(col("body"), avroSchema).as("payload"))

// Start the streaming query, writing to the in-memory sink under the query name "read_hub".
val query1 = decoded_df.writeStream
  .queryName("read_hub")
  .format("memory")
  .start()
I have verified that the file we are sending has a valid schema, that it has data in it and that it is getting to the stream job in the notebook before failing with the following stack trace that states that the data is malformed. However I am able to write the generated file to a .avro file and de-serialize it using the normal .read.format("avro") method just fine.
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:413)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:361)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.writeWithV2(WriteToDataSourceV2Exec.scala:322)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.run(WriteToDataSourceV2Exec.scala:329)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:39)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:39)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:45)
at org.apache.spark.sql.execution.collect.Collector$.callExecuteCollect(Collector.scala:118)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:69)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:88)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:508)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:480)
at org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:396)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:2986)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3692)
at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2953)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3684)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3682)
at org.apache.spark.sql.Dataset.collect(Dataset.scala:2953)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:586)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$15(MicroBatchExecution.scala:581)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:276)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:274)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:71)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:581)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:231)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:276)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:274)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:71)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:199)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:193)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:346)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:259)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 37.0 failed 4 times, most recent failure: Lost task 0.3 in stage 37.0 (TID 84, 10.139.64.5, executor 0): org.apache.spark.SparkException: Malformed records are detected in record parsing. Current parse Mode: FAILFAST. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'.
at org.apache.spark.sql.avro.AvroDataToCatalyst.nullSafeEval(AvroDataToCatalyst.scala:111)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:731)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$7(WriteToDataSourceV2Exec.scala:438)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1615)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:477)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:385)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
at org.apache.spark.scheduler.Task.run(Task.scala:117)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:657)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:660)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ArrayIndexOutOfBoundsException: -40
at org.apache.avro.io.parsing.Symbol$Alternative.getSymbol(Symbol.java:424)
at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:290)
at org.apache.avro.io.parsing.Parser.advance(Parser.java:88)
at org.apache.avro.io.ResolvingDecoder.readIndex(ResolvingDecoder.java:267)
at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145)
at org.apache.spark.sql.avro.AvroDataToCatalyst.nullSafeEval(AvroDataToCatalyst.scala:100)
... 16 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2478)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2427)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2426)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2426)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1131)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1131)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1131)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2678)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2625)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2613)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:917)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2313)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:382)
... 46 more
Caused by: org.apache.spark.SparkException: Malformed records are detected in record parsing. Current parse Mode: FAILFAST. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'.
at org.apache.spark.sql.avro.AvroDataToCatalyst.nullSafeEval(AvroDataToCatalyst.scala:111)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:731)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$7(WriteToDataSourceV2Exec.scala:438)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1615)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:477)
at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:385)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
at org.apache.spark.scheduler.Task.run(Task.scala:117)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:657)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:660)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ArrayIndexOutOfBoundsException: -40
at org.apache.avro.io.parsing.Symbol$Alternative.getSymbol(Symbol.java:424)
at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:290)
at org.apache.avro.io.parsing.Parser.advance(Parser.java:88)
at org.apache.avro.io.ResolvingDecoder.readIndex(ResolvingDecoder.java:267)
at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145)
at org.apache.spark.sql.avro.AvroDataToCatalyst.nullSafeEval(AvroDataToCatalyst.scala:100)
... 16 more
Tech
C# Azure Function v3 .net core generating Avro file using Avro 1.8.2
Avro file is serialized to byte array using Generic Writer not Specific Writer and sent to Azure Event Hub
Databricks Runtime 7.2 (Apache Spark 3.0, Scala 2.12)
Databricks notebooks written in Scala
Databricks Structured Stream Notebook to de-serialize the Avro message
and send to delta lake table
NOT using the following
Event Hub Capture
Kafka
Schema registry
Ok so I just figured out what the issue was. It was in how we were generating the avro message before sending it to event hub. In our serialization method we were using the var writer = new GenericDatumWriter<GenericRecord>(schema); and IFileWriter<GenericRecord> to write to a memory stream and then just getting the byte array of that stream as seen below.
/// <summary>
/// Serializes every row of <paramref name="data"/> to an Avro byte array by appending
/// generic records through an Avro file writer that is backed by a memory stream.
/// </summary>
/// <remarks>
/// This is the variant whose output Spark's <c>from_avro</c> could not decode: the bytes
/// are produced by <c>DataFileWriter</c> rather than by a bare <c>BinaryEncoder</c>.
/// </remarks>
/// <param name="data">Captured rows (JSON strings) together with their column schema.</param>
/// <returns>The full contents of the memory stream after all records were appended.</returns>
public byte[] Serialize(DataCapture data)
{
    var schema = GenerateSchema(data.Schema);
    var datumWriter = new GenericDatumWriter<GenericRecord>(schema);

    using (var buffer = new MemoryStream())
    {
        using (IFileWriter<GenericRecord> avroFile = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, buffer))
        {
            foreach (var rowJson in data.Rows)
            {
                var row = JsonConvert.DeserializeObject<JObject>(rowJson);
                var record = new GenericRecord(schema);

                foreach (var column in data.Schema.Columns)
                {
                    // Raw JSON token for this column; converted per the mapped Avro type below.
                    var cell = row.GetValue(column.ColumnName);

                    switch (MapDataType(column.DataTypeName))
                    {
                        case AvroTypeEnum.Boolean:
                            record.Add(column.ColumnName, cell.Value<bool?>());
                            break;
                        // Mappings for the remaining data types were removed to shorten the example.
                        default:
                            record.Add(column.ColumnName, cell.Value<string>());
                            break;
                    }
                }

                avroFile.Append(record);
            }
        }

        // Read the buffer only after the file writer has been disposed.
        return buffer.ToArray();
    }
}
When what we actually should do is use var writer = new DefaultWriter(schema); and var encoder = new BinaryEncoder(ms); to then write the records with writer.Write(record, encoder); before returning the byte array of the stream.
/// <summary>
/// Serializes every row of <paramref name="data"/> to an Avro byte array by writing each
/// generic record straight through a <c>BinaryEncoder</c> over a memory stream.
/// </summary>
/// <remarks>
/// This is the variant that Spark's <c>from_avro</c> was able to decode. All rows are
/// written back-to-back into the same stream.
/// </remarks>
/// <param name="data">Captured rows (JSON strings) together with their column schema.</param>
/// <returns>The full contents of the memory stream after all records were written.</returns>
public byte[] Serialize(DataCapture data)
{
    var schema = GenerateSchema(data.Schema);
    var datumWriter = new DefaultWriter(schema);

    using (var buffer = new MemoryStream())
    {
        var binaryEncoder = new BinaryEncoder(buffer);

        foreach (var rowJson in data.Rows)
        {
            var row = JsonConvert.DeserializeObject<JObject>(rowJson);
            var record = new GenericRecord(schema);

            foreach (var column in data.Schema.Columns)
            {
                // Raw JSON token for this column; converted per the mapped Avro type below.
                var cell = row.GetValue(column.ColumnName);

                switch (MapDataType(column.DataTypeName))
                {
                    case AvroTypeEnum.Boolean:
                        record.Add(column.ColumnName, cell.Value<bool?>());
                        break;
                    // Mappings for the remaining data types were removed to shorten the example.
                    default:
                        record.Add(column.ColumnName, cell.Value<string>());
                        break;
                }
            }

            datumWriter.Write(record, binaryEncoder);
        }

        return buffer.ToArray();
    }
}
So the lesson learned is that not all Avro memory streams converted to byte[] are the same. The from_avro method will only de-serialize Avro data that has been binary encoded with the BinaryEncoder class, not data created with the IFileWriter. If there is something that I should be doing instead please let me know, but this fixed my issue. Hopefully my pain will spare others the same.
I am presently working on a program on Android Things for connecting to Google Cloud IoT Core. I used the sample Maven code provided by Google and modified it for Gradle (with all the imports and stuff). After doing every kind of check, whenever I try to run the program on a Raspberry Pi 3 running Android Things it keeps giving this error:
W/System.err: java.io.FileNotFoundException: com/example/adityaprakash/test/rsa_private.pem (No such file or directory)
telling me that the private key file that I am supposed to use for the JWT doesn't exist, despite the fact that it does and I have given the path for the pem file. Here is my Java code:
package com.example.adityaprakash.test;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;
public class MainActivity extends AppCompatActivity {
#Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
//setContentView(R.layout.activity_main);
Log.i("#########","######");
MqttExample mqtt = new MqttExample();
try {
mqtt.Start();
} catch (Exception e) {
e.printStackTrace();
}
}
}
The MqttExample.java
package com.example.adityaprakash.test;
// [END cloudiotcore_mqtt_imports]
import org.eclipse.paho.client.mqttv3.MqttClient;
import org.eclipse.paho.client.mqttv3.MqttConnectOptions;
import org.eclipse.paho.client.mqttv3.MqttMessage;
import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence;
import org.joda.time.DateTime;
import java.io.BufferedReader;
import java.io.FileReader;
import java.security.KeyFactory;
import java.security.spec.PKCS8EncodedKeySpec;
import android.util.Base64;
import io.jsonwebtoken.JwtBuilder;
import io.jsonwebtoken.Jwts;
import io.jsonwebtoken.SignatureAlgorithm;
/**
 * Example client that authenticates against Google Cloud IoT Core with a JWT and publishes a
 * series of messages over MQTT.
 */
public class MqttExample {
    // [START cloudiotcore_mqtt_createjwt]
    /**
     * Create a Cloud IoT Core JWT for the given project id, signed with the given RSA key.
     *
     * @param projectId GCP project id; used as the token's audience
     * @param privateKeyFile path of a PEM file holding the private key
     * @return the compact, RS256-signed JWT
     * @throws Exception if the key file cannot be read or the key cannot be parsed
     */
    public static String createJwtRsa(String projectId, String privateKeyFile) throws Exception {
        DateTime now = new DateTime();
        StringBuilder pemText = new StringBuilder();
        // try-with-resources closes the reader even when reading fails part-way through.
        try (BufferedReader br = new BufferedReader(new FileReader(privateKeyFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                pemText.append(line).append('\n');
            }
        }
        // Create a JWT to authenticate this device. The device will be disconnected after the token
        // expires, and will have to reconnect with a new token. The audience field should always be set
        // to the GCP project id.
        JwtBuilder jwtBuilder =
                Jwts.builder()
                        .setIssuedAt(now.toDate())
                        .setExpiration(now.plusMinutes(20).toDate())
                        .setAudience(projectId);
        // Strip the PEM armor so that only the Base64 payload remains.
        String privateKeyPEM = pemText.toString();
        privateKeyPEM = privateKeyPEM.replace("-----BEGIN PRIVATE KEY-----\n", "");
        privateKeyPEM = privateKeyPEM.replace("-----END PRIVATE KEY-----", "");
        byte[] encoded = Base64.decode(privateKeyPEM, Base64.DEFAULT);
        PKCS8EncodedKeySpec spec = new PKCS8EncodedKeySpec(encoded);
        KeyFactory kf = KeyFactory.getInstance("RSA");
        return jwtBuilder.signWith(SignatureAlgorithm.RS256, kf.generatePrivate(spec)).compact();
    }
    // [END cloudiotcore_mqtt_createjwt]

    /**
     * Load the options, configure MQTT, and publish messages.
     *
     * @throws Exception if the JWT cannot be created or any MQTT operation fails
     */
    public void Start() throws Exception {
        // [START cloudiotcore_mqtt_configuremqtt]
        MqttExampleOptions options = MqttExampleOptions.values();
        if (options == null) {
            // Could not parse.
            System.exit(1);
        }
        // Build the connection string for Google's Cloud IoT Core MQTT server. Only SSL
        // connections are accepted. For server authentication, the JVM's root certificates
        // are used.
        final String mqttServerAddress =
                String.format("ssl://%s:%s", options.mqttBridgeHostname, options.mqttBridgePort);
        // Create our MQTT client. The mqttClientId is a unique string that identifies this device. For
        // Google Cloud IoT Core, it must be in the format below.
        final String mqttClientId =
                String.format(
                        "projects/%s/locations/%s/registries/%s/devices/%s",
                        options.projectId, options.cloudRegion, options.registryId, options.deviceId);
        MqttConnectOptions connectOptions = new MqttConnectOptions();
        // Note that Google Cloud IoT Core only supports MQTT 3.1.1, and Paho requires that we
        // explicitly set this. If you don't set the MQTT version, the server will immediately close
        // its connection to your device.
        connectOptions.setMqttVersion(MqttConnectOptions.MQTT_VERSION_3_1_1);
        // With Google Cloud IoT Core, the username field is ignored, however it must be set for the
        // Paho client library to send the password field. The password field is used to transmit a JWT
        // to authorize the device.
        connectOptions.setUserName("unused");
        System.out.println(options.algorithm);
        if (options.algorithm.equals("RS256")) {
            connectOptions.setPassword(
                    createJwtRsa(options.projectId, options.privateKeyFile).toCharArray());
        } else {
            // Only the RS256 path is implemented in this class, so the message must not
            // advertise ES256 as a valid choice.
            throw new IllegalArgumentException(
                    "Invalid algorithm " + options.algorithm + ". Only 'RS256' is supported.");
        }
        // [END cloudiotcore_mqtt_configuremqtt]
        // [START cloudiotcore_mqtt_publish]
        // Create a client, and connect to the Google MQTT bridge.
        MqttClient client = new MqttClient(mqttServerAddress, mqttClientId, new MemoryPersistence());
        try {
            client.connect(connectOptions);
            // Publish to the events or state topic based on the flag.
            String subTopic = options.messageType.equals("event") ? "events" : options.messageType;
            // The MQTT topic that this device will publish telemetry data to. The MQTT topic name is
            // required to be in the format below. Note that this is not the same as the device registry's
            // Cloud Pub/Sub topic.
            String mqttTopic = String.format("/devices/%s/%s", options.deviceId, subTopic);
            // Publish numMessages messages to the MQTT bridge, at a rate of 1 per second.
            for (int i = 1; i <= options.numMessages; ++i) {
                String payload =
                        String.format("%s/%s-payload number-%d", options.registryId, options.deviceId, i);
                System.out.format(
                        "Publishing %s message %d/%d: '%s'\n",
                        options.messageType, i, options.numMessages, payload);
                // Publish "payload" to the MQTT topic. qos=1 means at least once delivery. Cloud IoT Core
                // also supports qos=0 for at most once delivery.
                MqttMessage message = new MqttMessage(payload.getBytes());
                message.setQos(1);
                client.publish(mqttTopic, message);
                if (options.messageType.equals("event")) {
                    // Send telemetry events every second
                    Thread.sleep(1000);
                } else {
                    // Note: Update Device state less frequently than with telemetry events
                    Thread.sleep(5000);
                }
            }
        } finally {
            // Disconnect the client and finish the run. Skip the call when the connection was
            // never established, so a failed connect() is not masked by a second exception
            // thrown from disconnect().
            if (client.isConnected()) {
                client.disconnect();
            }
        }
        System.out.println("Finished loop successfully. Goodbye!");
        // [END cloudiotcore_mqtt_publish]
    }
}
and the MqttExampleOptions.java code:
package com.example.adityaprakash.test;
/**
 * Plain holder for the settings read by the MQTT example. Every value is hard-coded in
 * {@link #values()}.
 */
public class MqttExampleOptions {
    String projectId;
    String registryId;
    String deviceId;
    String privateKeyFile;
    String algorithm;
    String cloudRegion;
    int numMessages;
    String mqttBridgeHostname;
    short mqttBridgePort;
    String messageType;

    /**
     * Builds an options object populated with the example's fixed settings.
     *
     * @return the populated options, or {@code null} if populating them threw an exception
     */
    public static MqttExampleOptions values() {
        try {
            final MqttExampleOptions opts = new MqttExampleOptions();

            // Device identity within Cloud IoT Core.
            opts.cloudRegion = "asia-east1";
            opts.projectId = "_";
            opts.registryId = "_";
            opts.deviceId = "_";

            // Credentials used to sign the JWT.
            opts.algorithm = "RS256";
            opts.privateKeyFile = "com/example/adityaprakash/test/rsa_private.pem";

            // MQTT bridge endpoint.
            opts.mqttBridgeHostname = "mqtt.googleapis.com";
            opts.mqttBridgePort = 8883;

            // What to publish and how many times.
            opts.messageType = "event";
            opts.numMessages = 100;

            return opts;
        } catch (Exception e) {
            System.err.println(e.getMessage());
            return null;
        }
    }
}
Please can anyone give a solution to this problem.
P.S. I know the code looks totally crappy.I don't have experience with Android programming,so please let it go.
The example you are following is not designed for Android.
res.privateKeyFile = "com/example/adityaprakash/test/rsa_private.pem";
Will not relate to the same directory on the Android file system.
I wrote up an AndroidThings explanation of how to talk to Cloud IoT Core here: http://blog.blundellapps.co.uk/tut-google-cloud-iot-core-mqtt-on-android/
You can setup communication like this (with your pem file going into the /raw directory)
// Set up the communication with your Google IoT Core details.
// Each placeholder string below must be replaced with the value from your own project.
communicator = new IotCoreCommunicator.Builder()
.withContext(this)
.withCloudRegion("your-region") // ex: europe-west1
.withProjectId("your-project-id") // ex: supercoolproject23236
.withRegistryId("your-registry-id") // ex: my-devices
.withDeviceId("a-device-id") // ex: my-test-raspberry-pi
// Resource id of the private key PEM file placed in the app's raw resource directory.
.withPrivateKeyRawFileId(R.raw.rsa_private)
.build();
Source code is here: https://github.com/blundell/CloudIoTCoreMQTTExample
Note that the above is good enough for a secure environment or for testing that the end to end works. However if you wanted to release a production IoT device, you would look at embedding the PEM into the ROM and using private file storage access. https://developer.android.com/training/articles/keystore.html
An example of this can be found here: https://github.com/androidthings/sensorhub-cloud-iot
Specifically this class:
https://github.com/androidthings/sensorhub-cloud-iot/blob/e50bde0100fa81818ebbadb54561b3b68ccb64b8/app/src/main/java/com/example/androidthings/sensorhub/cloud/cloudiot/MqttAuthentication.java
You can then generate and use the PEM on the device:
/**
 * Loads the authentication certificate from the "AndroidKeyStore" key store, generating a
 * new key pair first when no certificate is stored yet.
 *
 * @return the certificate held in the {@code certificate} field after loading
 */
public Certificate getCertificate() {
    // NOTE(review): KeyStore.getInstance/load declare checked exceptions that this signature
    // does not; the enclosing class is not visible here, so confirm how they are handled.
    KeyStore ks = KeyStore.getInstance("AndroidKeyStore");
    ks.load(null);
    certificate = ks.getCertificate("Cloud IoT Authentication");
    if (certificate == null) {
        Log.w(TAG, "No IoT Auth Certificate found, generating new cert");
        generateAuthenticationKey();
        // NOTE(review): presumably keyAlias equals "Cloud IoT Authentication" - confirm.
        certificate = ks.getCertificate(keyAlias);
    }
    Log.i(TAG, "loaded certificate: " + keyAlias);
    // The method is declared to return a Certificate but previously had no return statement.
    return certificate;
}
and
/**
 * Generates an RSA key pair in the "AndroidKeyStore" provider under the alias
 * "Cloud IoT Authentication", configured for SHA-256 / PKCS#1 signing with a 2048-bit key.
 *
 * @throws GeneralSecurityException if the generator cannot be obtained or initialized
 */
private void generateAuthenticationKey() throws GeneralSecurityException {
    KeyPairGenerator generator =
            KeyPairGenerator.getInstance(KeyProperties.KEY_ALGORITHM_RSA, "AndroidKeyStore");

    // Describe the key first, then hand the finished spec to the generator.
    KeyGenParameterSpec.Builder specBuilder =
            new KeyGenParameterSpec.Builder("Cloud IoT Authentication", KeyProperties.PURPOSE_SIGN);
    KeyGenParameterSpec keySpec = specBuilder
            .setKeySize(2048)
            .setCertificateSubject(new X500Principal("CN=unused"))
            .setDigests(KeyProperties.DIGEST_SHA256)
            .setSignaturePaddings(KeyProperties.SIGNATURE_PADDING_RSA_PKCS1)
            .build();

    generator.initialize(keySpec);
    generator.generateKeyPair();
}
I'm pretty sure you're not doing the file I/O correctly. Your file, "com/example/adityaprakash/test/rsa_private.pem", doesn't correspond to an actual filepath on the device. The location of files on the device may be different than in your project. You will have to determine where on the device your file actually is.
On AndroidThings, it is easier to provide the authentication credentials in an Android Resource. See my fork of the WeatherStation sample to see how this works.
First, copy the private key file (e.g. rsa_private_pkcs8) to app/src/main/res/raw/privatekey.txt
Next, you can load the key used to calculate your JWT as:
// Context used to reach the app's resources.
Context mContext;
// Look up the id of the raw resource named "privatekey" (res/raw/privatekey.txt).
int resIdPk = getResources().getIdentifier("privatekey", "raw", getPackageName());
...
// Open the key resource and read it fully into memory.
// NOTE(review): inputStreamToBytes is a helper that is not shown in this snippet.
InputStream privateKey = mContext.getResources().openRawResource(resIdPk);
byte[] keyBytes = inputStreamToBytes(privateKey);
// The bytes are handed to PKCS8EncodedKeySpec, so the key has to be PKCS#8 encoded.
PKCS8EncodedKeySpec spec = new PKCS8EncodedKeySpec(keyBytes);
// NOTE(review): "EC" matches an elliptic-curve key; the question's code signs with RS256
// and uses KeyFactory.getInstance("RSA"), so an RSA key would need "RSA" here - confirm.
KeyFactory kf = KeyFactory.getInstance("EC");
One final note, it appears that you're referencing a file that is not in pkcs8 format, which will cause issues with Java. Make sure to use a key that is packaged in PKCS8 when opening credentials on Android (Java).