Spring Boot Kafka message consumer and lost message - docker

I am using Spring Boot 2.0.2 with Spring Kafka, together with the Kafka 1.1.0 Docker image from the following repository: https://hub.docker.com/r/wurstmeister/kafka/tags/
These are my Kafka configs:
@Configuration
@EnableKafka
public class KafkaConfig {
}
@Configuration
public class KafkaConsumerConfig {
@Value("${spring.kafka.bootstrap-servers}")
private String bootstrapServers;
@Value("${spring.kafka.consumer.group-id}")
private String consumerGroupId;
@Bean
public Map<String, Object> consumerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
props.put(ConsumerConfig.GROUP_ID_CONFIG, consumerGroupId);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, (int) TimeUnit.MINUTES.toMillis(10));
return props;
}
@Bean
public ConsumerFactory<String, String> consumerFactory() {
return new DefaultKafkaConsumerFactory<>(consumerConfigs(), new StringDeserializer(), new JsonDeserializer<>(String.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, String> kafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory());
return factory;
}
@Bean
public ConsumerFactory<String, Post> postConsumerFactory() {
return new DefaultKafkaConsumerFactory<>(consumerConfigs(), new StringDeserializer(), new JsonDeserializer<>(Post.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, Post> postKafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<String, Post> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(postConsumerFactory());
return factory;
}
}
@Configuration
public class KafkaProducerConfig {
@Value("${spring.kafka.bootstrap-servers}")
private String bootstrapServers;
@Bean
public Map<String, Object> producerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class);
props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 15000000);
return props;
}
@Bean
public ProducerFactory<String, Post> postProducerFactory() {
return new DefaultKafkaProducerFactory<>(producerConfigs());
}
@Bean
public KafkaTemplate<String, Post> postKafkaTemplate() {
return new KafkaTemplate<>(postProducerFactory());
}
}
This is my Kafka application.properties:
#Kafka
spring.kafka.bootstrap-servers=${kafka.host}:${kafka.port}
spring.kafka.consumer.auto-offset-reset=earliest
spring.kafka.consumer.group-id=postfenix
kafka.topic.posts.create=posts.create
This is my message listener:
@Component
public class PostConsumer {
static final Logger logger = LoggerFactory.getLogger(PostConsumer.class);
@KafkaListener(topics = "${kafka.topic.posts.create}", containerFactory = "postKafkaListenerContainerFactory")
public void createPost(ConsumerRecord<String, Post> consumerRecord) {
Post post = consumerRecord.value();
logger.info("Received message for post creation: {}", post);
}
}
I have also implemented the PostService which should send the Post to the Kafka topic:
@Service
public class PostServiceImpl implements PostService {
static final Logger logger = LoggerFactory.getLogger(PostServiceImpl.class);
@Value("${kafka.topic.posts.create}")
private String kafkaTopicPostsCreate;
@Autowired
private KafkaTemplate<String, Post> postKafkaTemplate;
@Override
public void sendPost(Post post) {
postKafkaTemplate.send(kafkaTopicPostsCreate, post);
logger.info("Message sent to the post creation queue: {}", post);
}
}
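One thing to be aware of here: KafkaTemplate.send() is asynchronous, so the log line above only means the record was handed to the producer, not that the broker accepted it. A minimal blocking variant of sendPost() (just a sketch, assuming waiting up to 10 seconds per send is acceptable; requires org.springframework.kafka.support.SendResult and java.util.concurrent.TimeUnit imports):
@Override
public void sendPost(Post post) {
    try {
        // Block on the future so a failed or timed-out send surfaces immediately
        // instead of being lost silently.
        SendResult<String, Post> result =
                postKafkaTemplate.send(kafkaTopicPostsCreate, post).get(10, TimeUnit.SECONDS);
        logger.info("Message acknowledged by broker at offset {}: {}",
                result.getRecordMetadata().offset(), post);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to send post to Kafka", e);
    }
}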
I have also implemented the SpringBoot test:
@RunWith(SpringRunner.class)
@SpringBootTest(classes = { TestApplication.class })
public class PostServiceIT {
@Autowired
private PostService postService;
@Autowired
private MessageRepository messageRepository;
@Before
public void setUp() {
messageRepository.deleteAll();
}
@Test
public void testCreatePost() throws InterruptedException {
assertEquals(0, messageRepository.findAll().size());
Post post = new Post();
...
postService.sendPost(post);
await().atMost(60, SECONDS).pollDelay(1000, MILLISECONDS).until(() -> messageRepository.findAll().size() == 1);
}
}
This is the log:
2018-06-09 16:12:37.547 INFO 17824 --- [ main] org.quartz.impl.StdSchedulerFactory : Quartz scheduler 'schedulerFactoryBean' initialized from an externally provided properties instance.
2018-06-09 16:12:37.547 INFO 17824 --- [ main] org.quartz.impl.StdSchedulerFactory : Quartz scheduler version: 2.3.0
2018-06-09 16:12:37.548 INFO 17824 --- [ main] org.quartz.core.QuartzScheduler : JobFactory set to: org.springframework.scheduling.quartz.AdaptableJobFactory#7a3e5cd3
2018-06-09 16:12:38.967 INFO 17824 --- [ main] o.s.c.support.DefaultLifecycleProcessor : Starting beans in phase 2147483547
2018-06-09 16:12:38.997 INFO 17824 --- [ main] o.a.k.clients.consumer.ConsumerConfig : ConsumerConfig values:
auto.commit.interval.ms = 5000
auto.offset.reset = latest
bootstrap.servers = [127.0.0.1:9093]
check.crcs = true
client.id =
connections.max.idle.ms = 540000
enable.auto.commit = true
exclude.internal.topics = true
fetch.max.bytes = 52428800
fetch.max.wait.ms = 500
fetch.min.bytes = 1
group.id = postfenix
heartbeat.interval.ms = 3000
interceptor.classes = []
internal.leave.group.on.close = true
isolation.level = read_uncommitted
key.deserializer = class org.apache.kafka.common.serialization.StringDeserializer
max.partition.fetch.bytes = 1048576
max.poll.interval.ms = 600000
max.poll.records = 500
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partition.assignment.strategy = [class org.apache.kafka.clients.consumer.RangeAssignor]
receive.buffer.bytes = 65536
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 305000
retry.backoff.ms = 100
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.mechanism = GSSAPI
security.protocol = PLAINTEXT
send.buffer.bytes = 131072
session.timeout.ms = 10000
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = null
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = null
ssl.keystore.password = null
ssl.keystore.type = JKS
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = null
ssl.truststore.password = null
ssl.truststore.type = JKS
value.deserializer = class org.springframework.kafka.support.serializer.JsonDeserializer
2018-06-09 16:12:39.095 INFO 17824 --- [ main] o.a.kafka.common.utils.AppInfoParser : Kafka version : 1.1.0
2018-06-09 16:12:39.095 INFO 17824 --- [ main] o.a.kafka.common.utils.AppInfoParser : Kafka commitId : fdcf75ea326b8e07
2018-06-09 16:12:39.100 INFO 17824 --- [ main] o.s.s.c.ThreadPoolTaskScheduler : Initializing ExecutorService
2018-06-09 16:12:39.104 INFO 17824 --- [ main] o.s.c.support.DefaultLifecycleProcessor : Starting beans in phase 2147483647
2018-06-09 16:12:39.104 INFO 17824 --- [ main] o.s.s.quartz.SchedulerFactoryBean : Starting Quartz Scheduler now
2018-06-09 16:12:39.104 INFO 17824 --- [ main] org.quartz.core.QuartzScheduler : Scheduler schedulerFactoryBean_$_NON_CLUSTERED started.
2018-06-09 16:12:39.111 INFO 17824 --- [SchedulerThread] c.n.quartz.mongodb.dao.TriggerDao : Found 0 triggers which are eligible to be run.
2018-06-09 16:12:39.119 INFO 17824 --- [ main] com.postfenix.domain.post.PostServiceIT : Started PostServiceIT in 5.094 seconds (JVM running for 5.74)
2018-06-09 16:12:39.121 INFO 17824 --- [ main] c.p.d.configuration.TestApplication : Initializing application...
2018-06-09 16:12:39.258 INFO 17824 --- [ main] org.mongodb.driver.connection : Opened connection [connectionId{localValue:4, serverValue:4}] to localhost:27018
2018-06-09 16:12:39.338 WARN 17824 --- [ntainer#0-0-C-1] org.apache.kafka.clients.NetworkClient : [Consumer clientId=consumer-1, groupId=postfenix] Error while fetching metadata with correlation id 2 : {posts.create=LEADER_NOT_AVAILABLE}
2018-06-09 16:12:39.339 INFO 17824 --- [ntainer#0-0-C-1] org.apache.kafka.clients.Metadata : Cluster ID: BYqDmOq_SDCll0ILZI_KoA
2018-06-09 16:12:39.392 INFO 17824 --- [ main] o.a.k.clients.producer.ProducerConfig : ProducerConfig values:
acks = 1
batch.size = 16384
bootstrap.servers = [127.0.0.1:9093]
buffer.memory = 33554432
client.id =
compression.type = none
connections.max.idle.ms = 540000
enable.idempotence = false
interceptor.classes = []
key.serializer = class org.apache.kafka.common.serialization.StringSerializer
linger.ms = 0
max.block.ms = 60000
max.in.flight.requests.per.connection = 5
max.request.size = 15000000
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
receive.buffer.bytes = 32768
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 30000
retries = 0
retry.backoff.ms = 100
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.mechanism = GSSAPI
security.protocol = PLAINTEXT
send.buffer.bytes = 131072
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = null
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = null
ssl.keystore.password = null
ssl.keystore.type = JKS
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = null
ssl.truststore.password = null
ssl.truststore.type = JKS
transaction.timeout.ms = 60000
transactional.id = null
value.serializer = class org.springframework.kafka.support.serializer.JsonSerializer
2018-06-09 16:12:39.419 INFO 17824 --- [ main] o.a.kafka.common.utils.AppInfoParser : Kafka version : 1.1.0
2018-06-09 16:12:39.419 INFO 17824 --- [ main] o.a.kafka.common.utils.AppInfoParser : Kafka commitId : fdcf75ea326b8e07
2018-06-09 16:12:39.437 WARN 17824 --- [ad | producer-1] org.apache.kafka.clients.NetworkClient : [Producer clientId=producer-1] Error while fetching metadata with correlation id 1 : {posts.create=LEADER_NOT_AVAILABLE}
2018-06-09 16:12:39.437 INFO 17824 --- [ad | producer-1] org.apache.kafka.clients.Metadata : Cluster ID: BYqDmOq_SDCll0ILZI_KoA
2018-06-09 16:12:39.454 WARN 17824 --- [ntainer#0-0-C-1] org.apache.kafka.clients.NetworkClient : [Consumer clientId=consumer-1, groupId=postfenix] Error while fetching metadata with correlation id 4 : {posts.create=LEADER_NOT_AVAILABLE}
2018-06-09 16:12:39.565 WARN 17824 --- [ad | producer-1] org.apache.kafka.clients.NetworkClient : [Producer clientId=producer-1] Error while fetching metadata with correlation id 3 : {posts.create=LEADER_NOT_AVAILABLE}
2018-06-09 16:12:39.590 WARN 17824 --- [ntainer#0-0-C-1] org.apache.kafka.clients.NetworkClient : [Consumer clientId=consumer-1, groupId=postfenix] Error while fetching metadata with correlation id 6 : {posts.create=LEADER_NOT_AVAILABLE}
2018-06-09 16:12:39.704 INFO 17824 --- [ main] c.p.domain.service.post.PostServiceImpl : Message sent to the post creation queue: Post [chatId=#name, parseMode=HTML]
2018-06-09 16:12:40.229 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.c.internals.AbstractCoordinator : [Consumer clientId=consumer-1, groupId=postfenix] Discovered group coordinator 10.0.75.1:9093 (id: 2147482646 rack: null)
2018-06-09 16:12:40.232 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.c.internals.ConsumerCoordinator : [Consumer clientId=consumer-1, groupId=postfenix] Revoking previously assigned partitions []
2018-06-09 16:12:40.233 INFO 17824 --- [ntainer#0-0-C-1] o.s.k.l.KafkaMessageListenerContainer : partitions revoked: []
2018-06-09 16:12:40.233 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.c.internals.AbstractCoordinator : [Consumer clientId=consumer-1, groupId=postfenix] (Re-)joining group
2018-06-09 16:12:40.295 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.c.internals.AbstractCoordinator : [Consumer clientId=consumer-1, groupId=postfenix] Successfully joined group with generation 1
2018-06-09 16:12:40.297 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.c.internals.ConsumerCoordinator : [Consumer clientId=consumer-1, groupId=postfenix] Setting newly assigned partitions [posts.create-0]
2018-06-09 16:12:40.313 INFO 17824 --- [ntainer#0-0-C-1] o.a.k.c.consumer.internals.Fetcher : [Consumer clientId=consumer-1, groupId=postfenix] Resetting offset for partition posts.create-0 to offset 1.
2018-06-09 16:12:40.315 INFO 17824 --- [ntainer#0-0-C-1] o.s.k.l.KafkaMessageListenerContainer : partitions assigned: [posts.create-0]
Right now my test fails on the following line:
await().atMost(60, SECONDS).pollDelay(1000, MILLISECONDS).until(() -> messageRepository.findAll().size() == 1);
because on the first test run the message is, for some reason, never delivered to the PostConsumer.createPost method. However, if I run the same test a second time against the same Kafka Docker instance, the message from the previous run is successfully delivered to PostConsumer.createPost. What am I doing wrong, why is the message not delivered after the first test run, and how can I fix it?
UPDATED
This is my updated KafkaConsumerConfig:
@Configuration
public class KafkaConsumerConfig {
@Bean
public ConsumerFactory<String, String> consumerFactory(KafkaProperties kafkaProperties) {
return new DefaultKafkaConsumerFactory<>(kafkaProperties.buildConsumerProperties(), new StringDeserializer(), new JsonDeserializer<>(String.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, String> kafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory());
return factory;
}
@Bean
public ConsumerFactory<String, Post> postConsumerFactory() {
return new DefaultKafkaConsumerFactory<>(consumerConfigs(), new StringDeserializer(), new JsonDeserializer<>(Post.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, Post> postKafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<String, Post> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(postConsumerFactory());
return factory;
}
}
Right now I have two compilation errors: the kafkaListenerContainerFactory and postConsumerFactory methods still call the removed consumerConfigs() method, and the consumerFactory call inside kafkaListenerContainerFactory now requires a KafkaProperties argument.

spring.kafka.consumer.auto-offset-reset=earliest
spring.kafka.consumer.group-id=postfenix
You are not using these boot properties since you are creating your own consumer configs.
You should replace this
@Value("${spring.kafka.bootstrap-servers}")
private String bootstrapServers;
@Value("${spring.kafka.consumer.group-id}")
private String consumerGroupId;
@Bean
public Map<String, Object> consumerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
props.put(ConsumerConfig.GROUP_ID_CONFIG, consumerGroupId);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, (int) TimeUnit.MINUTES.toMillis(10));
return props;
}
@Bean
public ConsumerFactory<String, String> consumerFactory() {
return new DefaultKafkaConsumerFactory<>(consumerConfigs(), new StringDeserializer(), new JsonDeserializer<>(String.class));
}
with
@Bean
public ConsumerFactory<String, String> consumerFactory(
KafkaProperties kafkaProperties) {
return new DefaultKafkaConsumerFactory<>(kafkaProperties.buildConsumerProperties(),
new StringDeserializer(), new JsonDeserializer<>(String.class));
}
EDIT
@Bean
public ConsumerFactory<String, String> consumerFactory(KafkaProperties kafkaProperties) {
return new DefaultKafkaConsumerFactory<>(kafkaProperties.buildConsumerProperties(), new StringDeserializer(), new JsonDeserializer<>(String.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, String> kafkaListenerContainerFactory(KafkaProperties kafkaProperties) {
ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory(kafkaProperties));
return factory;
}
@Bean
public ConsumerFactory<String, Post> postConsumerFactory(KafkaProperties kafkaProperties) {
return new DefaultKafkaConsumerFactory<>(kafkaProperties.buildConsumerProperties(), new StringDeserializer(), new JsonDeserializer<>(Post.class));
}
@Bean
public ConcurrentKafkaListenerContainerFactory<String, Post> postKafkaListenerContainerFactory(KafkaProperties kafkaProperties) {
ConcurrentKafkaListenerContainerFactory<String, Post> factory = new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(postConsumerFactory(kafkaProperties));
return factory;
}
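Independently of the configuration fix, the first-run failure can also be a race: the test sends the record before the listener container has been assigned its partitions, and with auto.offset.reset=latest that first record is then skipped. A minimal test-side sketch, assuming spring-kafka-test is on the test classpath and posts.create has a single partition:
// In PostServiceIT: wait until every listener container has its partitions assigned
// before sending, so the very first record is not missed.
// Assumed imports: org.springframework.kafka.config.KafkaListenerEndpointRegistry,
// org.springframework.kafka.listener.MessageListenerContainer,
// org.springframework.kafka.test.utils.ContainerTestUtils.
@Autowired
private KafkaListenerEndpointRegistry registry;
@Before
public void waitForPartitionAssignment() {
    for (MessageListenerContainer container : registry.getListenerContainers()) {
        ContainerTestUtils.waitForAssignment(container, 1); // 1 = partition count of posts.create
    }
}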

Related

Logs are written to the rolling file and not the console after the rolling file appender is initialized

Below are the programmatic configurations of the console appender and the log appenders, which I converted as part of the Log4j upgrade from 1.x to 2.17.1.
//Programmatic configuration of ConsoleAppender log4j2
String pattern = "%d{DATE} [%p|%C{2}] %m%n";
ConfigurationBuilder<BuiltConfiguration> builder =
ConfigurationBuilderFactory.newConfigurationBuilder();
builder.setConfigurationName("DefaultLogger");
// Create pattern layout
LayoutComponentBuilder layoutBuilder = builder.newLayout("PatternLayout")
.addAttribute("pattern", pattern);
AppenderComponentBuilder appenderBuilder = builder.newAppender("Console", "CONSOLE")
.addAttribute("target", ConsoleAppender.Target.SYSTEM_OUT);
appenderBuilder.add(layoutBuilder);
builder.add(appenderBuilder);
RootLoggerComponentBuilder rootLogger
= builder.newRootLogger(Level.DEBUG);
rootLogger.add(builder.newAppenderRef("Console"));
builder.add(rootLogger);
Configurator.initialize(builder.build());
Configurator.reconfigure(builder.build());
//RollingFileAppender Programmatic configuration log4j2:
CdasLogger() {
// CreateLogger takes a path for each logger from config file
loadLog = createLogger("load.log");
updateLog = createLogger("update.log");
userLog = createLogger("user.log");
}
private Logger createLogger(String logType) {
String pattern = "%d %M - %m%n";
String consolePattern = "%d{DATE} [%p|%C{2}] %m%n";
String fileLogName = "/app/app.log";
String filePattern = "/app/app.log-%d{MM-dd-yy}.log.gz";
System.out.println("logtype is::" + logType);
String path = ConfigReader.getStringValue(logType);
System.out.println(path);
String daily = "0 0 12 1/1 * ? *";
// Initializing the logger context
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
ConfigurationBuilder<BuiltConfiguration> builder =
ConfigurationBuilderFactory.newConfigurationBuilder();
builder.setConfigurationName("rollingFileLogger");
//specifying the pattern layout
LayoutComponentBuilder layoutBuilder = builder.newLayout("PatternLayout")
.addAttribute("pattern", pattern);
//specifying the policy for rolling file
ComponentBuilder triggeringPolicy = builder.newComponent("Policies")
. addComponent(builder.newComponent("SizeBasedTriggeringPolicy").addAttribute("size", "10MB"));
//create a rollingfile appender
AppenderComponentBuilder appenderBuilder = builder.newAppender("rollingFile", "RollingFile")
.addAttribute("fileName", path)
.addAttribute("filePattern", path+"-%d{MM-dd-yy-HH-mm-ss}.log.")
.add(layoutBuilder)
.addComponent(triggeringPolicy);
builder.add(appenderBuilder);
RootLoggerComponentBuilder rootLogger = builder.newRootLogger(Level.TRACE);
rootLogger.add(builder.newAppenderRef("rollingFile"));
builder.add(rootLogger);
ctx = Configurator.initialize(builder.build());
Configurator.reconfigure(builder.build());
return ctx.getLogger(logType); // return the logger to the caller
}
The console appender gets initialized first and logs are written to it. After the rolling file appender is initialized, all logs are written to the rolling file appender, and nothing is logged to the console appender anymore.
EDIT 1:
As per Piotr's comments, I made the change below so that all the appenders use the same configuration builder.
private void configureLogger() {
ConfigurationBuilder<BuiltConfiguration> builder = ConfigurationBuilderFactory.newConfigurationBuilder();
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
// create a console appender
AppenderComponentBuilder console
= builder.newAppender("stdout", "Console").addAttribute("target",
ConsoleAppender.Target.SYSTEM_OUT);
console.add(builder.newLayout("PatternLayout").addAttribute("pattern",
"%d{DATE} [%p|%C{2}] %m%n"));
RootLoggerComponentBuilder rootLogger
= builder.newRootLogger(Level.DEBUG);
rootLogger.add(builder.newAppenderRef("stdout"));
builder.add(console);
// create a rolling file appender
String pattern = "%d %M - %m%n";
//specifying the pattern layout
LayoutComponentBuilder layoutBuilder = builder.newLayout("PatternLayout")
.addAttribute("pattern", pattern);
//specifying the policy for rolling file
ComponentBuilder triggeringPolicy = builder.newComponent("Policies")
.addComponent(builder.newComponent("SizeBasedTriggeringPolicy").addAttribute("size", "10MB"));
String[] logTypes = {"load.log", "update.log", "user.log"};
for (String logType : logTypes) {
System.out.println("logtype is::" + logType);
String path = ConfigReader.getStringValue(logType);
System.out.println(path);
AppenderComponentBuilder appenderBuilder = builder.newAppender(logType, "RollingFile")
.addAttribute("fileName", path == null ? "/app1/app.log" : path)
.addAttribute("filePattern", path == null ? "/app1/app.log" : path+"-%d{MM-dd-yy-HH-mm-ss}.log.")
.add(layoutBuilder)
.addComponent(triggeringPolicy);
builder.add(appenderBuilder);
rootLogger.add(builder.newAppenderRef(logType));
}
builder.add(rootLogger);
Configurator.reconfigure(builder.build());
}
The logs are still not updating. I call the configureLogger() method during application start-up.
In another class I have the code below:
public class CdasLogger {
private final Logger updateLog, loadLog, userLog;
CdasLogger() {
loadLog = createLogger("load.log");
updateLog = createLogger("update.log");
userLog = createLogger("user.log");
}
private Logger createLogger(String logType) {
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
return ctx.getLogger(logType);
}
}
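For reference, the same ConfigurationBuilder can also declare a logger per log type, so that ctx.getLogger(logType) resolves to a logger with its own appender reference instead of falling through to the root logger alone. A minimal sketch, to be placed inside the loop over logTypes above right after builder.add(appenderBuilder); the logger name, level, and additivity choice here are assumptions:
// Assumed import: org.apache.logging.log4j.core.config.builder.api.LoggerComponentBuilder
// Declare a named logger that writes to its own rolling file; additivity=true lets the
// same events also reach the root logger's console appender.
LoggerComponentBuilder typedLogger = builder.newLogger(logType, Level.DEBUG)
        .add(builder.newAppenderRef(logType))
        .addAttribute("additivity", true);
builder.add(typedLogger);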

How do you retrieve the transitive groups of a user with the Java Microsoft Graph SDK?

I'm trying to retrieve all the groups (including transitive ones) that a user is a member of, using this API:
https://graph.microsoft.com/v1.0/users/780b6216-3ae0-4396-975d-3739d5cbb063/transitiveMemberOf/microsoft.graph.group
I'm using:
<dependency>
<groupId>com.microsoft.graph</groupId>
<artifactId>microsoft-graph</artifactId>
<version>3.8.0</version>
</dependency>
My code looks like:
GroupCollectionPage collection = azure
.users(oid)
.transitiveMemberOfAsGroup()
.buildRequest()
.get();
List<AzureAdGroup> groups = new ArrayList<>();
while (collection != null) {
final List<Group> directoryObjects = collection.getCurrentPage();
List<AzureAdGroup> groupsFromPage = directoryObjects.stream()
.filter(group -> Boolean.TRUE.equals(group.securityEnabled))
.map(group -> new AzureAdGroup(group.id, group.displayName))
.collect(Collectors.toList());
groups.addAll(groupsFromPage);
GroupCollectionRequestBuilder nextPage = collection
.getNextPage();
if (nextPage == null) {
break;
} else {
collection = nextPage.buildRequest().get();
}
}
Stacktrace:
2021-06-15 07:26:29.134+0000 [id=91] INFO c.a.c.util.logging.ClientLogger#performLogging: Azure Identity => getToken() result for scopes [https://graph.microsoft.com/.default]: SUCCESS
2021-06-15 07:26:29.580+0000 [id=70] SEVERE c.m.graph.logger.DefaultLogger#logError: CoreHttpProvider[sendRequestInternal] - 396Error during http request
2021-06-15 07:26:29.580+0000 [id=70] SEVERE c.m.graph.logger.DefaultLogger#logError: Throwable detail: com.microsoft.graph.core.ClientException: Error during http request
2021-06-15 07:26:29.580+0000 [id=70] WARNING
com.google.gson.stream.MalformedJsonException: Unterminated array at line 1 column 3384 path $.value[3]
at com.google.gson.stream.JsonReader.syntaxError(JsonReader.java:1562)
at com.google.gson.stream.JsonReader.doPeek(JsonReader.java:475)
at com.google.gson.stream.JsonReader.hasNext(JsonReader.java:413)
at com.google.gson.internal.bind.TypeAdapters$29.read(TypeAdapters.java:714)
at com.google.gson.internal.bind.TypeAdapters$29.read(TypeAdapters.java:723)
at com.google.gson.internal.bind.TypeAdapters$29.read(TypeAdapters.java:698)
at com.google.gson.internal.bind.TypeAdapters$35$1.read(TypeAdapters.java:894)
at com.google.gson.Gson.fromJson(Gson.java:932)
Caused: com.google.gson.JsonSyntaxException
at com.google.gson.Gson.fromJson(Gson.java:947)
at com.google.gson.Gson.fromJson(Gson.java:870)
at com.microsoft.graph.serializer.DefaultSerializer.deserializeObject(DefaultSerializer.java:89)
at com.microsoft.graph.http.CoreHttpProvider.handleJsonResponse(CoreHttpProvider.java:527)
at com.microsoft.graph.http.CoreHttpProvider.processResponse(CoreHttpProvider.java:455)
Caused: com.microsoft.graph.core.ClientException: Error during http request
at com.microsoft.graph.http.CoreHttpProvider.processResponse(CoreHttpProvider.java:483)
at com.microsoft.graph.http.CoreHttpProvider.sendRequestInternal(CoreHttpProvider.java:396)
at com.microsoft.graph.http.CoreHttpProvider.send(CoreHttpProvider.java:222)
at com.microsoft.graph.http.CoreHttpProvider.send(CoreHttpProvider.java:199)
at com.microsoft.graph.http.BaseCollectionRequest.send(BaseCollectionRequest.java:102)
at com.microsoft.graph.http.BaseEntityCollectionRequest.get(BaseEntityCollectionRequest.java:78)
at com.microsoft.jenkins.azuread.AzureCachePool.lambda$getBelongingGroupsByOid$2(AzureCachePool.java:41)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.lambda$doComputeIfAbsent$14(BoundedLocalCache.java:2405)
at java.base/java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1908)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.doComputeIfAbsent(BoundedLocalCache.java:2403)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.computeIfAbsent(BoundedLocalCache.java:2386)
at com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)
at com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)
at com.microsoft.jenkins.azuread.AzureCachePool.getBelongingGroupsByOid(AzureCachePool.java:34)
at com.microsoft.jenkins.azuread.AzureSecurityRealm.lambda$doFinishLogin$3(AzureSecurityRealm.java:382)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.lambda$doComputeIfAbsent$14(BoundedLocalCache.java:2405)
at java.base/java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1908)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.doComputeIfAbsent(BoundedLocalCache.java:2403)
at com.github.benmanes.caffeine.cache.BoundedLocalCache.computeIfAbsent(BoundedLocalCache.java:2386)
at com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)
at com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)
at com.microsoft.jenkins.azuread.AzureSecurityRealm.doFinishLogin(AzureSecurityRealm.java:375)
...
0)org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:905)
at java.base/java.lang.Thread.run(Thread.java:834)
This works, but I would like to retrieve only groups (and then also filter down to security groups):
DirectoryObjectCollectionWithReferencesPage collection = azure
.users(oid)
// TODO asGroup isn't working json error, and neither is $filter on securityEnabled
.transitiveMemberOf()
.buildRequest()
.get();
List<AzureAdGroup> groups = new ArrayList<>();
while (collection != null) {
final List<DirectoryObject> directoryObjects = collection.getCurrentPage();
List<AzureAdGroup> groupsFromPage = directoryObjects.stream()
.filter(group -> group instanceof Group
&& Boolean.TRUE.equals(((Group) group).securityEnabled))
.map(group -> new AzureAdGroup(group.id, ((Group) group).displayName))
.collect(Collectors.toList());
groups.addAll(groupsFromPage);
DirectoryObjectCollectionWithReferencesRequestBuilder nextPage = collection
.getNextPage();
if (nextPage == null) {
break;
} else {
collection = nextPage.buildRequest().get();
}
}
GraphServiceClient graphClient = GraphServiceClient.builder().authenticationProvider( authProvider ).buildClient();
DirectoryObjectCollectionWithReferencesPage transitiveMemberOf = graphClient.groups("{id}").transitiveMemberOf()
.buildRequest()
.get();
List<DirectoryObject> page = transitiveMemberOf.getCurrentPage();
for (DirectoryObject directoryObject : page) {
// check directoryObject.oDataType to see whether the object is a group
...
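A minimal sketch of that check, reusing the Group, DirectoryObject, and AzureAdGroup types from the snippets above ("#microsoft.graph.group" is the OData type Graph reports for group objects; this filtering logic is an illustration, not verified against the SDK):
// Keep only security-enabled groups from the current page by checking the OData type
// instead of calling transitiveMemberOfAsGroup().
List<AzureAdGroup> groupsFromPage = page.stream()
        .filter(obj -> "#microsoft.graph.group".equals(obj.oDataType))
        .map(obj -> (Group) obj)
        .filter(group -> Boolean.TRUE.equals(group.securityEnabled))
        .map(group -> new AzureAdGroup(group.id, group.displayName))
        .collect(Collectors.toList());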

Kafka Twitter streaming TwitterException error

I am trying the sample code on Kafka Twitter streaming from the following tutorial.
https://www.tutorialspoint.com/apache_kafka/apache_kafka_real_time_application.htm
Here is my code:
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;
import twitter4j.*;
import twitter4j.conf.*;
import twitter4j.StatusListener;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
public class KafkaTwitterProducer {
public static void main(String[] args) throws Exception {
LinkedBlockingQueue<Status> queue = new LinkedBlockingQueue<Status>(1000);
String consumerKey = "XXXXXXXXXXXXXXXXX"; //args[0].toString();
String consumerSecret = "XXXXXXXXXXXXXXXXX"; //args[1].toString();
String accessToken = "XXXXXXXXXXXXXXXXX" ; //args[2].toString();
String accessTokenSecret = "XXXXXXXXXXXXXXXXX" ; //args[3].toString();
String topicName = "twittertest" ; //args[4].toString();
//String[] arguments = args.clone();
String[] keyWords = {"Hello","Hi","Welcome"}; //Arrays.copyOfRange(arguments, 5, arguments.length);
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true)
.setOAuthConsumerKey(consumerKey)
.setOAuthConsumerSecret(consumerSecret)
.setOAuthAccessToken(accessToken)
.setOAuthAccessTokenSecret(accessTokenSecret);
TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
StatusListener listener = new StatusListener() {
@Override
public void onStatus(Status status) {
queue.offer(status);
System.out.println("#" + status.getUser().getScreenName()
+ " - " + status.getText());
// System.out.println("@" + status.getUser().getScreenName());
/*for(URLEntity urle : status.getURLEntities()) {
System.out.println(urle.getDisplayURL());
}*/
/*for(HashtagEntity hashtage : status.getHashtagEntities()) {
System.out.println(hashtage.getText());
}*/
}
@Override
public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
System.out.println("Got a status deletion notice id:"
+ statusDeletionNotice.getStatusId());
}
@Override
public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
System.out.println("Got track limitation notice:" +
numberOfLimitedStatuses);
}
@Override
public void onScrubGeo(long userId, long upToStatusId) {
System.out.println("Got scrub_geo event userId:" + userId +
"upToStatusId:" + upToStatusId);
}
@Override
public void onStallWarning(StallWarning warning) {
// System.out.println("Got stall warning:" + warning);
}
@Override
public void onException(Exception ex) {
ex.printStackTrace();
}
};
twitterStream.addListener(listener);
FilterQuery query = new FilterQuery().track(keyWords);
twitterStream.filter(query);
Thread.sleep(5000);
//Add Kafka producer config settings
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092");
props.put("client.id", "SampleProducer");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
//props.put("key.serializer",
// "org.apache.kafka.common.serialization.StringSerializer");
//props.put("value.serializer",
// "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<String, String>(props);
int i = 0;
int j = 0;
while(i < 10) {
Status ret = queue.poll();
if (ret == null) {
Thread.sleep(100);
i++;
}else {
for(HashtagEntity hashtage : ret.getHashtagEntities()) {
System.out.println("Hashtag: " + hashtage.getText());
producer.send(new ProducerRecord<String, String>(
topicName, Integer.toString(j++), hashtage.getText()));
}
}
}
producer.close();
Thread.sleep(5000);
twitterStream.shutdown();
}
}
When I run this as a Java application, I get the following error (this is not a compile/build error):
Read timed out
Relevant discussions can be found on the Internet at:
http://www.google.co.jp/search?q=1169356e or
http://www.google.co.jp/search?q=c04b39f0
TwitterException{exceptionCode=[1169356e-c04b39f0 c2863472-491bffd7], statusCode=-1, message=null, code=-1, retryAfter=-1, rateLimitStatus=null, version=4.0.4}
at twitter4j.HttpClientImpl.handleRequest(HttpClientImpl.java:179)
at twitter4j.HttpClientBase.request(HttpClientBase.java:57)
at twitter4j.HttpClientBase.post(HttpClientBase.java:86)
at twitter4j.TwitterStreamImpl.getFilterStream(TwitterStreamImpl.java:346)
at twitter4j.TwitterStreamImpl$8.getStream(TwitterStreamImpl.java:322)
at twitter4j.TwitterStreamImpl$TwitterStreamConsumer.run(TwitterStreamImpl.java:552)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:170)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1536)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:338)
at twitter4j.HttpResponseImpl.<init>(HttpResponseImpl.java:35)
at twitter4j.HttpClientImpl.handleRequest(HttpClientImpl.java:143)
... 5 more
I am not sure what the problem is here. Could someone please suggest a solution or fix?
Update: it works now if the keywords are generic, e.g. String[] keyWords = {"USA","Basketball","Sports"};
If I change them to my own specific keywords (my company name, product name, etc.), for example String[] keyWords = {"XXX","YYY","ZZZ"};, then the Java application terminates. What could be the reason, and how can I fix it in this code?
The Twitter4J source code shows that this exception is thrown because of an HTTP connection timeout.
I get a similar exception by setting a low value for the streaming read timeout.
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true)
.setOAuthConsumerKey(consumerKey)
.setOAuthConsumerSecret(consumerSecret)
.setOAuthAccessToken(accessToken)
.setOAuthAccessTokenSecret(accessTokenSecret)
.setHttpStreamingReadTimeout(10);
This is the stack trace I get.
TwitterException{exceptionCode=[1169356e-c3c3770e 1169356e-c3c376e4], statusCode=-1, message=null, code=-1, retryAfter=-1, rateLimitStatus=null, version=4.0.6}
at twitter4j.HttpClientImpl.handleRequest(HttpClientImpl.java:179)
at twitter4j.HttpClientBase.request(HttpClientBase.java:57)
at twitter4j.HttpClientBase.post(HttpClientBase.java:86)
at twitter4j.TwitterStreamImpl.getFilterStream(TwitterStreamImpl.java:347)
at twitter4j.TwitterStreamImpl$8.getStream(TwitterStreamImpl.java:323)
at twitter4j.TwitterStreamImpl$TwitterStreamConsumer.run(TwitterStreamImpl.java:554)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:983)
at sun.security.ssl.SSLSocketImpl.performInitialHandshake(SSLSocketImpl.java:1385)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1413)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1397)
at sun.net.www.protocol.https.HttpsClient.afterConnect(HttpsClient.java:559)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:185)
at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1316)
at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1291)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getOutputStream(HttpsURLConnectionImpl.java:250)
at twitter4j.HttpClientImpl.handleRequest(HttpClientImpl.java:137)
... 5 more
For your example, please try setting a higher value for HttpStreamingReadTimeout. The default value in the code is 40 seconds. Try setting it to 120,000 (milliseconds) or higher. That should work.
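For example, a sketch of that change applied to the question's ConfigurationBuilder (the 120000 value is illustrative):
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true)
  .setOAuthConsumerKey(consumerKey)
  .setOAuthConsumerSecret(consumerSecret)
  .setOAuthAccessToken(accessToken)
  .setOAuthAccessTokenSecret(accessTokenSecret)
  .setHttpStreamingReadTimeout(120000); // 120 seconds instead of the 40-second default
TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();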

Empty side outputs throwing NPE on SDK 2.0.0 for Dataflow/Beam

We're trying to migrate our Dataflow/Beam pipelines from 2.0.0-beta3 to 2.0.0.
However, when we use the 2.0.0 version, the pipeline fails with a NPE deep in the Dataflow/Beam API. Changing back to 2.0.0-beta3, and it works fine again.
The only changes made to the code are to incorporate the API changes for the 2.0.0 SDK. We haven't changed anything else. The problem appears to be when a side output is empty. Empty side outputs work fine on 2.0.0-beta3.
Are we doing something wrong with our migration to 2.0.0?
Here's an example which reproduces the problem. Run with the following args:
--project=<project-id>
--runner=DirectRunner
--tempLocation=gs://<your-bucket>
--stagingLocation=gs://<your-bucket>
2.0.0-beta3 (runs fine)
public class EmptySideOutputNPE implements Serializable {
private static final TupleTag<TableRow> mainOutputTag = new TupleTag<TableRow>("mainOutputTag") {
};
private static final TupleTag<TableRow> sideOutputTag = new TupleTag<TableRow>("sideOutputTag") {
};
private static final TupleTag<TableRow> possibleEmptySideOutputTag = new TupleTag<TableRow>("possibleEmptySideOutputTag") {
};
public static void main(String[] args) {
PipelineOptions options = PipelineOptionsFactory
.fromArgs(args)
.withValidation()
.as(PipelineOptions.class);
Pipeline pipeline = Pipeline.create(options);
//Read from BigQuery public dataset
PCollectionTuple results = pipeline.apply("Read-BQ", BigQueryIO.Read.from("bigquery-samples:wikipedia_benchmark.Wiki1k"))
.apply(ParDo.of(new DoFn<TableRow, TableRow>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
TableRow inputRow = c.element();
//output the title to main output tag
TableRow titleRow = new TableRow();
titleRow.set("col", inputRow.get("title"));
c.output(titleRow);
//output the language to the side output
TableRow languageRow = new TableRow();
languageRow.set("col", inputRow.get("language"));
c.sideOutput(sideOutputTag, languageRow);
//don't output anything for the possibleEmptySideOutputTag tag
}
}).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag).and(possibleEmptySideOutputTag)));
//write the results:
results.get(mainOutputTag).apply("Title write",
BigQueryIO.Write.to("<project-id>:<dataset>.2_0_0_sdk_test_title")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
results.get(sideOutputTag).apply("Language write",
BigQueryIO.Write.to("<project-id>:<dataset>.2_0_0_sdk_test_language")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
results.get(possibleEmptySideOutputTag).apply("Empty write",
BigQueryIO.Write.to("<project-id>:<dataset>.2_0_0_sdk_test_empty")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
pipeline.run();
}
private static TableSchema getTableSchema() {
List<TableFieldSchema> fields = new ArrayList<>();
fields.add(new TableFieldSchema().setName("col").setType("STRING"));
return new TableSchema().setFields(fields);
}
}
2.0.0 (NPE)
public class EmptySideOutputNPE implements Serializable {
private static final TupleTag<TableRow> mainOutputTag = new TupleTag<TableRow>("mainOutputTag") {
};
private static final TupleTag<TableRow> sideOutputTag = new TupleTag<TableRow>("sideOutputTag") {
};
private static final TupleTag<TableRow> possibleEmptySideOutputTag = new TupleTag<TableRow>("possibleEmptySideOutputTag") {
};
public static void main(String[] args) {
PipelineOptions options = PipelineOptionsFactory
.fromArgs(args)
.withValidation()
.as(PipelineOptions.class);
Pipeline pipeline = Pipeline.create(options);
//Read from BigQuery public dataset
PCollectionTuple results = pipeline.apply("Read-BQ", BigQueryIO.read().from("bigquery-samples:wikipedia_benchmark.Wiki1k"))
.apply(ParDo.of(new DoFn<TableRow, TableRow>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
TableRow inputRow = c.element();
//output the title to main output tag
TableRow titleRow = new TableRow();
titleRow.set("col", inputRow.get("title"));
c.output(titleRow);
//output the language to the side output
TableRow languageRow = new TableRow();
languageRow.set("col", inputRow.get("language"));
c.output(sideOutputTag, languageRow);
//don't output anything for the possibleEmptySideOutputTag tag
}
}).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag).and(possibleEmptySideOutputTag)));
//write the results:
results.get(mainOutputTag).apply("Title write",
BigQueryIO.writeTableRows().to("<project-id>:<dataset>.2_0_0_sdk_test_title")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
results.get(sideOutputTag).apply("Language write",
BigQueryIO.writeTableRows().to("<project-id>:<dataset>.2_0_0_sdk_test_language")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
results.get(possibleEmptySideOutputTag).apply("Empty write",
BigQueryIO.writeTableRows().to("<project-id>:<dataset>.2_0_0_sdk_test_empty")
.withCreateDisposition(CREATE_IF_NEEDED)
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
.withSchema(getTableSchema()));
pipeline.run();
}
private static TableSchema getTableSchema() {
List<TableFieldSchema> fields = new ArrayList<>();
fields.add(new TableFieldSchema().setName("col").setType("STRING"));
return new TableSchema().setFields(fields);
}
}
23:43:09,484 0 [main] INFO org.apache.beam.sdk.io.gcp.bigquery.BigQuerySourceBase - Starting BigQuery extract job: beam_job_885a1329f1a045d6a6422c975690967e_emptysideoutputnpepolleyg0715134309b6259542-extract
23:43:11,209 1725 [main] INFO org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl - Started BigQuery job: {jobId=beam_job_885a1329f1a045d6a6422c975690967e_emptysideoutputnpepolleyg0715134309b6259542-extract, projectId=<redacted>}.
bq show -j --format=prettyjson --project_id=<redacted> beam_job_885a1329f1a045d6a6422c975690967e_emptysideoutputnpepolleyg0715134309b6259542-extract
23:43:12,718 3234 [main] INFO org.apache.beam.sdk.io.gcp.bigquery.BigQuerySourceBase - BigQuery extract job completed: beam_job_885a1329f1a045d6a6422c975690967e_emptysideoutputnpepolleyg0715134309b6259542-extract
23:43:14,738 5254 [direct-runner-worker] INFO org.apache.beam.sdk.io.FileBasedSource - Matched 1 files for pattern gs://nonsense/BigQueryExtractTemp/885a1329f1a045d6a6422c975690967e/000000000000.avro
23:43:18,171 8687 [direct-runner-worker] INFO org.apache.beam.sdk.io.FileBasedSource - Filepattern gs://nonsense/BigQueryExtractTemp/885a1329f1a045d6a6422c975690967e/000000000000.avro matched 1 files with total size 60370
23:43:18,653 9169 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/956c7d7b866941aaa406bd9e5cb63aab/399d59ec-2475-4d07-9fa9-25feadf53737.
23:43:18,653 9169 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/4377160da6184249a5ffc7cc27155265/8db1d8c4-9e4d-4093-8b9f-3e892de78057.
23:43:22,839 13355 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/956c7d7b866941aaa406bd9e5cb63aab/1b544d4b-650c-4e05-abc0-f80318278a2f.
23:43:22,849 13365 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/4377160da6184249a5ffc7cc27155265/2f3164e0-674e-4926-925f-678657587e75.
23:43:27,428 17944 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/4377160da6184249a5ffc7cc27155265/b0d8ae7a-e6b0-48ac-a0a1-fd3e0fa17f75.
23:43:27,434 17950 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/956c7d7b866941aaa406bd9e5cb63aab/b77b17e3-562c-47b0-8a6c-ee8eb7745fc8.
23:43:33,242 23758 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.TableRowWriter - Opening TableRowWriter to gs://nonsense/BigQueryWriteTemp/1f559dd752eb43f7bd1af1c881c21235/a8e51a20-408d-4628-abf3-bbdb2ebd9527.
23:43:35,046 25562 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl - Started BigQuery job: {jobId=956c7d7b866941aaa406bd9e5cb63aab_e9f0a5890698d99399a6106c26d65de2_00001-0, projectId=<redacted>}.
bq show -j --format=prettyjson --project_id=<redacted> 956c7d7b866941aaa406bd9e5cb63aab_e9f0a5890698d99399a6106c26d65de2_00001-0
23:43:35,126 25642 [direct-runner-worker] INFO org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl - Started BigQuery job: {jobId=4377160da6184249a5ffc7cc27155265_a6c30233d929e6958a536246c31fe3d1_00001-0, projectId=<redacted>}.
bq show -j --format=prettyjson --project_id=<redacted> 4377160da6184249a5ffc7cc27155265_a6c30233d929e6958a536246c31fe3d1_00001-0
Exception in thread "main" org.apache.beam.sdk.Pipeline$PipelineExecutionException: java.lang.NullPointerException
at org.apache.beam.runners.direct.DirectRunner$DirectPipelineResult.waitUntilFinish(DirectRunner.java:322)
at org.apache.beam.runners.direct.DirectRunner$DirectPipelineResult.waitUntilFinish(DirectRunner.java:292)
at org.apache.beam.runners.direct.DirectRunner.run(DirectRunner.java:200)
at org.apache.beam.runners.direct.DirectRunner.run(DirectRunner.java:63)
at org.apache.beam.sdk.Pipeline.run(Pipeline.java:295)
at org.apache.beam.sdk.Pipeline.run(Pipeline.java:281)
at com.pipelines.EmptySideOutputNPE.main(EmptySideOutputNPE.java:85)
Caused by: java.lang.NullPointerException
at org.apache.beam.sdk.io.gcp.bigquery.WriteTables.processElement(WriteTables.java:97)
Observations:
It runs fine on 2.0.0 when removing possibleEmptySideOutputTag from the pipeline, i.e. .withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));
It runs fine on 2.0.0 when adding one or more rows to possibleEmptySideOutputTag in the ParDo.
This looks like https://issues.apache.org/jira/browse/BEAM-2406, which has been fixed; the fix is available either at HEAD or in the upcoming 2.1.0 release.

I receive errors with my F# implementation of SimpleClusterListener

I have observed the following error on my F# implementation of SimpleClusterListener:
[ERROR][3/20/2017 11:32:53 AM][Thread 0008][[akka://ClusterSystem/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FClusterSystem%400.0.0.0%3A2552-5/endpointWriter#1522364225]] Dropping message [Akka.Actor.ActorSelectionMessage] for non-local recipient [[akka.tcp://ClusterSystem@localhost:2552/]] arriving at [akka.tcp://ClusterSystem@localhost:2552] inbound addresses [akka.tcp://ClusterSystem@0.0.0.0:2552]
I ran the C# implementation (referenced in the Appendix below) with no issues. In addition, I am using the same ports that the C# implementation is using.
NOTE:
I'm new to Akka.Net and as a result, am struggling to troubleshoot where I went wrong with the example I attempted to port.
My implementation is as follows:
Main.fs
module Program
open System
open System.Configuration
open Akka.Configuration.Hocon
open Akka.Configuration
open Akka.Actor
open Samples.Cluster.Simple
[<Literal>]
let ExitWithSuccess = 0
let createActor port =
let section = ConfigurationManager.GetSection "akka" :?> AkkaConfigurationSection
let config = ConfigurationFactory.ParseString("akka.remote.dot-netty.tcp.port=" + port)
.WithFallback(section.AkkaConfig)
let system = ActorSystem.Create ("ClusterSystem", config)
let actorRef = Props.Create(typeof<SimpleClusterListener>)
system.ActorOf(actorRef, "clusterListener") |> ignore
let startUp (ports:string list) = ports |> List.iter createActor
[<EntryPoint>]
let main args =
startUp ["2551"; "2552"; "0"]
Console.WriteLine("Press any key to exit")
Console.ReadLine() |> ignore
ExitWithSuccess
SimpleClusterListener.fs
namespace Samples.Cluster.Simple
open Akka.Actor
open Akka.Cluster
open Akka.Event
type SimpleClusterListener() =
inherit UntypedActor()
override this.PreStart() =
let cluster = Cluster.Get(UntypedActor.Context.System)
let (events:System.Type array) = [| typeof<ClusterEvent.IMemberEvent>
typeof<ClusterEvent.UnreachableMember> |]
cluster.Subscribe(base.Self, ClusterEvent.InitialStateAsEvents, events)
override this.OnReceive(message:obj) =
let log = UntypedActor.Context.GetLogger()
match message with
| :? ClusterEvent.MemberUp as e -> log.Info("Member is up: {0}", e.Member)
| :? ClusterEvent.UnreachableMember as e -> log.Info("Member detected as unreachable: {0}", e.Member)
| :? ClusterEvent.MemberRemoved as e -> log.Info("Member is removed: {0}", e.Member)
| _ -> ()
override this.PostStop() =
let cluster = Akka.Cluster.Cluster.Get(UntypedActor.Context.System)
cluster.Unsubscribe base.Self
The OnReceive method above never gets invoked. However, the PreStart method does.
Appendix:
As stated earlier, I ported the C# implementation below. I successfully ran this code. Thus, I am confused as to where I went wrong when I attempted to port it.
//-----------------------------------------------------------------------
// <copyright file="Program.cs" company="Akka.NET Project">
// Copyright (C) 2009-2016 Lightbend Inc. <http://www.lightbend.com>
// Copyright (C) 2013-2016 Akka.NET project <https://github.com/akkadotnet/akka.net>
// </copyright>
//-----------------------------------------------------------------------
using Akka.Actor;
using Akka.Configuration;
using Akka.Configuration.Hocon;
using System;
using System.Configuration;
namespace Samples.Cluster.Simple
{
class Program
{
static void Main(string[] args)
{
StartUp(args.Length == 0 ? new String[] { "2551", "2552", "0" } : args);
Console.WriteLine("Press any key to exit");
Console.ReadLine();
}
public static void StartUp(string[] ports)
{
var section = (AkkaConfigurationSection)ConfigurationManager.GetSection("akka");
foreach (var port in ports)
{
//Override the configuration of the port
var config =
ConfigurationFactory.ParseString("akka.remote.dot-netty.tcp.port=" + port)
.WithFallback(section.AkkaConfig);
//create an Akka system
var system = ActorSystem.Create("ClusterSystem", config);
//create an actor that handles cluster domain events
system.ActorOf(Props.Create(typeof(SimpleClusterListener)), "clusterListener");
}
}
}
}
//-----------------------------------------------------------------------
// <copyright file="SimpleClusterListener.cs" company="Akka.NET Project">
// Copyright (C) 2009-2016 Lightbend Inc. <http://www.lightbend.com>
// Copyright (C) 2013-2016 Akka.NET project <https://github.com/akkadotnet/akka.net>
// </copyright>
//-----------------------------------------------------------------------
using Akka.Actor;
using Akka.Cluster;
using Akka.Event;
namespace Samples.Cluster.Simple
{
public class SimpleClusterListener : UntypedActor
{
protected ILoggingAdapter Log = Context.GetLogger();
protected Akka.Cluster.Cluster Cluster = Akka.Cluster.Cluster.Get(Context.System);
/// <summary>
/// Need to subscribe to cluster changes
/// </summary>
protected override void PreStart() =>
Cluster.Subscribe(Self, ClusterEvent.InitialStateAsEvents, new[] { typeof(ClusterEvent.IMemberEvent), typeof(ClusterEvent.UnreachableMember) });
/// <summary>
/// Re-subscribe on restart
/// </summary>
protected override void PostStop() => Cluster.Unsubscribe(Self);
protected override void OnReceive(object message)
{
var up = message as ClusterEvent.MemberUp;
if (up != null)
{
var mem = up;
Log.Info("Member is Up: {0}", mem.Member);
}
else if (message is ClusterEvent.UnreachableMember)
{
var unreachable = (ClusterEvent.UnreachableMember)message;
Log.Info("Member detected as unreachable: {0}", unreachable.Member);
}
else if (message is ClusterEvent.MemberRemoved)
{
var removed = (ClusterEvent.MemberRemoved)message;
Log.Info("Member is Removed: {0}", removed.Member);
}
else if (message is ClusterEvent.IMemberEvent)
{
//IGNORE
}
else if (message is ClusterEvent.CurrentClusterState)
{
}
else
{
Unhandled(message);
}
}
}
}
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<configSections>
<section name="akka" type="Akka.Configuration.Hocon.AkkaConfigurationSection, Akka"/>
</configSections>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2"/>
</startup>
<akka>
<hocon>
<![CDATA[
akka {
actor {
provider = "Akka.Cluster.ClusterActorRefProvider, Akka.Cluster"
}
remote {
log-remote-lifecycle-events = DEBUG
dot-netty.tcp {
hostname = "localhost"
port = 0
}
}
cluster {
seed-nodes = [
"akka.tcp://ClusterSystem#localhost:2551",
"akka.tcp://ClusterSystem#localhost:2552"]
#auto-down-unreachable-after = 30s
}
}
]]>
</hocon>
</akka>
</configuration>
Assuming you're using the 1.1.3 packages.
You should use
ConfigurationFactory.ParseString("akka.remote.helios.tcp.port=" + port)
.WithFallback(section.AkkaConfig);
and not the dot-netty transport. That one has not been released by us yet and is only available in the dev branch.
