kafka-health-check/src/main/java/com/deviceinsight/kafka/health/KafkaConsumingHealthIndicat...

245 lines
8.4 KiB
Java
Raw Normal View History

2019-03-28 18:35:14 +03:00
package com.deviceinsight.kafka.health;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
2019-06-03 15:04:34 +03:00
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
2021-04-13 10:36:26 +03:00
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics;
2019-03-28 18:35:14 +03:00
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
2019-06-03 15:54:27 +03:00
import org.apache.kafka.clients.consumer.ConsumerRecords;
2019-03-28 18:35:14 +03:00
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
2019-06-03 15:54:27 +03:00
import org.springframework.beans.factory.BeanInitializationException;
2019-03-28 18:35:14 +03:00
import org.springframework.boot.actuate.health.AbstractHealthIndicator;
import org.springframework.boot.actuate.health.Health;
import java.net.InetAddress;
import java.net.UnknownHostException;
2019-04-01 19:18:24 +03:00
import java.time.Duration;
2019-03-28 18:35:14 +03:00
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
2019-05-16 10:25:22 +03:00
import java.util.concurrent.Executors;
2019-03-28 18:35:14 +03:00
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeoutException;
2019-05-16 10:25:22 +03:00
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.StreamSupport;
2019-03-28 18:35:14 +03:00
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
public class KafkaConsumingHealthIndicator extends AbstractHealthIndicator {
2019-06-03 15:04:34 +03:00
private static final Logger logger = LoggerFactory.getLogger(KafkaConsumingHealthIndicator.class);
2019-05-24 14:02:45 +03:00
private static final String CONSUMER_GROUP_PREFIX = "health-check-";
2021-04-13 10:36:26 +03:00
private static final String CACHE_NAME = "kafka-health-check";
2019-03-28 18:35:14 +03:00
private final Consumer<String, String> consumer;
private final Producer<String, String> producer;
private final String topic;
2019-10-10 21:33:20 +03:00
private final Duration sendReceiveTimeout;
private final Duration pollTimeout;
private final Duration subscriptionTimeout;
2019-03-28 18:35:14 +03:00
private final ExecutorService executor;
2019-05-16 10:25:22 +03:00
private final AtomicBoolean running;
2019-06-03 15:04:34 +03:00
private final Cache<String, String> cache;
2021-04-13 10:36:26 +03:00
private final String consumerGroupId;
2019-03-28 18:35:14 +03:00
2019-05-16 10:25:22 +03:00
private KafkaCommunicationResult kafkaCommunicationResult;
2019-03-28 18:35:14 +03:00
public KafkaConsumingHealthIndicator(KafkaHealthProperties kafkaHealthProperties,
Map<String, Object> kafkaConsumerProperties, Map<String, Object> kafkaProducerProperties) {
2021-04-13 10:36:26 +03:00
this(kafkaHealthProperties, kafkaConsumerProperties, kafkaProducerProperties, null);
}
public KafkaConsumingHealthIndicator(KafkaHealthProperties kafkaHealthProperties,
Map<String, Object> kafkaConsumerProperties, Map<String, Object> kafkaProducerProperties,
MeterRegistry meterRegistry) {
2019-03-28 18:35:14 +03:00
2019-10-10 21:33:20 +03:00
logger.info("Initializing kafka health check with properties: {}", kafkaHealthProperties);
2019-03-28 18:35:14 +03:00
this.topic = kafkaHealthProperties.getTopic();
2019-10-10 21:33:20 +03:00
this.sendReceiveTimeout = kafkaHealthProperties.getSendReceiveTimeout();
this.pollTimeout = kafkaHealthProperties.getPollTimeout();
this.subscriptionTimeout = kafkaHealthProperties.getSubscriptionTimeout();
2019-03-28 18:35:14 +03:00
Map<String, Object> kafkaConsumerPropertiesCopy = new HashMap<>(kafkaConsumerProperties);
2021-04-13 10:36:26 +03:00
this.consumerGroupId = getUniqueConsumerGroupId(kafkaConsumerPropertiesCopy);
kafkaConsumerPropertiesCopy.put(ConsumerConfig.GROUP_ID_CONFIG, consumerGroupId);
2019-03-28 18:35:14 +03:00
StringDeserializer deserializer = new StringDeserializer();
StringSerializer serializer = new StringSerializer();
this.consumer = new KafkaConsumer<>(kafkaConsumerPropertiesCopy, deserializer, deserializer);
this.producer = new KafkaProducer<>(kafkaProducerProperties, serializer, serializer);
2019-06-03 15:54:27 +03:00
this.executor = Executors.newSingleThreadExecutor();
2019-05-16 10:25:22 +03:00
this.running = new AtomicBoolean(true);
this.cache = Caffeine.newBuilder()
.expireAfterWrite(sendReceiveTimeout)
.maximumSize(kafkaHealthProperties.getCache().getMaximumSize())
.build();
2021-04-13 10:36:26 +03:00
enableCacheMetrics(cache, meterRegistry);
2019-05-16 10:25:22 +03:00
2019-06-03 15:54:27 +03:00
this.kafkaCommunicationResult =
KafkaCommunicationResult.failure(new RejectedExecutionException("Kafka Health Check is starting."));
2019-03-28 18:35:14 +03:00
}
@PostConstruct
void subscribeAndSendMessage() throws InterruptedException {
subscribeToTopic();
2019-05-16 10:25:22 +03:00
2019-03-28 18:35:14 +03:00
if (kafkaCommunicationResult.isFailure()) {
2021-04-13 10:36:26 +03:00
throw new BeanInitializationException("Kafka health check failed", kafkaCommunicationResult.getException());
2019-03-28 18:35:14 +03:00
}
2019-05-16 10:25:22 +03:00
executor.submit(() -> {
while (running.get()) {
2019-10-10 21:33:20 +03:00
ConsumerRecords<String, String> records = consumer.poll(pollTimeout);
StreamSupport.stream(records.spliterator(), false)
.filter(record -> record.key() != null && record.key().equals(consumerGroupId))
.forEach(record -> cache.put(record.key(), record.value()));
2019-05-16 10:25:22 +03:00
}
});
2019-03-28 18:35:14 +03:00
}
@PreDestroy
void shutdown() {
2019-05-16 10:25:22 +03:00
running.set(false);
2019-06-03 15:54:27 +03:00
executor.shutdownNow();
2019-03-28 18:35:14 +03:00
producer.close();
consumer.close();
}
2021-04-13 10:36:26 +03:00
private String getUniqueConsumerGroupId(Map<String, Object> kafkaConsumerProperties) {
2019-03-28 18:35:14 +03:00
try {
2019-05-15 17:23:27 +03:00
String groupId = (String) kafkaConsumerProperties.getOrDefault(ConsumerConfig.GROUP_ID_CONFIG,
UUID.randomUUID().toString());
2021-04-13 10:36:26 +03:00
return CONSUMER_GROUP_PREFIX + groupId + "-" + InetAddress.getLocalHost().getHostAddress();
2019-03-28 18:35:14 +03:00
} catch (UnknownHostException e) {
throw new IllegalStateException(e);
}
}
2019-06-03 15:54:27 +03:00
private void subscribeToTopic() throws InterruptedException {
2019-03-28 18:35:14 +03:00
final CountDownLatch subscribed = new CountDownLatch(1);
logger.info("Subscribe to health check topic={}", topic);
consumer.subscribe(Collections.singleton(topic), new ConsumerRebalanceListener() {
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
// nothing to do her
}
@Override
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
logger.debug("Got partitions = {}", partitions);
if (!partitions.isEmpty()) {
subscribed.countDown();
}
}
});
2019-10-10 21:33:20 +03:00
consumer.poll(pollTimeout);
if (!subscribed.await(subscriptionTimeout.toMillis(), MILLISECONDS)) {
2019-06-03 15:54:27 +03:00
throw new BeanInitializationException("Subscription to kafka failed, topic=" + topic);
2019-03-28 18:35:14 +03:00
}
2019-06-03 15:54:27 +03:00
this.kafkaCommunicationResult = KafkaCommunicationResult.success();
2019-03-28 18:35:14 +03:00
}
2019-06-03 15:54:27 +03:00
private String sendMessage() {
2019-03-28 18:35:14 +03:00
try {
2019-06-03 15:54:27 +03:00
return sendKafkaMessage();
2019-03-28 18:35:14 +03:00
} catch (ExecutionException e) {
logger.warn("Kafka health check execution failed.", e);
2019-06-03 15:54:27 +03:00
this.kafkaCommunicationResult = KafkaCommunicationResult.failure(e);
2019-03-28 18:35:14 +03:00
} catch (TimeoutException | InterruptedException e) {
logger.warn("Kafka health check timed out.", e);
2019-06-03 15:54:27 +03:00
this.kafkaCommunicationResult = KafkaCommunicationResult.failure(e);
2019-03-28 18:35:14 +03:00
} catch (RejectedExecutionException e) {
logger.debug("Ignore health check, already running...");
}
2019-06-03 15:54:27 +03:00
return null;
2019-03-28 18:35:14 +03:00
}
2019-06-03 15:54:27 +03:00
private String sendKafkaMessage() throws InterruptedException, ExecutionException, TimeoutException {
2019-03-28 18:35:14 +03:00
String message = UUID.randomUUID().toString();
2019-09-10 11:45:21 +03:00
logger.trace("Send health check message = {}", message);
2019-03-28 18:35:14 +03:00
producer.send(new ProducerRecord<>(topic, consumerGroupId, message))
.get(sendReceiveTimeout.toMillis(), MILLISECONDS);
2019-03-28 18:35:14 +03:00
2019-06-03 15:54:27 +03:00
return message;
2019-03-28 18:35:14 +03:00
}
2019-06-03 15:54:27 +03:00
@Override
protected void doHealthCheck(Health.Builder builder) {
String expectedMessage = sendMessage();
if (expectedMessage == null) {
goDown(builder);
return;
}
2019-03-28 18:35:14 +03:00
2019-06-03 15:54:27 +03:00
long startTime = System.currentTimeMillis();
while (true) {
String receivedMessage = cache.getIfPresent(consumerGroupId);
2019-06-03 15:54:27 +03:00
if (expectedMessage.equals(receivedMessage)) {
2019-03-28 18:35:14 +03:00
2019-06-03 15:54:27 +03:00
builder.up();
return;
2019-10-10 21:33:20 +03:00
} else if (System.currentTimeMillis() - startTime > sendReceiveTimeout.toMillis()) {
2019-03-28 18:35:14 +03:00
2019-06-03 15:54:27 +03:00
if (kafkaCommunicationResult.isFailure()) {
goDown(builder);
} else {
2021-04-13 10:36:26 +03:00
builder.down(new TimeoutException("Sending and receiving took longer than " + sendReceiveTimeout))
2019-06-03 15:54:27 +03:00
.withDetail("topic", topic);
}
return;
}
2019-03-28 18:35:14 +03:00
}
}
2019-05-16 10:25:22 +03:00
2019-06-03 15:54:27 +03:00
private void goDown(Health.Builder builder) {
builder.down(kafkaCommunicationResult.getException()).withDetail("topic", topic);
2019-05-16 10:25:22 +03:00
}
2021-04-13 10:36:26 +03:00
private void enableCacheMetrics(Cache<String, String> cache, MeterRegistry meterRegistry) {
if (meterRegistry == null) {
return;
}
CaffeineCacheMetrics.monitor(meterRegistry, cache, CACHE_NAME,
Collections.singletonList(Tag.of("instance", consumerGroupId)));
}
2019-03-28 18:35:14 +03:00
}